/*
 * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "asm/assembler.hpp"
#include "asm/assembler.inline.hpp"
#include "compiler/disassembler.hpp"
#include "gc_interface/collectedHeap.inline.hpp"
#include "interpreter/interpreter.hpp"
#include "memory/cardTableModRefBS.hpp"
#include "memory/resourceArea.hpp"
#include "memory/universe.hpp"
#include "prims/methodHandles.hpp"
#include "runtime/biasedLocking.hpp"
#include "runtime/interfaceSupport.hpp"
#include "runtime/objectMonitor.hpp"
#include "runtime/os.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/stubRoutines.hpp"
#include "utilities/macros.hpp"
#if INCLUDE_ALL_GCS
#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
#include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"
#include "gc_implementation/g1/heapRegion.hpp"
#endif // INCLUDE_ALL_GCS

#ifdef PRODUCT
#define BLOCK_COMMENT(str) /* nothing */
#define STOP(error) stop(error)
#else
#define BLOCK_COMMENT(str) block_comment(str)
#define STOP(error) block_comment(error); stop(error)
#endif

#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")


#ifdef ASSERT
bool AbstractAssembler::pd_check_instruction_mark() { return true; }
#endif

static Assembler::Condition reverse[] = {
    Assembler::noOverflow   /* overflow     = 0x0 */ ,
    Assembler::overflow     /* noOverflow   = 0x1 */ ,
    Assembler::aboveEqual   /* carrySet     = 0x2, below      = 0x2 */ ,
    Assembler::below        /* aboveEqual   = 0x3, carryClear = 0x3 */ ,
    Assembler::notZero      /* zero         = 0x4, equal      = 0x4 */ ,
    Assembler::zero         /* notZero      = 0x5, notEqual   = 0x5 */ ,
    Assembler::above        /* belowEqual   = 0x6 */ ,
    Assembler::belowEqual   /* above        = 0x7 */ ,
    Assembler::positive     /* negative     = 0x8 */ ,
    Assembler::negative     /* positive     = 0x9 */ ,
    Assembler::noParity     /* parity       = 0xa */ ,
    Assembler::parity       /* noParity     = 0xb */ ,
    Assembler::greaterEqual /* less         = 0xc */ ,
    Assembler::less         /* greaterEqual = 0xd */ ,
    Assembler::greater      /* lessEqual    = 0xe */ ,
    Assembler::lessEqual    /* greater      = 0xf */
};


// Implementation of MacroAssembler

// First all the versions that have distinct versions depending on 32/64 bit,
// unless the difference is trivial (1 line or so).
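// A hedged sketch of how the table above is used: indexing reverse[] by a
// condition's x86 encoding yields the opposite condition, so a branch can be
// inverted without a switch (hypothetical helper, not part of this file):
//
//   static Assembler::Condition negate_condition(Assembler::Condition cc) {
//     return reverse[cc];  // e.g. reverse[Assembler::zero] == Assembler::notZero
//   }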

#ifndef _LP64

// 32bit versions

Address MacroAssembler::as_Address(AddressLiteral adr) {
  return Address(adr.target(), adr.rspec());
}

Address MacroAssembler::as_Address(ArrayAddress adr) {
  return Address::make_array(adr);
}

int MacroAssembler::biased_locking_enter(Register lock_reg,
                                         Register obj_reg,
                                         Register swap_reg,
                                         Register tmp_reg,
                                         bool swap_reg_contains_mark,
                                         Label& done,
                                         Label* slow_case,
                                         BiasedLockingCounters* counters) {
  assert(UseBiasedLocking, "why call this otherwise?");
  assert(swap_reg == rax, "swap_reg must be rax for cmpxchg");
  assert_different_registers(lock_reg, obj_reg, swap_reg);

  if (PrintBiasedLockingStatistics && counters == NULL)
    counters = BiasedLocking::counters();

  bool need_tmp_reg = false;
  if (tmp_reg == noreg) {
    need_tmp_reg = true;
    tmp_reg = lock_reg;
  } else {
    assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg);
  }
  assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
  Address mark_addr      (obj_reg, oopDesc::mark_offset_in_bytes());
  Address klass_addr     (obj_reg, oopDesc::klass_offset_in_bytes());
  Address saved_mark_addr(lock_reg, 0);

  // Biased locking
  // See whether the lock is currently biased toward our thread and
  // whether the epoch is still valid.
  // Note that the runtime guarantees sufficient alignment of JavaThread
  // pointers to allow age to be placed into low bits.
  // First check to see whether biasing is even enabled for this object.
  Label cas_label;
  int null_check_offset = -1;
  if (!swap_reg_contains_mark) {
    null_check_offset = offset();
    movl(swap_reg, mark_addr);
  }
  if (need_tmp_reg) {
    push(tmp_reg);
  }
  movl(tmp_reg, swap_reg);
  andl(tmp_reg, markOopDesc::biased_lock_mask_in_place);
  cmpl(tmp_reg, markOopDesc::biased_lock_pattern);
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  jcc(Assembler::notEqual, cas_label);
  // The bias pattern is present in the object's header. Need to check
  // whether the bias owner and the epoch are both still current.
  // Note that because there is no current thread register on x86 we
  // need to store off the mark word we read out of the object to
  // avoid reloading it and needing to recheck invariants below. This
  // store is unfortunate but it makes the overall code shorter and
  // simpler.
  movl(saved_mark_addr, swap_reg);
  if (need_tmp_reg) {
    push(tmp_reg);
  }
  get_thread(tmp_reg);
  xorl(swap_reg, tmp_reg);
  if (swap_reg_contains_mark) {
    null_check_offset = offset();
  }
  movl(tmp_reg, klass_addr);
  xorl(swap_reg, Address(tmp_reg, Klass::prototype_header_offset()));
  andl(swap_reg, ~((int) markOopDesc::age_mask_in_place));
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  if (counters != NULL) {
    cond_inc32(Assembler::zero,
               ExternalAddress((address)counters->biased_lock_entry_count_addr()));
  }
  jcc(Assembler::equal, done);

  Label try_revoke_bias;
  Label try_rebias;

  // At this point we know that the header has the bias pattern and
  // that we are not the bias owner in the current epoch. We need to
  // figure out more details about the state of the header in order to
  // know what operations can be legally performed on the object's
  // header.

  // If the low three bits in the xor result aren't clear, that means
  // the prototype header is no longer biased and we have to revoke
  // the bias on this object.
  testl(swap_reg, markOopDesc::biased_lock_mask_in_place);
  jcc(Assembler::notZero, try_revoke_bias);

  // Biasing is still enabled for this data type. See whether the
  // epoch of the current bias is still valid, meaning that the epoch
  // bits of the mark word are equal to the epoch bits of the
  // prototype header. (Note that the prototype header's epoch bits
  // only change at a safepoint.) If not, attempt to rebias the object
  // toward the current thread. Note that we must be absolutely sure
  // that the current epoch is invalid in order to do this because
  // otherwise the manipulations it performs on the mark word are
  // illegal.
  testl(swap_reg, markOopDesc::epoch_mask_in_place);
  jcc(Assembler::notZero, try_rebias);

  // The epoch of the current bias is still valid but we know nothing
  // about the owner; it might be set or it might be clear. Try to
  // acquire the bias of the object using an atomic operation. If this
  // fails we will go into the runtime to revoke the object's bias.
  // Note that we first construct the presumed unbiased header so we
  // don't accidentally blow away another thread's valid bias.
  movl(swap_reg, saved_mark_addr);
  andl(swap_reg,
       markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);
  if (need_tmp_reg) {
    push(tmp_reg);
  }
  get_thread(tmp_reg);
  orl(tmp_reg, swap_reg);
  if (os::is_MP()) {
    lock();
  }
  cmpxchgptr(tmp_reg, Address(obj_reg, 0));
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  // If the biasing toward our thread failed, this means that
  // another thread succeeded in biasing it toward itself and we
  // need to revoke that bias. The revocation will occur in the
  // interpreter runtime in the slow case.
  if (counters != NULL) {
    cond_inc32(Assembler::zero,
               ExternalAddress((address)counters->anonymously_biased_lock_entry_count_addr()));
  }
  if (slow_case != NULL) {
    jcc(Assembler::notZero, *slow_case);
  }
  jmp(done);

  bind(try_rebias);
  // At this point we know the epoch has expired, meaning that the
  // current "bias owner", if any, is actually invalid. Under these
  // circumstances _only_, we are allowed to use the current header's
  // value as the comparison value when doing the cas to acquire the
  // bias in the current epoch. In other words, we allow transfer of
  // the bias from one thread to another directly in this situation.
  //
  // FIXME: due to a lack of registers we currently blow away the age
  // bits in this situation. Should attempt to preserve them.
  if (need_tmp_reg) {
    push(tmp_reg);
  }
  get_thread(tmp_reg);
  movl(swap_reg, klass_addr);
  orl(tmp_reg, Address(swap_reg, Klass::prototype_header_offset()));
  movl(swap_reg, saved_mark_addr);
  if (os::is_MP()) {
    lock();
  }
  cmpxchgptr(tmp_reg, Address(obj_reg, 0));
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  // If the biasing toward our thread failed, then another thread
  // succeeded in biasing it toward itself and we need to revoke that
  // bias. The revocation will occur in the runtime in the slow case.
  if (counters != NULL) {
    cond_inc32(Assembler::zero,
               ExternalAddress((address)counters->rebiased_lock_entry_count_addr()));
  }
  if (slow_case != NULL) {
    jcc(Assembler::notZero, *slow_case);
  }
  jmp(done);

  bind(try_revoke_bias);
  // The prototype mark in the klass doesn't have the bias bit set any
  // more, indicating that objects of this data type are not supposed
  // to be biased any more. We are going to try to reset the mark of
  // this object to the prototype value and fall through to the
  // CAS-based locking scheme. Note that if our CAS fails, it means
  // that another thread raced us for the privilege of revoking the
  // bias of this particular object, so it's okay to continue in the
  // normal locking code.
  //
  // FIXME: due to a lack of registers we currently blow away the age
  // bits in this situation. Should attempt to preserve them.
  movl(swap_reg, saved_mark_addr);
  if (need_tmp_reg) {
    push(tmp_reg);
  }
  movl(tmp_reg, klass_addr);
  movl(tmp_reg, Address(tmp_reg, Klass::prototype_header_offset()));
  if (os::is_MP()) {
    lock();
  }
  cmpxchgptr(tmp_reg, Address(obj_reg, 0));
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  // Fall through to the normal CAS-based lock, because no matter what
  // the result of the above CAS, some thread must have succeeded in
  // removing the bias bit from the object's header.
  if (counters != NULL) {
    cond_inc32(Assembler::zero,
               ExternalAddress((address)counters->revoked_lock_entry_count_addr()));
  }

  bind(cas_label);

  return null_check_offset;
}
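// For orientation, the mark word layout the routine above depends on (see
// markOop.hpp for the authoritative definition; low bits on the right):
//
//   [ JavaThread* | epoch | age | 1 | 01 ]  biased toward the stored thread
//   [ header bits         | age | 0 | 01 ]  unlocked, unbiased
//
// biased_lock_pattern is the low three bits 101; the anonymous-bias CAS above
// succeeds only while the thread bits are still zero in the current epoch.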
void MacroAssembler::call_VM_leaf_base(address entry_point,
                                       int number_of_arguments) {
  call(RuntimeAddress(entry_point));
  increment(rsp, number_of_arguments * wordSize);
}

void MacroAssembler::cmpklass(Address src1, Metadata* obj) {
  cmp_literal32(src1, (int32_t)obj, metadata_Relocation::spec_for_immediate());
}

void MacroAssembler::cmpklass(Register src1, Metadata* obj) {
  cmp_literal32(src1, (int32_t)obj, metadata_Relocation::spec_for_immediate());
}

void MacroAssembler::cmpoop(Address src1, jobject obj) {
  cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate());
}

void MacroAssembler::cmpoop(Register src1, jobject obj) {
  cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate());
}

void MacroAssembler::extend_sign(Register hi, Register lo) {
  // According to Intel Doc. AP-526, "Integer Divide", p. 18.
  if (VM_Version::is_P6() && hi == rdx && lo == rax) {
    cdql();
  } else {
    movl(hi, lo);
    sarl(hi, 31);
  }
}

void MacroAssembler::jC2(Register tmp, Label& L) {
  // set parity bit if FPU flag C2 is set (via rax)
  save_rax(tmp);
  fwait(); fnstsw_ax();
  sahf();
  restore_rax(tmp);
  // branch
  jcc(Assembler::parity, L);
}

void MacroAssembler::jnC2(Register tmp, Label& L) {
  // set parity bit if FPU flag C2 is set (via rax)
  save_rax(tmp);
  fwait(); fnstsw_ax();
  sahf();
  restore_rax(tmp);
  // branch
  jcc(Assembler::noParity, L);
}

// 32bit can do a case table jump in one instruction but we no longer allow the base
// to be installed in the Address class
void MacroAssembler::jump(ArrayAddress entry) {
  jmp(as_Address(entry));
}

// Note: y_lo will be destroyed
void MacroAssembler::lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo) {
  // Long compare for Java (semantics as described in JVM spec.)
  Label high, low, done;

  cmpl(x_hi, y_hi);
  jcc(Assembler::less, low);
  jcc(Assembler::greater, high);
  // x_hi is the return register
  xorl(x_hi, x_hi);
  cmpl(x_lo, y_lo);
  jcc(Assembler::below, low);
  jcc(Assembler::equal, done);

  bind(high);
  xorl(x_hi, x_hi);
  increment(x_hi);
  jmp(done);

  bind(low);
  xorl(x_hi, x_hi);
  decrementl(x_hi);

  bind(done);
}

void MacroAssembler::lea(Register dst, AddressLiteral src) {
  mov_literal32(dst, (int32_t)src.target(), src.rspec());
}

void MacroAssembler::lea(Address dst, AddressLiteral adr) {
  // leal(dst, as_Address(adr));
  // see note in movl as to why we must use a move
  mov_literal32(dst, (int32_t)adr.target(), adr.rspec());
}

void MacroAssembler::leave() {
  mov(rsp, rbp);
  pop(rbp);
}

void MacroAssembler::lmul(int x_rsp_offset, int y_rsp_offset) {
  // Multiplication of two Java long values stored on the stack
  // as illustrated below. Result is in rdx:rax.
  //
  //   rsp ---> [  ??  ] \               \
  //              ....    | y_rsp_offset  |
  //            [ y_lo ] /  (in bytes)    | x_rsp_offset
  //            [ y_hi ]                  | (in bytes)
  //              ....                    |
  //            [ x_lo ]                 /
  //            [ x_hi ]
  //              ....
  //
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
  Address x_hi(rsp, x_rsp_offset + wordSize); Address x_lo(rsp, x_rsp_offset);
  Address y_hi(rsp, y_rsp_offset + wordSize); Address y_lo(rsp, y_rsp_offset);
  Label quick;
  // load x_hi, y_hi and check if quick
  // multiplication is possible
  movl(rbx, x_hi);
  movl(rcx, y_hi);
  movl(rax, rbx);
  orl(rbx, rcx);                // rbx = 0 <=> x_hi = 0 and y_hi = 0
  jcc(Assembler::zero, quick);  // if rbx = 0 do quick multiply
  // do full multiplication
  // 1st step
  mull(y_lo);                   // x_hi * y_lo
  movl(rbx, rax);               // save lo(x_hi * y_lo) in rbx
  // 2nd step
  movl(rax, x_lo);
  mull(rcx);                    // x_lo * y_hi
  addl(rbx, rax);               // add lo(x_lo * y_hi) to rbx
  // 3rd step
  bind(quick);                  // note: rbx = 0 if quick multiply!
  movl(rax, x_lo);
  mull(y_lo);                   // x_lo * y_lo
  addl(rdx, rbx);               // correct hi(x_lo * y_lo)
}
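// The identity lmul() implements, as a hedged C-style sketch (illustration
// only; hi(x_hi * y_hi) cannot reach the low 64 bits and is dropped):
//
//   uint64_t lmul(uint64_t x, uint64_t y) {
//     uint32_t x_lo = (uint32_t)x, x_hi = (uint32_t)(x >> 32);
//     uint32_t y_lo = (uint32_t)y, y_hi = (uint32_t)(y >> 32);
//     uint64_t r = (uint64_t)x_lo * y_lo;                // mull(y_lo)
//     r += (uint64_t)(x_hi * y_lo + x_lo * y_hi) << 32;  // added into rdx
//     return r;
//   }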
void MacroAssembler::lneg(Register hi, Register lo) {
  negl(lo);
  adcl(hi, 0);
  negl(hi);
}

void MacroAssembler::lshl(Register hi, Register lo) {
  // Java shift left long support (semantics as described in JVM spec., p.305)
  // (basic idea for shift counts s >= n: x << s == (x << n) << (s - n))
  // shift value is in rcx !
  assert(hi != rcx, "must not use rcx");
  assert(lo != rcx, "must not use rcx");
  const Register s = rcx;       // shift count
  const int      n = BitsPerWord;
  Label L;
  andl(s, 0x3f);                // s := s & 0x3f (s < 0x40)
  cmpl(s, n);                   // if (s < n)
  jcc(Assembler::less, L);      // else (s >= n)
  movl(hi, lo);                 // x := x << n
  xorl(lo, lo);
  // Note: subl(s, n) is not needed since the Intel shift instructions work rcx mod n!
  bind(L);                      // s (mod n) < n
  shldl(hi, lo);                // x := x << s
  shll(lo);
}


void MacroAssembler::lshr(Register hi, Register lo, bool sign_extension) {
  // Java shift right long support (semantics as described in JVM spec., p.306 & p.310)
  // (basic idea for shift counts s >= n: x >> s == (x >> n) >> (s - n))
  assert(hi != rcx, "must not use rcx");
  assert(lo != rcx, "must not use rcx");
  const Register s = rcx;       // shift count
  const int      n = BitsPerWord;
  Label L;
  andl(s, 0x3f);                // s := s & 0x3f (s < 0x40)
  cmpl(s, n);                   // if (s < n)
  jcc(Assembler::less, L);      // else (s >= n)
  movl(lo, hi);                 // x := x >> n
  if (sign_extension) sarl(hi, 31);
  else                xorl(hi, hi);
  // Note: subl(s, n) is not needed since the Intel shift instructions work rcx mod n!
  bind(L);                      // s (mod n) < n
  shrdl(lo, hi);                // x := x >> s
  if (sign_extension) sarl(hi);
  else                shrl(hi);
}
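// Java long shifts mask the count to six bits; lshl()/lshr() realize that in
// two 32-bit halves. A hedged sketch of the semantics being implemented:
//
//   int64_t java_shl(int64_t x, int s) { return x << (s & 0x3f); }
//
// The cmpl/jcc handles s >= 32 by moving a whole word first; the residual
// shift by s mod 32 then comes for free, because the hardware already reduces
// the count in rcx modulo 32.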
void MacroAssembler::movoop(Register dst, jobject obj) {
  mov_literal32(dst, (int32_t)obj, oop_Relocation::spec_for_immediate());
}

void MacroAssembler::movoop(Address dst, jobject obj) {
  mov_literal32(dst, (int32_t)obj, oop_Relocation::spec_for_immediate());
}

void MacroAssembler::mov_metadata(Register dst, Metadata* obj) {
  mov_literal32(dst, (int32_t)obj, metadata_Relocation::spec_for_immediate());
}

void MacroAssembler::mov_metadata(Address dst, Metadata* obj) {
  mov_literal32(dst, (int32_t)obj, metadata_Relocation::spec_for_immediate());
}

void MacroAssembler::movptr(Register dst, AddressLiteral src) {
  if (src.is_lval()) {
    mov_literal32(dst, (intptr_t)src.target(), src.rspec());
  } else {
    movl(dst, as_Address(src));
  }
}

void MacroAssembler::movptr(ArrayAddress dst, Register src) {
  movl(as_Address(dst), src);
}

void MacroAssembler::movptr(Register dst, ArrayAddress src) {
  movl(dst, as_Address(src));
}

// src should NEVER be a real pointer. Use AddressLiteral for true pointers
void MacroAssembler::movptr(Address dst, intptr_t src) {
  movl(dst, src);
}


void MacroAssembler::pop_callee_saved_registers() {
  pop(rcx);
  pop(rdx);
  pop(rdi);
  pop(rsi);
}

void MacroAssembler::pop_fTOS() {
  fld_d(Address(rsp, 0));
  addl(rsp, 2 * wordSize);
}

void MacroAssembler::push_callee_saved_registers() {
  push(rsi);
  push(rdi);
  push(rdx);
  push(rcx);
}

void MacroAssembler::push_fTOS() {
  subl(rsp, 2 * wordSize);
  fstp_d(Address(rsp, 0));
}


void MacroAssembler::pushoop(jobject obj) {
  push_literal32((int32_t)obj, oop_Relocation::spec_for_immediate());
}

void MacroAssembler::pushklass(Metadata* obj) {
  push_literal32((int32_t)obj, metadata_Relocation::spec_for_immediate());
}

void MacroAssembler::pushptr(AddressLiteral src) {
  if (src.is_lval()) {
    push_literal32((int32_t)src.target(), src.rspec());
  } else {
    pushl(as_Address(src));
  }
}

void MacroAssembler::set_word_if_not_zero(Register dst) {
  xorl(dst, dst);
  set_byte_if_not_zero(dst);
}

static void pass_arg0(MacroAssembler* masm, Register arg) {
  masm->push(arg);
}

static void pass_arg1(MacroAssembler* masm, Register arg) {
  masm->push(arg);
}

static void pass_arg2(MacroAssembler* masm, Register arg) {
  masm->push(arg);
}

static void pass_arg3(MacroAssembler* masm, Register arg) {
  masm->push(arg);
}
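// Note that the pass_argN helpers above simply push: on 32-bit, VM-call
// arguments travel on the stack, so callers emit pass_arg3 .. pass_arg0
// (rightmost first) and call_VM_leaf_base() pops them after the call. The
// 64-bit twins later in this file move into c_rarg0..c_rarg3 instead. A
// hedged sketch of the resulting 32-bit call shape:
//
//   pass_arg1(this, rbx);          // push rbx (second C argument)
//   pass_arg0(this, rax);          // push rax (first C argument)
//   call_VM_leaf(entry_point, 2);  // call, then rsp += 2 * wordSize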
#ifndef PRODUCT
extern "C" void findpc(intptr_t x);
#endif

void MacroAssembler::debug32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip, char* msg) {
  // In order to get locks to work, we need to fake an in_VM state
  JavaThread* thread = JavaThread::current();
  JavaThreadState saved_state = thread->thread_state();
  thread->set_thread_state(_thread_in_vm);
  if (ShowMessageBoxOnError) {
    JavaThread* thread = JavaThread::current();
    JavaThreadState saved_state = thread->thread_state();
    thread->set_thread_state(_thread_in_vm);
    if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
      ttyLocker ttyl;
      BytecodeCounter::print();
    }
    // To see where a verify_oop failed, get $ebx+40/X for this frame.
    // This is the value of eip which points to where verify_oop will return.
    if (os::message_box(msg, "Execution stopped, print registers?")) {
      print_state32(rdi, rsi, rbp, rsp, rbx, rdx, rcx, rax, eip);
      BREAKPOINT;
    }
  } else {
    ttyLocker ttyl;
    ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg);
  }
  // Don't assert holding the ttyLock
  assert(false, err_msg("DEBUG MESSAGE: %s", msg));
  ThreadStateTransition::transition(thread, _thread_in_vm, saved_state);
}

void MacroAssembler::print_state32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip) {
  ttyLocker ttyl;
  FlagSetting fs(Debugging, true);
  tty->print_cr("eip = 0x%08x", eip);
#ifndef PRODUCT
  if ((WizardMode || Verbose) && PrintMiscellaneous) {
    tty->cr();
    findpc(eip);
    tty->cr();
  }
#endif
#define PRINT_REG(rax) \
  { tty->print("%s = ", #rax); os::print_location(tty, rax); }
  PRINT_REG(rax);
  PRINT_REG(rbx);
  PRINT_REG(rcx);
  PRINT_REG(rdx);
  PRINT_REG(rdi);
  PRINT_REG(rsi);
  PRINT_REG(rbp);
  PRINT_REG(rsp);
#undef PRINT_REG
  // Print some words near the top of the stack.
  int* dump_sp = (int*) rsp;
  for (int col1 = 0; col1 < 8; col1++) {
    tty->print("(rsp+0x%03x) 0x%08x: ", (int)((intptr_t)dump_sp - (intptr_t)rsp), (intptr_t)dump_sp);
    os::print_location(tty, *dump_sp++);
  }
  for (int row = 0; row < 16; row++) {
    tty->print("(rsp+0x%03x) 0x%08x: ", (int)((intptr_t)dump_sp - (intptr_t)rsp), (intptr_t)dump_sp);
    for (int col = 0; col < 8; col++) {
      tty->print(" 0x%08x", *dump_sp++);
    }
    tty->cr();
  }
  // Print some instructions around pc:
  Disassembler::decode((address)eip-64, (address)eip);
  tty->print_cr("--------");
  Disassembler::decode((address)eip, (address)eip+32);
}

void MacroAssembler::stop(const char* msg) {
  ExternalAddress message((address)msg);
  // push address of message
  pushptr(message.addr());
  { Label L; call(L, relocInfo::none); bind(L); }  // push eip
  pusha();                                         // push registers
  call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug32)));
  hlt();
}

void MacroAssembler::warn(const char* msg) {
  push_CPU_state();

  ExternalAddress message((address) msg);
  // push address of message
  pushptr(message.addr());

  call(RuntimeAddress(CAST_FROM_FN_PTR(address, warning)));
  addl(rsp, wordSize);  // discard argument
  pop_CPU_state();
}

void MacroAssembler::print_state() {
  { Label L; call(L, relocInfo::none); bind(L); }  // push eip
  pusha();                                         // push registers

  push_CPU_state();
  call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::print_state32)));
  pop_CPU_state();

  popa();
  addl(rsp, wordSize);
}
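// The "{ Label L; call(L, relocInfo::none); bind(L); }" sequence in stop()
// and print_state() above is the usual trick for materializing eip: a call to
// the immediately following instruction pushes its return address, i.e. the
// address of the bind point, which debug32()/print_state32() then receive as
// their eip argument.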
#else // _LP64

// 64 bit versions

Address MacroAssembler::as_Address(AddressLiteral adr) {
  // amd64 always does this as a pc-rel
  // we can be absolute or disp based on the instruction type
  // jmp/call are displacements others are absolute
  assert(!adr.is_lval(), "must be rval");
  assert(reachable(adr), "must be");
  return Address((int32_t)(intptr_t)(adr.target() - pc()), adr.target(), adr.reloc());
}

Address MacroAssembler::as_Address(ArrayAddress adr) {
  AddressLiteral base = adr.base();
  lea(rscratch1, base);
  Address index = adr.index();
  assert(index._disp == 0, "must not have disp");  // maybe it can?
  Address array(rscratch1, index._index, index._scale, index._disp);
  return array;
}

int MacroAssembler::biased_locking_enter(Register lock_reg,
                                         Register obj_reg,
                                         Register swap_reg,
                                         Register tmp_reg,
                                         bool swap_reg_contains_mark,
                                         Label& done,
                                         Label* slow_case,
                                         BiasedLockingCounters* counters) {
  assert(UseBiasedLocking, "why call this otherwise?");
  assert(swap_reg == rax, "swap_reg must be rax for cmpxchgq");
  assert(tmp_reg != noreg, "tmp_reg must be supplied");
  assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg);
  assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
  Address mark_addr      (obj_reg, oopDesc::mark_offset_in_bytes());
  Address saved_mark_addr(lock_reg, 0);

  if (PrintBiasedLockingStatistics && counters == NULL)
    counters = BiasedLocking::counters();

  // Biased locking
  // See whether the lock is currently biased toward our thread and
  // whether the epoch is still valid.
  // Note that the runtime guarantees sufficient alignment of JavaThread
  // pointers to allow age to be placed into low bits.
  // First check to see whether biasing is even enabled for this object.
  Label cas_label;
  int null_check_offset = -1;
  if (!swap_reg_contains_mark) {
    null_check_offset = offset();
    movq(swap_reg, mark_addr);
  }
  movq(tmp_reg, swap_reg);
  andq(tmp_reg, markOopDesc::biased_lock_mask_in_place);
  cmpq(tmp_reg, markOopDesc::biased_lock_pattern);
  jcc(Assembler::notEqual, cas_label);
  // The bias pattern is present in the object's header. Need to check
  // whether the bias owner and the epoch are both still current.
  load_prototype_header(tmp_reg, obj_reg);
  orq(tmp_reg, r15_thread);
  xorq(tmp_reg, swap_reg);
  andq(tmp_reg, ~((int) markOopDesc::age_mask_in_place));
  if (counters != NULL) {
    cond_inc32(Assembler::zero,
               ExternalAddress((address) counters->anonymously_biased_lock_entry_count_addr()));
  }
  jcc(Assembler::equal, done);

  Label try_revoke_bias;
  Label try_rebias;

  // At this point we know that the header has the bias pattern and
  // that we are not the bias owner in the current epoch. We need to
  // figure out more details about the state of the header in order to
  // know what operations can be legally performed on the object's
  // header.

  // If the low three bits in the xor result aren't clear, that means
  // the prototype header is no longer biased and we have to revoke
  // the bias on this object.
  testq(tmp_reg, markOopDesc::biased_lock_mask_in_place);
  jcc(Assembler::notZero, try_revoke_bias);

  // Biasing is still enabled for this data type. See whether the
  // epoch of the current bias is still valid, meaning that the epoch
  // bits of the mark word are equal to the epoch bits of the
  // prototype header. (Note that the prototype header's epoch bits
  // only change at a safepoint.) If not, attempt to rebias the object
  // toward the current thread. Note that we must be absolutely sure
  // that the current epoch is invalid in order to do this because
  // otherwise the manipulations it performs on the mark word are
  // illegal.
  testq(tmp_reg, markOopDesc::epoch_mask_in_place);
  jcc(Assembler::notZero, try_rebias);

  // The epoch of the current bias is still valid but we know nothing
  // about the owner; it might be set or it might be clear. Try to
  // acquire the bias of the object using an atomic operation. If this
  // fails we will go into the runtime to revoke the object's bias.
  // Note that we first construct the presumed unbiased header so we
  // don't accidentally blow away another thread's valid bias.
  andq(swap_reg,
       markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);
  movq(tmp_reg, swap_reg);
  orq(tmp_reg, r15_thread);
  if (os::is_MP()) {
    lock();
  }
  cmpxchgq(tmp_reg, Address(obj_reg, 0));
  // If the biasing toward our thread failed, this means that
  // another thread succeeded in biasing it toward itself and we
  // need to revoke that bias. The revocation will occur in the
  // interpreter runtime in the slow case.
  if (counters != NULL) {
    cond_inc32(Assembler::zero,
               ExternalAddress((address) counters->anonymously_biased_lock_entry_count_addr()));
  }
  if (slow_case != NULL) {
    jcc(Assembler::notZero, *slow_case);
  }
  jmp(done);

  bind(try_rebias);
  // At this point we know the epoch has expired, meaning that the
  // current "bias owner", if any, is actually invalid. Under these
  // circumstances _only_, we are allowed to use the current header's
  // value as the comparison value when doing the cas to acquire the
  // bias in the current epoch. In other words, we allow transfer of
  // the bias from one thread to another directly in this situation.
  //
  // FIXME: due to a lack of registers we currently blow away the age
  // bits in this situation. Should attempt to preserve them.
  load_prototype_header(tmp_reg, obj_reg);
  orq(tmp_reg, r15_thread);
  if (os::is_MP()) {
    lock();
  }
  cmpxchgq(tmp_reg, Address(obj_reg, 0));
  // If the biasing toward our thread failed, then another thread
  // succeeded in biasing it toward itself and we need to revoke that
  // bias. The revocation will occur in the runtime in the slow case.
  if (counters != NULL) {
    cond_inc32(Assembler::zero,
               ExternalAddress((address) counters->rebiased_lock_entry_count_addr()));
  }
  if (slow_case != NULL) {
    jcc(Assembler::notZero, *slow_case);
  }
  jmp(done);

  bind(try_revoke_bias);
  // The prototype mark in the klass doesn't have the bias bit set any
  // more, indicating that objects of this data type are not supposed
  // to be biased any more. We are going to try to reset the mark of
  // this object to the prototype value and fall through to the
  // CAS-based locking scheme. Note that if our CAS fails, it means
  // that another thread raced us for the privilege of revoking the
  // bias of this particular object, so it's okay to continue in the
  // normal locking code.
  //
  // FIXME: due to a lack of registers we currently blow away the age
  // bits in this situation. Should attempt to preserve them.
  load_prototype_header(tmp_reg, obj_reg);
  if (os::is_MP()) {
    lock();
  }
  cmpxchgq(tmp_reg, Address(obj_reg, 0));
  // Fall through to the normal CAS-based lock, because no matter what
  // the result of the above CAS, some thread must have succeeded in
  // removing the bias bit from the object's header.
  if (counters != NULL) {
    cond_inc32(Assembler::zero,
               ExternalAddress((address) counters->revoked_lock_entry_count_addr()));
  }

  bind(cas_label);

  return null_check_offset;
}
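// The main structural difference from the 32-bit version earlier in this
// file: amd64 dedicates r15 to the current JavaThread, so the thread pointer
// is OR'ed straight into the candidate header (orq(tmp_reg, r15_thread)) with
// no get_thread() call and no need_tmp_reg push/pop bookkeeping.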
void MacroAssembler::call_VM_leaf_base(address entry_point, int num_args) {
  Label L, E;

#ifdef _WIN64
  // Windows always allocates space for its register args
  assert(num_args <= 4, "only register arguments supported");
  subq(rsp, frame::arg_reg_save_area_bytes);
#endif

  // Align stack if necessary
  testl(rsp, 15);
  jcc(Assembler::zero, L);

  subq(rsp, 8);
  {
    call(RuntimeAddress(entry_point));
  }
  addq(rsp, 8);
  jmp(E);

  bind(L);
  {
    call(RuntimeAddress(entry_point));
  }

  bind(E);

#ifdef _WIN64
  // restore stack pointer
  addq(rsp, frame::arg_reg_save_area_bytes);
#endif

}

void MacroAssembler::cmp64(Register src1, AddressLiteral src2) {
  assert(!src2.is_lval(), "should use cmpptr");

  if (reachable(src2)) {
    cmpq(src1, as_Address(src2));
  } else {
    lea(rscratch1, src2);
    Assembler::cmpq(src1, Address(rscratch1, 0));
  }
}

int MacroAssembler::corrected_idivq(Register reg) {
  // Full implementation of Java ldiv and lrem; checks for special
  // case as described in JVM spec., p.243 & p.271. The function
  // returns the (pc) offset of the idivl instruction - may be needed
  // for implicit exceptions.
  //
  //         normal case                           special case
  //
  // input : rax: dividend                         min_long
  //         reg: divisor (may not be eax/edx)     -1
  //
  // output: rax: quotient  (= rax idiv reg)       min_long
  //         rdx: remainder (= rax irem reg)       0
  assert(reg != rax && reg != rdx, "reg cannot be rax or rdx register");
  static const int64_t min_long = 0x8000000000000000;
  Label normal_case, special_case;

  // check for special case
  cmp64(rax, ExternalAddress((address) &min_long));
  jcc(Assembler::notEqual, normal_case);
  xorl(rdx, rdx); // prepare rdx for possible special case (where
                  // remainder = 0)
  cmpq(reg, -1);
  jcc(Assembler::equal, special_case);

  // handle normal case
  bind(normal_case);
  cdqq();
  int idivq_offset = offset();
  idivq(reg);

  // normal and special case exit
  bind(special_case);

  return idivq_offset;
}
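// Why the special case exists: idivq raises #DE on quotient overflow, and the
// only overflowing long division is min_long / -1 (2^63 is unrepresentable).
// A hedged sketch of the semantics corrected_idivq() implements:
//
//   if (dividend == min_long && divisor == -1) {
//     quotient  = min_long;  // wraps around, as Java requires
//     remainder = 0;
//   } else {
//     quotient  = dividend / divisor;
//     remainder = dividend % divisor;
//   }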
void MacroAssembler::decrementq(Register reg, int value) {
  if (value == min_jint) { subq(reg, value); return; }
  if (value <  0) { incrementq(reg, -value); return; }
  if (value == 0) {                        ; return; }
  if (value == 1 && UseIncDec) { decq(reg) ; return; }
  /* else */      { subq(reg, value)       ; return; }
}

void MacroAssembler::decrementq(Address dst, int value) {
  if (value == min_jint) { subq(dst, value); return; }
  if (value <  0) { incrementq(dst, -value); return; }
  if (value == 0) {                        ; return; }
  if (value == 1 && UseIncDec) { decq(dst) ; return; }
  /* else */      { subq(dst, value)       ; return; }
}

void MacroAssembler::incrementq(Register reg, int value) {
  if (value == min_jint) { addq(reg, value); return; }
  if (value <  0) { decrementq(reg, -value); return; }
  if (value == 0) {                        ; return; }
  if (value == 1 && UseIncDec) { incq(reg) ; return; }
  /* else */      { addq(reg, value)       ; return; }
}

void MacroAssembler::incrementq(Address dst, int value) {
  if (value == min_jint) { addq(dst, value); return; }
  if (value <  0) { decrementq(dst, -value); return; }
  if (value == 0) {                        ; return; }
  if (value == 1 && UseIncDec) { incq(dst) ; return; }
  /* else */      { addq(dst, value)       ; return; }
}

// 32bit can do a case table jump in one instruction but we no longer allow the base
// to be installed in the Address class
void MacroAssembler::jump(ArrayAddress entry) {
  lea(rscratch1, entry.base());
  Address dispatch = entry.index();
  assert(dispatch._base == noreg, "must be");
  dispatch._base = rscratch1;
  jmp(dispatch);
}

void MacroAssembler::lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo) {
  ShouldNotReachHere(); // 64bit doesn't use two regs
  cmpq(x_lo, y_lo);
}

void MacroAssembler::lea(Register dst, AddressLiteral src) {
  mov_literal64(dst, (intptr_t)src.target(), src.rspec());
}

void MacroAssembler::lea(Address dst, AddressLiteral adr) {
  mov_literal64(rscratch1, (intptr_t)adr.target(), adr.rspec());
  movptr(dst, rscratch1);
}

void MacroAssembler::leave() {
  // %%% is this really better? Why not on 32bit too?
  emit_int8((unsigned char)0xC9); // LEAVE
}

void MacroAssembler::lneg(Register hi, Register lo) {
  ShouldNotReachHere(); // 64bit doesn't use two regs
  negq(lo);
}

void MacroAssembler::movoop(Register dst, jobject obj) {
  mov_literal64(dst, (intptr_t)obj, oop_Relocation::spec_for_immediate());
}

void MacroAssembler::movoop(Address dst, jobject obj) {
  mov_literal64(rscratch1, (intptr_t)obj, oop_Relocation::spec_for_immediate());
  movq(dst, rscratch1);
}

void MacroAssembler::mov_metadata(Register dst, Metadata* obj) {
  mov_literal64(dst, (intptr_t)obj, metadata_Relocation::spec_for_immediate());
}

void MacroAssembler::mov_metadata(Address dst, Metadata* obj) {
  mov_literal64(rscratch1, (intptr_t)obj, metadata_Relocation::spec_for_immediate());
  movq(dst, rscratch1);
}

void MacroAssembler::movptr(Register dst, AddressLiteral src) {
  if (src.is_lval()) {
    mov_literal64(dst, (intptr_t)src.target(), src.rspec());
  } else {
    if (reachable(src)) {
      movq(dst, as_Address(src));
    } else {
      lea(rscratch1, src);
      movq(dst, Address(rscratch1, 0));
    }
  }
}

void MacroAssembler::movptr(ArrayAddress dst, Register src) {
  movq(as_Address(dst), src);
}

void MacroAssembler::movptr(Register dst, ArrayAddress src) {
  movq(dst, as_Address(src));
}
// src should NEVER be a real pointer. Use AddressLiteral for true pointers
void MacroAssembler::movptr(Address dst, intptr_t src) {
  mov64(rscratch1, src);
  movq(dst, rscratch1);
}

// These are mostly for initializing NULL
void MacroAssembler::movptr(Address dst, int32_t src) {
  movslq(dst, src);
}

void MacroAssembler::movptr(Register dst, int32_t src) {
  mov64(dst, (intptr_t)src);
}

void MacroAssembler::pushoop(jobject obj) {
  movoop(rscratch1, obj);
  push(rscratch1);
}

void MacroAssembler::pushklass(Metadata* obj) {
  mov_metadata(rscratch1, obj);
  push(rscratch1);
}

void MacroAssembler::pushptr(AddressLiteral src) {
  lea(rscratch1, src);
  if (src.is_lval()) {
    push(rscratch1);
  } else {
    pushq(Address(rscratch1, 0));
  }
}

void MacroAssembler::reset_last_Java_frame(bool clear_fp,
                                           bool clear_pc) {
  // we must set sp to zero to clear frame
  movptr(Address(r15_thread, JavaThread::last_Java_sp_offset()), NULL_WORD);
  // must clear fp, so that compiled frames are not confused; it is
  // possible that we need it only for debugging
  if (clear_fp) {
    movptr(Address(r15_thread, JavaThread::last_Java_fp_offset()), NULL_WORD);
  }

  if (clear_pc) {
    movptr(Address(r15_thread, JavaThread::last_Java_pc_offset()), NULL_WORD);
  }
}

void MacroAssembler::set_last_Java_frame(Register last_java_sp,
                                         Register last_java_fp,
                                         address  last_java_pc) {
  // determine last_java_sp register
  if (!last_java_sp->is_valid()) {
    last_java_sp = rsp;
  }

  // last_java_fp is optional
  if (last_java_fp->is_valid()) {
    movptr(Address(r15_thread, JavaThread::last_Java_fp_offset()),
           last_java_fp);
  }

  // last_java_pc is optional
  if (last_java_pc != NULL) {
    Address java_pc(r15_thread,
                    JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset());
    lea(rscratch1, InternalAddress(last_java_pc));
    movptr(java_pc, rscratch1);
  }

  movptr(Address(r15_thread, JavaThread::last_Java_sp_offset()), last_java_sp);
}

static void pass_arg0(MacroAssembler* masm, Register arg) {
  if (c_rarg0 != arg) {
    masm->mov(c_rarg0, arg);
  }
}

static void pass_arg1(MacroAssembler* masm, Register arg) {
  if (c_rarg1 != arg) {
    masm->mov(c_rarg1, arg);
  }
}

static void pass_arg2(MacroAssembler* masm, Register arg) {
  if (c_rarg2 != arg) {
    masm->mov(c_rarg2, arg);
  }
}

static void pass_arg3(MacroAssembler* masm, Register arg) {
  if (c_rarg3 != arg) {
    masm->mov(c_rarg3, arg);
  }
}

void MacroAssembler::stop(const char* msg) {
  address rip = pc();
  pusha();            // get regs on stack
  lea(c_rarg0, ExternalAddress((address) msg));
  lea(c_rarg1, InternalAddress(rip));
  movq(c_rarg2, rsp); // pass pointer to regs array
  andq(rsp, -16);     // align stack as required by ABI
  call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug64)));
  hlt();
}

void MacroAssembler::warn(const char* msg) {
  push(rbp);
  movq(rbp, rsp);
  andq(rsp, -16);     // align stack as required by push_CPU_state and call
  push_CPU_state();   // keeps alignment at 16 bytes
  lea(c_rarg0, ExternalAddress((address) msg));
  call_VM_leaf(CAST_FROM_FN_PTR(address, warning), c_rarg0);
  pop_CPU_state();
  mov(rsp, rbp);
  pop(rbp);
}
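// stop() and warn() above force 16-byte stack alignment (andq(rsp, -16))
// because the ABI requires it at call sites and push_CPU_state() may use
// aligned SSE saves. Clearing the low four bits only ever moves rsp down, so
// restoring through the saved rbp (warn) or never returning (stop) keeps the
// frame consistent.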
void MacroAssembler::print_state() {
  address rip = pc();
  pusha();            // get regs on stack
  push(rbp);
  movq(rbp, rsp);
  andq(rsp, -16);     // align stack as required by push_CPU_state and call
  push_CPU_state();   // keeps alignment at 16 bytes

  lea(c_rarg0, InternalAddress(rip));
  lea(c_rarg1, Address(rbp, wordSize)); // pass pointer to regs array
  call_VM_leaf(CAST_FROM_FN_PTR(address, MacroAssembler::print_state64), c_rarg0, c_rarg1);

  pop_CPU_state();
  mov(rsp, rbp);
  pop(rbp);
  popa();
}

#ifndef PRODUCT
extern "C" void findpc(intptr_t x);
#endif

void MacroAssembler::debug64(char* msg, int64_t pc, int64_t regs[]) {
  // In order to get locks to work, we need to fake an in_VM state
  if (ShowMessageBoxOnError) {
    JavaThread* thread = JavaThread::current();
    JavaThreadState saved_state = thread->thread_state();
    thread->set_thread_state(_thread_in_vm);
#ifndef PRODUCT
    if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
      ttyLocker ttyl;
      BytecodeCounter::print();
    }
#endif
    // To see where a verify_oop failed, get $ebx+40/X for this frame.
    // XXX correct this offset for amd64
    // This is the value of eip which points to where verify_oop will return.
    if (os::message_box(msg, "Execution stopped, print registers?")) {
      print_state64(pc, regs);
      BREAKPOINT;
      assert(false, "start up GDB");
    }
    ThreadStateTransition::transition(thread, _thread_in_vm, saved_state);
  } else {
    ttyLocker ttyl;
    ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n",
                    msg);
    assert(false, err_msg("DEBUG MESSAGE: %s", msg));
  }
}

void MacroAssembler::print_state64(int64_t pc, int64_t regs[]) {
  ttyLocker ttyl;
  FlagSetting fs(Debugging, true);
  tty->print_cr("rip = 0x%016lx", pc);
#ifndef PRODUCT
  tty->cr();
  findpc(pc);
  tty->cr();
#endif
#define PRINT_REG(rax, value) \
  { tty->print("%s = ", #rax); os::print_location(tty, value); }
  PRINT_REG(rax, regs[15]);
  PRINT_REG(rbx, regs[12]);
  PRINT_REG(rcx, regs[14]);
  PRINT_REG(rdx, regs[13]);
  PRINT_REG(rdi, regs[8]);
  PRINT_REG(rsi, regs[9]);
  PRINT_REG(rbp, regs[10]);
  PRINT_REG(rsp, regs[11]);
  PRINT_REG(r8 , regs[7]);
  PRINT_REG(r9 , regs[6]);
  PRINT_REG(r10, regs[5]);
  PRINT_REG(r11, regs[4]);
  PRINT_REG(r12, regs[3]);
  PRINT_REG(r13, regs[2]);
  PRINT_REG(r14, regs[1]);
  PRINT_REG(r15, regs[0]);
#undef PRINT_REG
  // Print some words near the top of the stack.
  int64_t* rsp = (int64_t*) regs[11];
  int64_t* dump_sp = rsp;
  for (int col1 = 0; col1 < 8; col1++) {
    tty->print("(rsp+0x%03x) 0x%016lx: ", (int)((intptr_t)dump_sp - (intptr_t)rsp), (int64_t)dump_sp);
    os::print_location(tty, *dump_sp++);
  }
  for (int row = 0; row < 25; row++) {
    tty->print("(rsp+0x%03x) 0x%016lx: ", (int)((intptr_t)dump_sp - (intptr_t)rsp), (int64_t)dump_sp);
    for (int col = 0; col < 4; col++) {
      tty->print(" 0x%016lx", *dump_sp++);
    }
    tty->cr();
  }
  // Print some instructions around pc:
  Disassembler::decode((address)pc-64, (address)pc);
  tty->print_cr("--------");
  Disassembler::decode((address)pc, (address)pc+32);
}

#endif // _LP64

// Now versions that are common to 32/64 bit

void MacroAssembler::addptr(Register dst, int32_t imm32) {
  LP64_ONLY(addq(dst, imm32)) NOT_LP64(addl(dst, imm32));
}

void MacroAssembler::addptr(Register dst, Register src) {
  LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src));
}

void MacroAssembler::addptr(Address dst, Register src) {
  LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src));
}

void MacroAssembler::addsd(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    Assembler::addsd(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::addsd(dst, Address(rscratch1, 0));
  }
}

void MacroAssembler::addss(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    addss(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    addss(dst, Address(rscratch1, 0));
  }
}

void MacroAssembler::align(int modulus) {
  if (offset() % modulus != 0) {
    nop(modulus - (offset() % modulus));
  }
}

void MacroAssembler::andpd(XMMRegister dst, AddressLiteral src) {
  // Used in sign-masking with aligned address.
  assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");
  if (reachable(src)) {
    Assembler::andpd(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::andpd(dst, Address(rscratch1, 0));
  }
}

void MacroAssembler::andps(XMMRegister dst, AddressLiteral src) {
  // Used in sign-masking with aligned address.
  assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");
  if (reachable(src)) {
    Assembler::andps(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::andps(dst, Address(rscratch1, 0));
  }
}

void MacroAssembler::andptr(Register dst, int32_t imm32) {
  LP64_ONLY(andq(dst, imm32)) NOT_LP64(andl(dst, imm32));
}

void MacroAssembler::atomic_incl(AddressLiteral counter_addr) {
  pushf();
  if (os::is_MP())
    lock();
  incrementl(counter_addr);
  popf();
}
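// A recurring shape in the AddressLiteral overloads above and below, shown
// here as a hedged sketch (op stands for any of addsd/andpd/...): on 64-bit
// an absolute address may be outside rip-relative range, so it is
// materialized into the scratch register first; on 32-bit reachable() is
// always true and the else branch is never taken.
//
//   if (reachable(src)) {
//     op(dst, as_Address(src));        // one rip-relative instruction
//   } else {
//     lea(rscratch1, src);             // 64-bit immediate into scratch
//     op(dst, Address(rscratch1, 0));
//   }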
// Writes to stack successive pages until offset reached to check for
// stack overflow + shadow pages. This clobbers tmp.
void MacroAssembler::bang_stack_size(Register size, Register tmp) {
  movptr(tmp, rsp);
  // Bang stack for total size given plus shadow page size.
  // Bang one page at a time because large size can bang beyond yellow and
  // red zones.
  Label loop;
  bind(loop);
  movl(Address(tmp, (-os::vm_page_size())), size);
  subptr(tmp, os::vm_page_size());
  subl(size, os::vm_page_size());
  jcc(Assembler::greater, loop);

  // Bang down shadow pages too.
  // Skip the first one because that was already touched in the above
  // loop - the post decrement of temp means it's now a page below the
  // last touch.
  for (int i = 1; i <= StackShadowPages; i++) {
    // this could be any sized move but this can be a debugging crumb
    // so the bigger the better.
    movptr(Address(tmp, (-i*os::vm_page_size())), size);
  }
}

void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) {
  assert(UseBiasedLocking, "why call this otherwise?");

  // Check for biased locking unlock case, which is a no-op.
  // Note: we do not have to check the thread ID for two reasons.
  // First, the interpreter checks for IllegalMonitorStateException at
  // a higher level. Second, if the bias was revoked while we held the
  // lock, the object could not be rebiased toward another thread, so
  // the bias bit would be clear.
  movptr(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
  andptr(temp_reg, markOopDesc::biased_lock_mask_in_place);
  cmpptr(temp_reg, markOopDesc::biased_lock_pattern);
  jcc(Assembler::equal, done);
}

void MacroAssembler::c2bool(Register x) {
  // implements x == 0 ? 0 : 1
  // note: must only look at least-significant byte of x
  //       since C-style booleans are stored in one byte
  //       only! (was bug)
  andl(x, 0xFF);
  setb(Assembler::notZero, x);
}

// Wouldn't need if AddressLiteral version had new name
void MacroAssembler::call(Label& L, relocInfo::relocType rtype) {
  Assembler::call(L, rtype);
}

void MacroAssembler::call(Register entry) {
  Assembler::call(entry);
}

void MacroAssembler::call(AddressLiteral entry) {
  if (reachable(entry)) {
    Assembler::call_literal(entry.target(), entry.rspec());
  } else {
    lea(rscratch1, entry);
    Assembler::call(rscratch1);
  }
}

void MacroAssembler::ic_call(address entry) {
  RelocationHolder rh = virtual_call_Relocation::spec(pc());
  movptr(rax, (intptr_t)Universe::non_oop_word());
  call(AddressLiteral(entry, rh));
}

// Implementation of call_VM versions

void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             bool check_exceptions) {
  Label C, E;
  call(C, relocInfo::none);
  jmp(E);

  bind(C);
  call_VM_helper(oop_result, entry_point, 0, check_exceptions);
  ret(0);

  bind(E);
}
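// The call(C)/jmp(E) shape above (repeated in the overloads that follow) is a
// small local trampoline: the out-of-line stub at C does the real work and
// returns, while the call instruction leaves a return address on the stack
// from which call_VM_helper() derives last_Java_pc. The jmp(E) just skips the
// stub on the fall-through path.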
void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             Register arg_1,
                             bool check_exceptions) {
  Label C, E;
  call(C, relocInfo::none);
  jmp(E);

  bind(C);
  pass_arg1(this, arg_1);
  call_VM_helper(oop_result, entry_point, 1, check_exceptions);
  ret(0);

  bind(E);
}

void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             bool check_exceptions) {
  Label C, E;
  call(C, relocInfo::none);
  jmp(E);

  bind(C);

  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));

  pass_arg2(this, arg_2);
  pass_arg1(this, arg_1);
  call_VM_helper(oop_result, entry_point, 2, check_exceptions);
  ret(0);

  bind(E);
}

void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             Register arg_3,
                             bool check_exceptions) {
  Label C, E;
  call(C, relocInfo::none);
  jmp(E);

  bind(C);

  LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"));
  LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"));
  pass_arg3(this, arg_3);

  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);

  pass_arg1(this, arg_1);
  call_VM_helper(oop_result, entry_point, 3, check_exceptions);
  ret(0);

  bind(E);
}

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             int number_of_arguments,
                             bool check_exceptions) {
  Register thread = LP64_ONLY(r15_thread) NOT_LP64(noreg);
  call_VM_base(oop_result, thread, last_java_sp, entry_point, number_of_arguments, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             bool check_exceptions) {
  pass_arg1(this, arg_1);
  call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             bool check_exceptions) {

  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);
  pass_arg1(this, arg_1);
  call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             Register arg_3,
                             bool check_exceptions) {
  LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"));
  LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"));
  pass_arg3(this, arg_3);
  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);
  pass_arg1(this, arg_1);
  call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions);
}

void MacroAssembler::super_call_VM(Register oop_result,
                                   Register last_java_sp,
                                   address entry_point,
                                   int number_of_arguments,
                                   bool check_exceptions) {
  Register thread = LP64_ONLY(r15_thread) NOT_LP64(noreg);
  MacroAssembler::call_VM_base(oop_result, thread, last_java_sp, entry_point, number_of_arguments, check_exceptions);
}

void MacroAssembler::super_call_VM(Register oop_result,
                                   Register last_java_sp,
                                   address entry_point,
                                   Register arg_1,
                                   bool check_exceptions) {
  pass_arg1(this, arg_1);
  super_call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions);
}

void MacroAssembler::super_call_VM(Register oop_result,
                                   Register last_java_sp,
                                   address entry_point,
                                   Register arg_1,
                                   Register arg_2,
                                   bool check_exceptions) {

  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);
  pass_arg1(this, arg_1);
  super_call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions);
}

void MacroAssembler::super_call_VM(Register oop_result,
                                   Register last_java_sp,
                                   address entry_point,
                                   Register arg_1,
                                   Register arg_2,
                                   Register arg_3,
                                   bool check_exceptions) {
  LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"));
  LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"));
  pass_arg3(this, arg_3);
  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);
  pass_arg1(this, arg_1);
  super_call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions);
}
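// The "smashed arg" asserts above guard the 64-bit argument shuffle: values
// are moved into c_rargN from last to first, so no mov may clobber a register
// that still holds a yet-to-be-passed value. E.g. if arg_1 == c_rarg2:
//
//   pass_arg2(this, arg_2);  // mov c_rarg2, arg_2   -- overwrites arg_1
//   pass_arg1(this, arg_1);  // mov c_rarg1, c_rarg2 -- wrong value passed
//
// hence assert(arg_1 != c_rarg2) before pass_arg2().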
c_rarg3, "smashed arg")); 1607 LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg")); 1608 pass_arg3(this, arg_3); 1609 LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg")); 1610 pass_arg2(this, arg_2); 1611 pass_arg1(this, arg_1); 1612 super_call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions); 1613 } 1614 1615 void MacroAssembler::call_VM_base(Register oop_result, 1616 Register java_thread, 1617 Register last_java_sp, 1618 address entry_point, 1619 int number_of_arguments, 1620 bool check_exceptions) { 1621 // determine java_thread register 1622 if (!java_thread->is_valid()) { 1623 #ifdef _LP64 1624 java_thread = r15_thread; 1625 #else 1626 java_thread = rdi; 1627 get_thread(java_thread); 1628 #endif // LP64 1629 } 1630 // determine last_java_sp register 1631 if (!last_java_sp->is_valid()) { 1632 last_java_sp = rsp; 1633 } 1634 // debugging support 1635 assert(number_of_arguments >= 0 , "cannot have negative number of arguments"); 1636 LP64_ONLY(assert(java_thread == r15_thread, "unexpected register")); 1637 #ifdef ASSERT 1638 // TraceBytecodes does not use r12 but saves it over the call, so don't verify 1639 // r12 is the heapbase. 1640 LP64_ONLY(if ((UseCompressedOops || UseCompressedClassPointers) && !TraceBytecodes) verify_heapbase("call_VM_base: heap base corrupted?");) 1641 #endif // ASSERT 1642 1643 assert(java_thread != oop_result , "cannot use the same register for java_thread & oop_result"); 1644 assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp"); 1645 1646 // push java thread (becomes first argument of C function) 1647 1648 NOT_LP64(push(java_thread); number_of_arguments++); 1649 LP64_ONLY(mov(c_rarg0, r15_thread)); 1650 1651 // set last Java frame before call 1652 assert(last_java_sp != rbp, "can't use ebp/rbp"); 1653 1654 // Only interpreter should have to set fp 1655 set_last_Java_frame(java_thread, last_java_sp, rbp, NULL); 1656 1657 // do the call, remove parameters 1658 MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments); 1659 1660 // restore the thread (cannot use the pushed argument since arguments 1661 // may be overwritten by C code generated by an optimizing compiler); 1662 // however can use the register value directly if it is callee saved. 
  if (LP64_ONLY(true ||) java_thread == rdi || java_thread == rsi) {
    // rdi & rsi (also r15) are callee saved -> nothing to do
#ifdef ASSERT
    guarantee(java_thread != rax, "change this code");
    push(rax);
    { Label L;
      get_thread(rax);
      cmpptr(java_thread, rax);
      jcc(Assembler::equal, L);
      STOP("MacroAssembler::call_VM_base: rdi not callee saved?");
      bind(L);
    }
    pop(rax);
#endif
  } else {
    get_thread(java_thread);
  }
  // reset last Java frame
  // Only interpreter should have to clear fp
  reset_last_Java_frame(java_thread, true, false);

#ifndef CC_INTERP
   // C++ interp handles this in the interpreter
  check_and_handle_popframe(java_thread);
  check_and_handle_earlyret(java_thread);
#endif /* CC_INTERP */

  if (check_exceptions) {
    // check for pending exceptions (java_thread is set upon return)
    cmpptr(Address(java_thread, Thread::pending_exception_offset()), (int32_t) NULL_WORD);
#ifndef _LP64
    jump_cc(Assembler::notEqual,
            RuntimeAddress(StubRoutines::forward_exception_entry()));
#else
    // This used to conditionally jump to forward_exception, however it is
    // possible, if we relocate, that the conditional branch will not reach
    // the stub. So we must jump around an unconditional far jump, which can
    // always reach it.

    Label ok;
    jcc(Assembler::equal, ok);
    jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
    bind(ok);
#endif // LP64
  }

  // get oop result if there is one and reset the value in the thread
  if (oop_result->is_valid()) {
    get_vm_result(oop_result, java_thread);
  }
}

void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) {

  // Calculating the value for last_Java_sp is somewhat subtle.
  // call_VM does an intermediate call which places a return address on
  // the stack just under the stack pointer as the user finished with it.
  // This allows us to retrieve last_Java_pc from last_Java_sp[-1].
  // On 32bit we then have to push additional args on the stack to accomplish
  // the actual requested call. On 64bit call_VM can only use register args,
  // so the only extra space is the return address that call_VM created.
  // This hopefully explains the calculations here.

#ifdef _LP64
  // We've pushed one address, correct last_Java_sp
  lea(rax, Address(rsp, wordSize));
#else
  lea(rax, Address(rsp, (1 + number_of_arguments) * wordSize));
#endif // LP64

  call_VM_base(oop_result, noreg, rax, entry_point, number_of_arguments, check_exceptions);

}
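// Worked example for the last_Java_sp computation above (illustration
// only): on 32-bit, a call_VM with two register arguments pushes the
// intermediate return address and then the two args, so by the time
// call_VM_helper runs, the Java frame's original top of stack sits at
// rsp + (1 + 2) * wordSize, which is exactly what the lea computes. On
// 64-bit the args travel in registers, so only the pushed return
// address separates rsp from the Java frame: last_Java_sp is
// rsp + wordSize, and last_Java_pc is recovered from last_Java_sp[-1].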
void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) {
  call_VM_leaf_base(entry_point, number_of_arguments);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) {
  pass_arg0(this, arg_0);
  call_VM_leaf(entry_point, 1);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {

  LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
  pass_arg1(this, arg_1);
  pass_arg0(this, arg_0);
  call_VM_leaf(entry_point, 2);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) {
  LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg"));
  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);
  LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
  pass_arg1(this, arg_1);
  pass_arg0(this, arg_0);
  call_VM_leaf(entry_point, 3);
}

void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0) {
  pass_arg0(this, arg_0);
  MacroAssembler::call_VM_leaf_base(entry_point, 1);
}

void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {

  LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
  pass_arg1(this, arg_1);
  pass_arg0(this, arg_0);
  MacroAssembler::call_VM_leaf_base(entry_point, 2);
}

void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) {
  LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg"));
  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);
  LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
  pass_arg1(this, arg_1);
  pass_arg0(this, arg_0);
  MacroAssembler::call_VM_leaf_base(entry_point, 3);
}

void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3) {
  LP64_ONLY(assert(arg_0 != c_rarg3, "smashed arg"));
  LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"));
  LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"));
  pass_arg3(this, arg_3);
  LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg"));
  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);
  LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
  pass_arg1(this, arg_1);
  pass_arg0(this, arg_0);
  MacroAssembler::call_VM_leaf_base(entry_point, 4);
}

void MacroAssembler::get_vm_result(Register oop_result, Register java_thread) {
  movptr(oop_result, Address(java_thread, JavaThread::vm_result_offset()));
  movptr(Address(java_thread, JavaThread::vm_result_offset()), NULL_WORD);
  verify_oop(oop_result, "broken oop in call_VM_base");
}

void MacroAssembler::get_vm_result_2(Register metadata_result, Register java_thread) {
  movptr(metadata_result, Address(java_thread, JavaThread::vm_result_2_offset()));
  movptr(Address(java_thread, JavaThread::vm_result_2_offset()), NULL_WORD);
}

void MacroAssembler::check_and_handle_earlyret(Register java_thread) {
}
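// A note on the leaf-call variants above (summary, not new behavior):
// call_VM_leaf only marshals arguments and calls straight into the VM,
// while the non-leaf call_VM paths additionally record the last Java
// frame, check for pending exceptions on return, and fetch the oop
// result out of the thread-local vm_result slot. Leaf entry points are
// therefore assumed not to throw or stop at a safepoint. get_vm_result
// above also clears the slot so a stale oop is never handed out twice.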
void MacroAssembler::check_and_handle_popframe(Register java_thread) {
}

void MacroAssembler::cmp32(AddressLiteral src1, int32_t imm) {
  if (reachable(src1)) {
    cmpl(as_Address(src1), imm);
  } else {
    lea(rscratch1, src1);
    cmpl(Address(rscratch1, 0), imm);
  }
}

void MacroAssembler::cmp32(Register src1, AddressLiteral src2) {
  assert(!src2.is_lval(), "use cmpptr");
  if (reachable(src2)) {
    cmpl(src1, as_Address(src2));
  } else {
    lea(rscratch1, src2);
    cmpl(src1, Address(rscratch1, 0));
  }
}

void MacroAssembler::cmp32(Register src1, int32_t imm) {
  Assembler::cmpl(src1, imm);
}

void MacroAssembler::cmp32(Register src1, Address src2) {
  Assembler::cmpl(src1, src2);
}

void MacroAssembler::cmpsd2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less) {
  ucomisd(opr1, opr2);

  Label L;
  if (unordered_is_less) {
    movl(dst, -1);
    jcc(Assembler::parity, L);
    jcc(Assembler::below , L);
    movl(dst, 0);
    jcc(Assembler::equal , L);
    increment(dst);
  } else { // unordered is greater
    movl(dst, 1);
    jcc(Assembler::parity, L);
    jcc(Assembler::above , L);
    movl(dst, 0);
    jcc(Assembler::equal , L);
    decrementl(dst);
  }
  bind(L);
}

void MacroAssembler::cmpss2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less) {
  ucomiss(opr1, opr2);

  Label L;
  if (unordered_is_less) {
    movl(dst, -1);
    jcc(Assembler::parity, L);
    jcc(Assembler::below , L);
    movl(dst, 0);
    jcc(Assembler::equal , L);
    increment(dst);
  } else { // unordered is greater
    movl(dst, 1);
    jcc(Assembler::parity, L);
    jcc(Assembler::above , L);
    movl(dst, 0);
    jcc(Assembler::equal , L);
    decrementl(dst);
  }
  bind(L);
}


void MacroAssembler::cmp8(AddressLiteral src1, int imm) {
  if (reachable(src1)) {
    cmpb(as_Address(src1), imm);
  } else {
    lea(rscratch1, src1);
    cmpb(Address(rscratch1, 0), imm);
  }
}

void MacroAssembler::cmpptr(Register src1, AddressLiteral src2) {
#ifdef _LP64
  if (src2.is_lval()) {
    movptr(rscratch1, src2);
    Assembler::cmpq(src1, rscratch1);
  } else if (reachable(src2)) {
    cmpq(src1, as_Address(src2));
  } else {
    lea(rscratch1, src2);
    Assembler::cmpq(src1, Address(rscratch1, 0));
  }
#else
  if (src2.is_lval()) {
    cmp_literal32(src1, (int32_t) src2.target(), src2.rspec());
  } else {
    cmpl(src1, as_Address(src2));
  }
#endif // _LP64
}

void MacroAssembler::cmpptr(Address src1, AddressLiteral src2) {
  assert(src2.is_lval(), "not a mem-mem compare");
#ifdef _LP64
  // moves src2's literal address
  movptr(rscratch1, src2);
  Assembler::cmpq(src1, rscratch1);
#else
  cmp_literal32(src1, (int32_t) src2.target(), src2.rspec());
#endif // _LP64
}

void MacroAssembler::locked_cmpxchgptr(Register reg, AddressLiteral adr) {
  if (reachable(adr)) {
    if (os::is_MP())
      lock();
    cmpxchgptr(reg, as_Address(adr));
  } else {
    lea(rscratch1, adr);
    if (os::is_MP())
      lock();
    cmpxchgptr(reg, Address(rscratch1, 0));
  }
}

void MacroAssembler::cmpxchgptr(Register reg, Address adr) {
  LP64_ONLY(cmpxchgq(reg, adr)) NOT_LP64(cmpxchgl(reg, adr));
}
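// The reachable()/rscratch1 pattern used by cmp32, cmp8, and most of the
// AddressLiteral wrappers below, sketched for reference (this mirrors
// the code above; it is not new behavior):
//
//   if (reachable(src)) {        // target within a 32-bit displacement
//     op(as_Address(src));       //   -> address the operand directly
//   } else {                     // 64-bit only: target too far away
//     lea(rscratch1, src);       //   -> materialize the full address in
//     op(Address(rscratch1, 0)); //      the scratch register and go
//   }                            //      through it indirectly
//
// On 32-bit every address fits in a displacement, so the else arm is
// effectively a 64-bit-only path.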
void MacroAssembler::comisd(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    Assembler::comisd(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::comisd(dst, Address(rscratch1, 0));
  }
}

void MacroAssembler::comiss(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    Assembler::comiss(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::comiss(dst, Address(rscratch1, 0));
  }
}


void MacroAssembler::cond_inc32(Condition cond, AddressLiteral counter_addr) {
  Condition negated_cond = negate_condition(cond);
  Label L;
  jcc(negated_cond, L);
  atomic_incl(counter_addr);
  bind(L);
}

int MacroAssembler::corrected_idivl(Register reg) {
  // Full implementation of Java idiv and irem; checks for
  // special case as described in JVM spec., p.243 & p.271.
  // The function returns the (pc) offset of the idivl
  // instruction - may be needed for implicit exceptions.
  //
  //         normal case                           special case
  //
  // input : rax,: dividend                         min_int
  //         reg:  divisor   (may not be rax,/rdx)  -1
  //
  // output: rax,: quotient  (= rax, idiv reg)      min_int
  //         rdx:  remainder (= rax, irem reg)      0
  assert(reg != rax && reg != rdx, "reg cannot be rax, or rdx register");
  const int min_int = 0x80000000;
  Label normal_case, special_case;

  // check for special case
  cmpl(rax, min_int);
  jcc(Assembler::notEqual, normal_case);
  xorl(rdx, rdx); // prepare rdx for possible special case (where remainder = 0)
  cmpl(reg, -1);
  jcc(Assembler::equal, special_case);

  // handle normal case
  bind(normal_case);
  cdql();
  int idivl_offset = offset();
  idivl(reg);

  // normal and special case exit
  bind(special_case);

  return idivl_offset;
}



void MacroAssembler::decrementl(Register reg, int value) {
  if (value == min_jint) {subl(reg, value) ; return; }
  if (value <  0) { incrementl(reg, -value); return; }
  if (value == 0) {                        ; return; }
  if (value == 1 && UseIncDec) { decl(reg) ; return; }
  /* else */      { subl(reg, value)       ; return; }
}

void MacroAssembler::decrementl(Address dst, int value) {
  if (value == min_jint) {subl(dst, value) ; return; }
  if (value <  0) { incrementl(dst, -value); return; }
  if (value == 0) {                        ; return; }
  if (value == 1 && UseIncDec) { decl(dst) ; return; }
  /* else */      { subl(dst, value)       ; return; }
}

void MacroAssembler::division_with_shift(Register reg, int shift_value) {
  assert (shift_value > 0, "illegal shift value");
  Label _is_positive;
  testl (reg, reg);
  jcc (Assembler::positive, _is_positive);
  int offset = (1 << shift_value) - 1 ;

  if (offset == 1) {
    incrementl(reg);
  } else {
    addl(reg, offset);
  }

  bind (_is_positive);
  sarl(reg, shift_value);
}

void MacroAssembler::divsd(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    Assembler::divsd(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::divsd(dst, Address(rscratch1, 0));
  }
}

void MacroAssembler::divss(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    Assembler::divss(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::divss(dst, Address(rscratch1, 0));
  }
}
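// Worked examples for the two division helpers above (illustration only):
//
// corrected_idivl: idivl raises #DE not only for a zero divisor but also
// for min_int / -1, whose true quotient 2^31 is unrepresentable. The
// special-case path yields exactly what the JVM spec requires:
//   0x80000000 / -1  ->  quotient 0x80000000 (min_int), remainder 0.
//
// division_with_shift: a plain sarl rounds toward negative infinity, but
// Java division truncates toward zero, so (2^shift - 1) is added first
// for negative dividends. For reg = -7, shift_value = 1:
//   without the fixup:  -7 >> 1       = -4  (wrong),
//   with the fixup:     (-7 + 1) >> 1 = -3  (correct Java -7 / 2).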
// !defined(COMPILER2) is because of stupid core builds
#if !defined(_LP64) || defined(COMPILER1) || !defined(COMPILER2)
void MacroAssembler::empty_FPU_stack() {
  if (VM_Version::supports_mmx()) {
    emms();
  } else {
    for (int i = 8; i-- > 0; ) ffree(i);
  }
}
#endif // !LP64 || C1 || !C2


// Defines obj, preserves var_size_in_bytes
void MacroAssembler::eden_allocate(Register obj,
                                   Register var_size_in_bytes,
                                   int con_size_in_bytes,
                                   Register t1,
                                   Label& slow_case) {
  assert(obj == rax, "obj must be in rax, for cmpxchg");
  assert_different_registers(obj, var_size_in_bytes, t1);
  if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) {
    jmp(slow_case);
  } else {
    Register end = t1;
    Label retry;
    bind(retry);
    ExternalAddress heap_top((address) Universe::heap()->top_addr());
    movptr(obj, heap_top);
    if (var_size_in_bytes == noreg) {
      lea(end, Address(obj, con_size_in_bytes));
    } else {
      lea(end, Address(obj, var_size_in_bytes, Address::times_1));
    }
    // if end < obj then we wrapped around => object too long => slow case
    cmpptr(end, obj);
    jcc(Assembler::below, slow_case);
    cmpptr(end, ExternalAddress((address) Universe::heap()->end_addr()));
    jcc(Assembler::above, slow_case);
    // Compare obj with the top addr, and if still equal, store the new top addr in
    // end at the address of the top addr pointer. Sets ZF if was equal, and clears
    // it otherwise. Use lock prefix for atomicity on MPs.
    locked_cmpxchgptr(end, heap_top);
    jcc(Assembler::notEqual, retry);
  }
}

void MacroAssembler::enter() {
  push(rbp);
  mov(rbp, rsp);
}

// A 5 byte nop that is safe for patching (see patch_verified_entry)
void MacroAssembler::fat_nop() {
  if (UseAddressNop) {
    addr_nop_5();
  } else {
    emit_int8(0x26); // es:
    emit_int8(0x2e); // cs:
    emit_int8(0x64); // fs:
    emit_int8(0x65); // gs:
    emit_int8((unsigned char)0x90);
  }
}

void MacroAssembler::fcmp(Register tmp) {
  fcmp(tmp, 1, true, true);
}

void MacroAssembler::fcmp(Register tmp, int index, bool pop_left, bool pop_right) {
  assert(!pop_right || pop_left, "usage error");
  if (VM_Version::supports_cmov()) {
    assert(tmp == noreg, "unneeded temp");
    if (pop_left) {
      fucomip(index);
    } else {
      fucomi(index);
    }
    if (pop_right) {
      fpop();
    }
  } else {
    assert(tmp != noreg, "need temp");
    if (pop_left) {
      if (pop_right) {
        fcompp();
      } else {
        fcomp(index);
      }
    } else {
      fcom(index);
    }
    // convert FPU condition into eflags condition via rax,
    save_rax(tmp);
    fwait(); fnstsw_ax();
    sahf();
    restore_rax(tmp);
  }
  // condition codes set as follows:
  //
  // CF (corresponds to C0) if x < y
  // PF (corresponds to C2) if unordered
  // ZF (corresponds to C3) if x = y
}
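// How the condition codes set by fcmp above are folded into an int by
// fcmp2int below (recap of the branches, not new behavior), shown for
// unordered_is_less == true:
//
//   PF set (unordered, a NaN operand) -> dst = -1
//   CF set (x < y)                    -> dst = -1
//   ZF set (x == y)                   -> dst =  0
//   otherwise (x > y)                 -> dst = +1
//
// With unordered_is_less == false the NaN case lands on +1 instead,
// matching the bytecode pairs fcmpl/fcmpg and dcmpl/dcmpg, which differ
// only in how they rank NaN.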
void MacroAssembler::fcmp2int(Register dst, bool unordered_is_less) {
  fcmp2int(dst, unordered_is_less, 1, true, true);
}

void MacroAssembler::fcmp2int(Register dst, bool unordered_is_less, int index, bool pop_left, bool pop_right) {
  fcmp(VM_Version::supports_cmov() ? noreg : dst, index, pop_left, pop_right);
  Label L;
  if (unordered_is_less) {
    movl(dst, -1);
    jcc(Assembler::parity, L);
    jcc(Assembler::below , L);
    movl(dst, 0);
    jcc(Assembler::equal , L);
    increment(dst);
  } else { // unordered is greater
    movl(dst, 1);
    jcc(Assembler::parity, L);
    jcc(Assembler::above , L);
    movl(dst, 0);
    jcc(Assembler::equal , L);
    decrementl(dst);
  }
  bind(L);
}

void MacroAssembler::fld_d(AddressLiteral src) {
  fld_d(as_Address(src));
}

void MacroAssembler::fld_s(AddressLiteral src) {
  fld_s(as_Address(src));
}

void MacroAssembler::fld_x(AddressLiteral src) {
  Assembler::fld_x(as_Address(src));
}

void MacroAssembler::fldcw(AddressLiteral src) {
  Assembler::fldcw(as_Address(src));
}

void MacroAssembler::pow_exp_core_encoding() {
  // kills rax, rcx, rdx
  subptr(rsp,sizeof(jdouble));
  // computes 2^X. Stack: X ...
  // f2xm1 computes 2^X-1 but only operates on -1<=X<=1. Get int(X) and
  // keep it on the thread's stack to compute 2^int(X) later,
  // then compute 2^(X-int(X)) as (2^(X-int(X))-1)+1.
  // final result is obtained with: 2^X = 2^int(X) * 2^(X-int(X))
  fld_s(0);                 // Stack: X X ...
  frndint();                // Stack: int(X) X ...
  fsuba(1);                 // Stack: int(X) X-int(X) ...
  fistp_s(Address(rsp,0));  // move int(X) as integer to thread's stack. Stack: X-int(X) ...
  f2xm1();                  // Stack: 2^(X-int(X))-1 ...
  fld1();                   // Stack: 1 2^(X-int(X))-1 ...
  faddp(1);                 // Stack: 2^(X-int(X))
  // computes 2^(int(X)): add exponent bias (1023) to int(X), then
  // shift int(X)+1023 to exponent position.
  // The exponent is limited to 11 bits: if int(X)+1023 does not fit in 11
  // bits, set the result to NaN. 0x000 and 0x7FF are reserved exponent
  // values, so detect them as well and set the result to NaN.
  movl(rax,Address(rsp,0));
  movl(rcx, -2048); // 11 bit mask and valid NaN binary encoding
  addl(rax, 1023);
  movl(rdx,rax);
  shll(rax,20);
  // Check that 0 < int(X)+1023 < 2047. Otherwise set rax to NaN.
  addl(rdx,1);
  // Check that 1 < int(X)+1023+1 < 2048
  // in 3 steps:
  // 1- (int(X)+1023+1)&-2048 == 0 => 0 <= int(X)+1023+1 < 2048
  // 2- (int(X)+1023+1)&-2048 != 0
  // 3- (int(X)+1023+1)&-2048 != 1
  // Do 2- first because addl just updated the flags.
  cmov32(Assembler::equal,rax,rcx);
  cmpl(rdx,1);
  cmov32(Assembler::equal,rax,rcx);
  testl(rdx,rcx);
  cmov32(Assembler::notEqual,rax,rcx);
  movl(Address(rsp,4),rax);
  movl(Address(rsp,0),0);
  fmul_d(Address(rsp,0));   // Stack: 2^X ...
  addptr(rsp,sizeof(jdouble));
}

void MacroAssembler::increase_precision() {
  subptr(rsp, BytesPerWord);
  fnstcw(Address(rsp, 0));
  movl(rax, Address(rsp, 0));
  orl(rax, 0x300);
  push(rax);
  fldcw(Address(rsp, 0));
  pop(rax);
}

void MacroAssembler::restore_precision() {
  fldcw(Address(rsp, 0));
  addptr(rsp, BytesPerWord);
}
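// Worked example for pow_exp_core_encoding above (illustration only):
// an IEEE-754 double with a zero mantissa encodes 2^(e - 1023), where e
// is the 11-bit exponent field starting at bit 52, i.e. bit 20 of the
// high 32-bit word. For X = 10.3:
//   int(X) = 10, so e = 10 + 1023 = 1033, and shll(rax, 20) moves it
//   into exponent position; the constructed double is exactly 2^10.
//   f2xm1 handles the fraction: (2^0.3 - 1) + 1 gives 2^0.3, and
//   fmul_d then yields 2^10 * 2^0.3 = 2^10.3.
// The range check rejects e <= 0 and e >= 0x7FF (the encodings reserved
// for denormals, infinities and NaNs) by substituting the NaN bit
// pattern held in rcx.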
void MacroAssembler::fast_pow() {
  // computes X^Y = 2^(Y * log2(X))
  // if fast computation is not possible, result is NaN. Requires
  // fallback from user of this macro.
  // increase precision for intermediate steps of the computation
  increase_precision();
  fyl2x();                 // Stack: (Y*log2(X)) ...
  pow_exp_core_encoding(); // Stack: exp(X) ...
  restore_precision();
}

void MacroAssembler::fast_exp() {
  // computes exp(X) = 2^(X * log2(e))
  // if fast computation is not possible, result is NaN. Requires
  // fallback from user of this macro.
  // increase precision for intermediate steps of the computation
  increase_precision();
  fldl2e();                // Stack: log2(e) X ...
  fmulp(1);                // Stack: (X*log2(e)) ...
  pow_exp_core_encoding(); // Stack: exp(X) ...
  restore_precision();
}

void MacroAssembler::pow_or_exp(bool is_exp, int num_fpu_regs_in_use) {
  // kills rax, rcx, rdx
  // pow and exp need 2 extra registers on the fpu stack.
  Label slow_case, done;
  Register tmp = noreg;
  if (!VM_Version::supports_cmov()) {
    // fcmp needs a temporary so preserve rdx,
    tmp = rdx;
  }
  Register tmp2 = rax;
  Register tmp3 = rcx;

  if (is_exp) {
    // Stack: X
    fld_s(0);                   // duplicate argument for runtime call. Stack: X X
    fast_exp();                 // Stack: exp(X) X
    fcmp(tmp, 0, false, false); // Stack: exp(X) X
    // exp(X) not equal to itself: exp(X) is NaN go to slow case.
    jcc(Assembler::parity, slow_case);
    // get rid of duplicate argument. Stack: exp(X)
    if (num_fpu_regs_in_use > 0) {
      fxch();
      fpop();
    } else {
      ffree(1);
    }
    jmp(done);
  } else {
    // Stack: X Y
    Label x_negative, y_odd;

    fldz();                     // Stack: 0 X Y
    fcmp(tmp, 1, true, false);  // Stack: X Y
    jcc(Assembler::above, x_negative);

    // X >= 0

    fld_s(1);                   // duplicate arguments for runtime call. Stack: Y X Y
    fld_s(1);                   // Stack: X Y X Y
    fast_pow();                 // Stack: X^Y X Y
    fcmp(tmp, 0, false, false); // Stack: X^Y X Y
    // X^Y not equal to itself: X^Y is NaN go to slow case.
    jcc(Assembler::parity, slow_case);
    // get rid of duplicate arguments. Stack: X^Y
    if (num_fpu_regs_in_use > 0) {
      fxch(); fpop();
      fxch(); fpop();
    } else {
      ffree(2);
      ffree(1);
    }
    jmp(done);

    // X <= 0
    bind(x_negative);

    fld_s(1);                   // Stack: Y X Y
    frndint();                  // Stack: int(Y) X Y
    fcmp(tmp, 2, false, false); // Stack: int(Y) X Y
    jcc(Assembler::notEqual, slow_case);

    subptr(rsp, 8);

    // For X^Y, when X < 0, Y has to be an integer and the final
    // result depends on whether it's odd or even. We just checked
    // that int(Y) == Y. We move int(Y) to gp registers as a 64 bit
    // integer to test its parity. If int(Y) is huge and doesn't fit
    // in the 64 bit integer range, the integer indefinite value will
    // end up in the gp registers. Huge numbers are all even, and the
    // integer indefinite number is even, so it's fine.
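    // Worked example for the parity test set up above (illustration
    // only): for X = -2.0, Y = 3.0, the int(Y) == Y check passes,
    // fistp_d stores the 64-bit value 3, and the low bit being 1
    // selects the odd path:
    //   (-2)^3 = -(abs(-2)^3) = -8.
    // For an enormous even Y such as 2^63, fistp_d cannot represent
    // the value and stores the integer indefinite pattern
    // 0x8000000000000000, whose low bit is 0 -- the same answer the
    // "huge implies even" argument requires, so the test stays correct.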
#ifdef ASSERT
    // Let's check we don't end up with an integer indefinite number
    // when not expected. First test for huge numbers: check whether
    // int(Y)+1 == int(Y) which is true for very large numbers and
    // those are all even. A 64 bit integer is guaranteed to not
    // overflow for numbers where y+1 != y (when precision is set to
    // double precision).
    Label y_not_huge;

    fld1();                     // Stack: 1 int(Y) X Y
    fadd(1);                    // Stack: 1+int(Y) int(Y) X Y

#ifdef _LP64
    // trip to memory to force the precision down from double extended
    // precision
    fstp_d(Address(rsp, 0));
    fld_d(Address(rsp, 0));
#endif

    fcmp(tmp, 1, true, false);  // Stack: int(Y) X Y
#endif

    // move int(Y) as 64 bit integer to thread's stack
    fistp_d(Address(rsp,0));    // Stack: X Y

#ifdef ASSERT
    jcc(Assembler::notEqual, y_not_huge);

    // Y is huge so we know it's even. It may not fit in a 64 bit
    // integer and we don't want the debug code below to see the
    // integer indefinite value so overwrite int(Y) on the thread's
    // stack with 0.
    movl(Address(rsp, 0), 0);
    movl(Address(rsp, 4), 0);

    bind(y_not_huge);
#endif

    fld_s(1);                   // duplicate arguments for runtime call. Stack: Y X Y
    fld_s(1);                   // Stack: X Y X Y
    fabs();                     // Stack: abs(X) Y X Y
    fast_pow();                 // Stack: abs(X)^Y X Y
    fcmp(tmp, 0, false, false); // Stack: abs(X)^Y X Y
    // abs(X)^Y not equal to itself: abs(X)^Y is NaN go to slow case.

    pop(tmp2);
    NOT_LP64(pop(tmp3));
    jcc(Assembler::parity, slow_case);

#ifdef ASSERT
    // Check that int(Y) is not integer indefinite value (int
    // overflow). Shouldn't happen because for values that would
    // overflow, 1+int(Y)==Y which was tested earlier.
#ifndef _LP64
    {
      Label integer;
      testl(tmp2, tmp2);
      jcc(Assembler::notZero, integer);
      cmpl(tmp3, 0x80000000);
      jcc(Assembler::notZero, integer);
      STOP("integer indefinite value shouldn't be seen here");
      bind(integer);
    }
#else
    {
      Label integer;
      mov(tmp3, tmp2); // preserve tmp2 for parity check below
      shlq(tmp3, 1);
      jcc(Assembler::carryClear, integer);
      jcc(Assembler::notZero, integer);
      STOP("integer indefinite value shouldn't be seen here");
      bind(integer);
    }
#endif
#endif

    // get rid of duplicate arguments. Stack: X^Y
    if (num_fpu_regs_in_use > 0) {
      fxch(); fpop();
      fxch(); fpop();
    } else {
      ffree(2);
      ffree(1);
    }

    testl(tmp2, 1);
    jcc(Assembler::zero, done); // X <= 0, Y even: X^Y = abs(X)^Y
    // X <= 0, Y odd: X^Y = -abs(X)^Y

    fchs();                     // Stack: -abs(X)^Y Y
    jmp(done);
  }

  // slow case: runtime call
  bind(slow_case);

  fpop();                       // pop incorrect result or int(Y)

  fp_runtime_fallback(is_exp ? CAST_FROM_FN_PTR(address, SharedRuntime::dexp) : CAST_FROM_FN_PTR(address, SharedRuntime::dpow),
                      is_exp ? 1 : 2, num_fpu_regs_in_use);

  // Come here with result in F-TOS
  bind(done);
}

void MacroAssembler::fpop() {
  ffree();
  fincstp();
}

void MacroAssembler::fremr(Register tmp) {
  save_rax(tmp);
  { Label L;
    bind(L);
    fprem();
    fwait(); fnstsw_ax();
#ifdef _LP64
    testl(rax, 0x400);
    jcc(Assembler::notEqual, L);
#else
    sahf();
    jcc(Assembler::parity, L);
#endif // _LP64
  }
  restore_rax(tmp);
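  // Note on the loop above (recap): fprem computes a partial remainder
  // and sets the FPU status word's C2 flag (bit 10, mask 0x400) while
  // more reduction remains. On 64-bit that bit is tested directly in
  // ax; on 32-bit, sahf copies ah (status bits 8-15) into eflags, where
  // C2 lands on the parity flag, hence the jcc(parity, L) retry.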
  // Result is in ST0.
  // Note: fxch & fpop to get rid of ST1
  // (otherwise FPU stack could overflow eventually)
  fxch(1);
  fpop();
}


void MacroAssembler::incrementl(AddressLiteral dst) {
  if (reachable(dst)) {
    incrementl(as_Address(dst));
  } else {
    lea(rscratch1, dst);
    incrementl(Address(rscratch1, 0));
  }
}

void MacroAssembler::incrementl(ArrayAddress dst) {
  incrementl(as_Address(dst));
}

void MacroAssembler::incrementl(Register reg, int value) {
  if (value == min_jint) {addl(reg, value) ; return; }
  if (value <  0) { decrementl(reg, -value); return; }
  if (value == 0) {                        ; return; }
  if (value == 1 && UseIncDec) { incl(reg) ; return; }
  /* else */      { addl(reg, value)       ; return; }
}

void MacroAssembler::incrementl(Address dst, int value) {
  if (value == min_jint) {addl(dst, value) ; return; }
  if (value <  0) { decrementl(dst, -value); return; }
  if (value == 0) {                        ; return; }
  if (value == 1 && UseIncDec) { incl(dst) ; return; }
  /* else */      { addl(dst, value)       ; return; }
}

void MacroAssembler::jump(AddressLiteral dst) {
  if (reachable(dst)) {
    jmp_literal(dst.target(), dst.rspec());
  } else {
    lea(rscratch1, dst);
    jmp(rscratch1);
  }
}

void MacroAssembler::jump_cc(Condition cc, AddressLiteral dst) {
  if (reachable(dst)) {
    InstructionMark im(this);
    relocate(dst.reloc());
    const int short_size = 2;
    const int long_size = 6;
    int offs = (intptr_t)dst.target() - ((intptr_t)pc());
    if (dst.reloc() == relocInfo::none && is8bit(offs - short_size)) {
      // 0111 tttn #8-bit disp
      emit_int8(0x70 | cc);
      emit_int8((offs - short_size) & 0xFF);
    } else {
      // 0000 1111 1000 tttn #32-bit disp
      emit_int8(0x0F);
      emit_int8((unsigned char)(0x80 | cc));
      emit_int32(offs - long_size);
    }
  } else {
#ifdef ASSERT
    warning("reversing conditional branch");
#endif /* ASSERT */
    Label skip;
    jccb(reverse[cc], skip);
    lea(rscratch1, dst);
    Assembler::jmp(rscratch1);
    bind(skip);
  }
}

void MacroAssembler::ldmxcsr(AddressLiteral src) {
  if (reachable(src)) {
    Assembler::ldmxcsr(as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::ldmxcsr(Address(rscratch1, 0));
  }
}

int MacroAssembler::load_signed_byte(Register dst, Address src) {
  int off;
  if (LP64_ONLY(true ||) VM_Version::is_P6()) {
    off = offset();
    movsbl(dst, src); // movsxb
  } else {
    off = load_unsigned_byte(dst, src);
    shll(dst, 24);
    sarl(dst, 24);
  }
  return off;
}
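// Worked example for the pre-P6 fallback above (illustration only):
// shll 24 followed by sarl 24 replicates bit 7 across the upper bits:
//   dst = 0x000000FE   (byte -2, zero-extended by load_unsigned_byte)
//   shll(dst, 24) -> 0xFE000000
//   sarl(dst, 24) -> 0xFFFFFFFE   (= -2, correctly sign-extended)
// which matches what the single movsbl does on P6 and later parts.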
// Note: load_signed_short used to be called load_signed_word.
// Although the 'w' in x86 opcodes refers to the term "word" in the assembler
// manual, which means 16 bits, that usage is found nowhere in HotSpot code.
// The term "word" in HotSpot means a 32- or 64-bit machine word.
int MacroAssembler::load_signed_short(Register dst, Address src) {
  int off;
  if (LP64_ONLY(true ||) VM_Version::is_P6()) {
    // This is dubious to me since it seems safe to do a signed 16 => 64 bit
    // version but this is what 64bit has always done. This seems to imply
    // that users are only using 32bits worth.
    off = offset();
    movswl(dst, src); // movsxw
  } else {
    off = load_unsigned_short(dst, src);
    shll(dst, 16);
    sarl(dst, 16);
  }
  return off;
}

int MacroAssembler::load_unsigned_byte(Register dst, Address src) {
  // According to Intel Doc. AP-526, "Zero-Extension of Short", p.16,
  // and "3.9 Partial Register Penalties", p. 22.
  int off;
  if (LP64_ONLY(true || ) VM_Version::is_P6() || src.uses(dst)) {
    off = offset();
    movzbl(dst, src); // movzxb
  } else {
    xorl(dst, dst);
    off = offset();
    movb(dst, src);
  }
  return off;
}

// Note: load_unsigned_short used to be called load_unsigned_word.
int MacroAssembler::load_unsigned_short(Register dst, Address src) {
  // According to Intel Doc. AP-526, "Zero-Extension of Short", p.16,
  // and "3.9 Partial Register Penalties", p. 22.
  int off;
  if (LP64_ONLY(true ||) VM_Version::is_P6() || src.uses(dst)) {
    off = offset();
    movzwl(dst, src); // movzxw
  } else {
    xorl(dst, dst);
    off = offset();
    movw(dst, src);
  }
  return off;
}

void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2) {
  switch (size_in_bytes) {
#ifndef _LP64
  case  8:
    assert(dst2 != noreg, "second dest register required");
    movl(dst,  src);
    movl(dst2, src.plus_disp(BytesPerInt));
    break;
#else
  case  8:  movq(dst, src); break;
#endif
  case  4:  movl(dst, src); break;
  case  2:  is_signed ? load_signed_short(dst, src) : load_unsigned_short(dst, src); break;
  case  1:  is_signed ? load_signed_byte( dst, src) : load_unsigned_byte( dst, src); break;
  default:  ShouldNotReachHere();
  }
}

void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2) {
  switch (size_in_bytes) {
#ifndef _LP64
  case  8:
    assert(src2 != noreg, "second source register required");
    movl(dst,                        src);
    movl(dst.plus_disp(BytesPerInt), src2);
    break;
#else
  case  8:  movq(dst, src); break;
#endif
  case  4:  movl(dst, src); break;
  case  2:  movw(dst, src); break;
  case  1:  movb(dst, src); break;
  default:  ShouldNotReachHere();
  }
}
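// Worked example for the 8-byte case on 32-bit above (illustration
// only): x86 is little-endian, so a jlong at address A keeps its low
// 32 bits at A and its high 32 bits at A + BytesPerInt. Loading one
// therefore takes two movl's into a register pair, e.g.
//   load_sized_value(rax, src, 8, true, rdx);
// puts the low half in rax and the high half in rdx (the conventional
// rdx:rax long pair); store_sized_value writes the halves back the
// same way.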
void MacroAssembler::mov32(AddressLiteral dst, Register src) {
  if (reachable(dst)) {
    movl(as_Address(dst), src);
  } else {
    lea(rscratch1, dst);
    movl(Address(rscratch1, 0), src);
  }
}

void MacroAssembler::mov32(Register dst, AddressLiteral src) {
  if (reachable(src)) {
    movl(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    movl(dst, Address(rscratch1, 0));
  }
}

// C++ bool manipulation

void MacroAssembler::movbool(Register dst, Address src) {
  if(sizeof(bool) == 1)
    movb(dst, src);
  else if(sizeof(bool) == 2)
    movw(dst, src);
  else if(sizeof(bool) == 4)
    movl(dst, src);
  else
    // unsupported
    ShouldNotReachHere();
}

void MacroAssembler::movbool(Address dst, bool boolconst) {
  if(sizeof(bool) == 1)
    movb(dst, (int) boolconst);
  else if(sizeof(bool) == 2)
    movw(dst, (int) boolconst);
  else if(sizeof(bool) == 4)
    movl(dst, (int) boolconst);
  else
    // unsupported
    ShouldNotReachHere();
}

void MacroAssembler::movbool(Address dst, Register src) {
  if(sizeof(bool) == 1)
    movb(dst, src);
  else if(sizeof(bool) == 2)
    movw(dst, src);
  else if(sizeof(bool) == 4)
    movl(dst, src);
  else
    // unsupported
    ShouldNotReachHere();
}

void MacroAssembler::movbyte(ArrayAddress dst, int src) {
  movb(as_Address(dst), src);
}

void MacroAssembler::movdl(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    movdl(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    movdl(dst, Address(rscratch1, 0));
  }
}

void MacroAssembler::movq(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    movq(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    movq(dst, Address(rscratch1, 0));
  }
}

void MacroAssembler::movdbl(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    if (UseXmmLoadAndClearUpper) {
      movsd (dst, as_Address(src));
    } else {
      movlpd(dst, as_Address(src));
    }
  } else {
    lea(rscratch1, src);
    if (UseXmmLoadAndClearUpper) {
      movsd (dst, Address(rscratch1, 0));
    } else {
      movlpd(dst, Address(rscratch1, 0));
    }
  }
}

void MacroAssembler::movflt(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    movss(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    movss(dst, Address(rscratch1, 0));
  }
}

void MacroAssembler::movptr(Register dst, Register src) {
  LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
}

void MacroAssembler::movptr(Register dst, Address src) {
  LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
}

// src should NEVER be a real pointer. Use AddressLiteral for true pointers
void MacroAssembler::movptr(Register dst, intptr_t src) {
  LP64_ONLY(mov64(dst, src)) NOT_LP64(movl(dst, src));
}

void MacroAssembler::movptr(Address dst, Register src) {
  LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
}

void MacroAssembler::movdqu(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    Assembler::movdqu(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::movdqu(dst, Address(rscratch1, 0));
  }
}

void MacroAssembler::movdqa(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    Assembler::movdqa(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::movdqa(dst, Address(rscratch1, 0));
  }
}

void MacroAssembler::movsd(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    Assembler::movsd(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::movsd(dst, Address(rscratch1, 0));
  }
}

void MacroAssembler::movss(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    Assembler::movss(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::movss(dst, Address(rscratch1, 0));
  }
}

void MacroAssembler::mulsd(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    Assembler::mulsd(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::mulsd(dst, Address(rscratch1, 0));
  }
}

void MacroAssembler::mulss(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    Assembler::mulss(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::mulss(dst, Address(rscratch1, 0));
  }
}
void MacroAssembler::null_check(Register reg, int offset) {
  if (needs_explicit_null_check(offset)) {
    // provoke OS NULL exception if reg = NULL by
    // accessing M[reg] w/o changing any (non-CC) registers
    // NOTE: cmpl is plenty here to provoke a segv
    cmpptr(rax, Address(reg, 0));
    // Note: should probably use testl(rax, Address(reg, 0));
    //       may be shorter code (however, this version of
    //       testl needs to be implemented first)
  } else {
    // nothing to do, (later) access of M[reg + offset]
    // will provoke OS NULL exception if reg = NULL
  }
}

void MacroAssembler::os_breakpoint() {
  // instead of directly emitting a breakpoint, call os::breakpoint for better debuggability
  // (e.g., MSVC can't call ps() otherwise)
  call(RuntimeAddress(CAST_FROM_FN_PTR(address, os::breakpoint)));
}

void MacroAssembler::pop_CPU_state() {
  pop_FPU_state();
  pop_IU_state();
}

void MacroAssembler::pop_FPU_state() {
  NOT_LP64(frstor(Address(rsp, 0));)
  LP64_ONLY(fxrstor(Address(rsp, 0));)
  addptr(rsp, FPUStateSizeInWords * wordSize);
}

void MacroAssembler::pop_IU_state() {
  popa();
  LP64_ONLY(addq(rsp, 8));
  popf();
}

// Save Integer and Float state
// Warning: Stack must be 16 byte aligned (64bit)
void MacroAssembler::push_CPU_state() {
  push_IU_state();
  push_FPU_state();
}

void MacroAssembler::push_FPU_state() {
  subptr(rsp, FPUStateSizeInWords * wordSize);
#ifndef _LP64
  fnsave(Address(rsp, 0));
  fwait();
#else
  fxsave(Address(rsp, 0));
#endif // LP64
}

void MacroAssembler::push_IU_state() {
  // Push flags first because pusha kills them
  pushf();
  // Make sure rsp stays 16-byte aligned
  LP64_ONLY(subq(rsp, 8));
  pusha();
}

void MacroAssembler::reset_last_Java_frame(Register java_thread, bool clear_fp, bool clear_pc) {
  // determine java_thread register
  if (!java_thread->is_valid()) {
    java_thread = rdi;
    get_thread(java_thread);
  }
  // we must set sp to zero to clear frame
  movptr(Address(java_thread, JavaThread::last_Java_sp_offset()), NULL_WORD);
  if (clear_fp) {
    movptr(Address(java_thread, JavaThread::last_Java_fp_offset()), NULL_WORD);
  }

  if (clear_pc)
    movptr(Address(java_thread, JavaThread::last_Java_pc_offset()), NULL_WORD);

}

void MacroAssembler::restore_rax(Register tmp) {
  if (tmp == noreg) pop(rax);
  else if (tmp != rax) mov(rax, tmp);
}

void MacroAssembler::round_to(Register reg, int modulus) {
  addptr(reg, modulus - 1);
  andptr(reg, -modulus);
}
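// Worked example for round_to above (illustration only): this is the
// classic align-up idiom, rounding reg to the next multiple of modulus
// (a power of two). For reg = 13, modulus = 8:
//   13 + 7 = 20;   20 & -8 = 20 & ~7 = 16.
// Already-aligned values are unchanged: 16 + 7 = 23;  23 & -8 = 16.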
void MacroAssembler::save_rax(Register tmp) {
  if (tmp == noreg) push(rax);
  else if (tmp != rax) mov(tmp, rax);
}

// Write serialization page so VM thread can do a pseudo remote membar.
// We use the current thread pointer to calculate a thread specific
// offset to write to within the page. This minimizes bus traffic
// due to cache line collision.
void MacroAssembler::serialize_memory(Register thread, Register tmp) {
  movl(tmp, thread);
  shrl(tmp, os::get_serialize_page_shift_count());
  andl(tmp, (os::vm_page_size() - sizeof(int)));

  Address index(noreg, tmp, Address::times_1);
  ExternalAddress page(os::get_memory_serialize_page());

  // Size of store must match masking code above
  movl(as_Address(ArrayAddress(page, index)), tmp);
}

// Calls to C land
//
// When entering C land, the rbp & rsp of the last Java frame have to be recorded
// in the (thread-local) JavaThread object. When leaving C land, the last Java fp
// has to be reset to 0. This is required to allow proper stack traversal.
void MacroAssembler::set_last_Java_frame(Register java_thread,
                                         Register last_java_sp,
                                         Register last_java_fp,
                                         address  last_java_pc) {
  // determine java_thread register
  if (!java_thread->is_valid()) {
    java_thread = rdi;
    get_thread(java_thread);
  }
  // determine last_java_sp register
  if (!last_java_sp->is_valid()) {
    last_java_sp = rsp;
  }

  // last_java_fp is optional

  if (last_java_fp->is_valid()) {
    movptr(Address(java_thread, JavaThread::last_Java_fp_offset()), last_java_fp);
  }

  // last_java_pc is optional

  if (last_java_pc != NULL) {
    lea(Address(java_thread,
                JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()),
        InternalAddress(last_java_pc));

  }
  movptr(Address(java_thread, JavaThread::last_Java_sp_offset()), last_java_sp);
}

void MacroAssembler::shlptr(Register dst, int imm8) {
  LP64_ONLY(shlq(dst, imm8)) NOT_LP64(shll(dst, imm8));
}

void MacroAssembler::shrptr(Register dst, int imm8) {
  LP64_ONLY(shrq(dst, imm8)) NOT_LP64(shrl(dst, imm8));
}

void MacroAssembler::sign_extend_byte(Register reg) {
  if (LP64_ONLY(true ||) (VM_Version::is_P6() && reg->has_byte_register())) {
    movsbl(reg, reg); // movsxb
  } else {
    shll(reg, 24);
    sarl(reg, 24);
  }
}

void MacroAssembler::sign_extend_short(Register reg) {
  if (LP64_ONLY(true ||) VM_Version::is_P6()) {
    movswl(reg, reg); // movsxw
  } else {
    shll(reg, 16);
    sarl(reg, 16);
  }
}

void MacroAssembler::testl(Register dst, AddressLiteral src) {
  assert(reachable(src), "Address should be reachable");
  testl(dst, as_Address(src));
}

void MacroAssembler::sqrtsd(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    Assembler::sqrtsd(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::sqrtsd(dst, Address(rscratch1, 0));
  }
}

void MacroAssembler::sqrtss(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    Assembler::sqrtss(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::sqrtss(dst, Address(rscratch1, 0));
  }
}

void MacroAssembler::subsd(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    Assembler::subsd(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::subsd(dst, Address(rscratch1, 0));
  }
}

void MacroAssembler::subss(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    Assembler::subss(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::subss(dst, Address(rscratch1, 0));
  }
}
void MacroAssembler::ucomisd(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    Assembler::ucomisd(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::ucomisd(dst, Address(rscratch1, 0));
  }
}

void MacroAssembler::ucomiss(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    Assembler::ucomiss(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::ucomiss(dst, Address(rscratch1, 0));
  }
}

void MacroAssembler::xorpd(XMMRegister dst, AddressLiteral src) {
  // Used in sign-bit flipping with aligned address.
  assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");
  if (reachable(src)) {
    Assembler::xorpd(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::xorpd(dst, Address(rscratch1, 0));
  }
}

void MacroAssembler::xorps(XMMRegister dst, AddressLiteral src) {
  // Used in sign-bit flipping with aligned address.
  assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");
  if (reachable(src)) {
    Assembler::xorps(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::xorps(dst, Address(rscratch1, 0));
  }
}

void MacroAssembler::pshufb(XMMRegister dst, AddressLiteral src) {
  // Used in sign-bit flipping with aligned address.
  bool aligned_adr = (((intptr_t)src.target() & 15) == 0);
  assert((UseAVX > 0) || aligned_adr, "SSE mode requires address alignment 16 bytes");
  if (reachable(src)) {
    Assembler::pshufb(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::pshufb(dst, Address(rscratch1, 0));
  }
}

// AVX 3-operands instructions

void MacroAssembler::vaddsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
  if (reachable(src)) {
    vaddsd(dst, nds, as_Address(src));
  } else {
    lea(rscratch1, src);
    vaddsd(dst, nds, Address(rscratch1, 0));
  }
}

void MacroAssembler::vaddss(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
  if (reachable(src)) {
    vaddss(dst, nds, as_Address(src));
  } else {
    lea(rscratch1, src);
    vaddss(dst, nds, Address(rscratch1, 0));
  }
}

void MacroAssembler::vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256) {
  if (reachable(src)) {
    vandpd(dst, nds, as_Address(src), vector256);
  } else {
    lea(rscratch1, src);
    vandpd(dst, nds, Address(rscratch1, 0), vector256);
  }
}

void MacroAssembler::vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256) {
  if (reachable(src)) {
    vandps(dst, nds, as_Address(src), vector256);
  } else {
    lea(rscratch1, src);
    vandps(dst, nds, Address(rscratch1, 0), vector256);
  }
}

void MacroAssembler::vdivsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
  if (reachable(src)) {
    vdivsd(dst, nds, as_Address(src));
  } else {
    lea(rscratch1, src);
    vdivsd(dst, nds, Address(rscratch1, 0));
  }
}

void MacroAssembler::vdivss(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
  if (reachable(src)) {
    vdivss(dst, nds, as_Address(src));
  } else {
    lea(rscratch1, src);
    vdivss(dst, nds, Address(rscratch1, 0));
  }
}
void MacroAssembler::vmulsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
  if (reachable(src)) {
    vmulsd(dst, nds, as_Address(src));
  } else {
    lea(rscratch1, src);
    vmulsd(dst, nds, Address(rscratch1, 0));
  }
}

void MacroAssembler::vmulss(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
  if (reachable(src)) {
    vmulss(dst, nds, as_Address(src));
  } else {
    lea(rscratch1, src);
    vmulss(dst, nds, Address(rscratch1, 0));
  }
}

void MacroAssembler::vsubsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
  if (reachable(src)) {
    vsubsd(dst, nds, as_Address(src));
  } else {
    lea(rscratch1, src);
    vsubsd(dst, nds, Address(rscratch1, 0));
  }
}

void MacroAssembler::vsubss(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
  if (reachable(src)) {
    vsubss(dst, nds, as_Address(src));
  } else {
    lea(rscratch1, src);
    vsubss(dst, nds, Address(rscratch1, 0));
  }
}

void MacroAssembler::vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256) {
  if (reachable(src)) {
    vxorpd(dst, nds, as_Address(src), vector256);
  } else {
    lea(rscratch1, src);
    vxorpd(dst, nds, Address(rscratch1, 0), vector256);
  }
}

void MacroAssembler::vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256) {
  if (reachable(src)) {
    vxorps(dst, nds, as_Address(src), vector256);
  } else {
    lea(rscratch1, src);
    vxorps(dst, nds, Address(rscratch1, 0), vector256);
  }
}
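// Note on the AVX helpers above (recap): the three-operand forms are
// non-destructive, dst = nds OP src, so the left input survives the
// operation. For example
//   vaddsd(xmm0, xmm1, ExternalAddress(one_addr));
// leaves xmm1 intact, where the two-operand SSE addsd(xmm1, src) would
// overwrite it. (one_addr here is a hypothetical constant's address,
// used only for illustration.)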
//////////////////////////////////////////////////////////////////////////////////
#if INCLUDE_ALL_GCS

void MacroAssembler::g1_write_barrier_pre(Register obj,
                                          Register pre_val,
                                          Register thread,
                                          Register tmp,
                                          bool tosca_live,
                                          bool expand_call) {

  // If expand_call is true then we expand the call_VM_leaf macro
  // directly to skip generating the check by
  // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp.

#ifdef _LP64
  assert(thread == r15_thread, "must be");
#endif // _LP64

  Label done;
  Label runtime;

  assert(pre_val != noreg, "check this code");

  if (obj != noreg) {
    assert_different_registers(obj, pre_val, tmp);
    assert(pre_val != rax, "check this code");
  }

  Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
                                       PtrQueue::byte_offset_of_active()));
  Address index(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
                                       PtrQueue::byte_offset_of_index()));
  Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
                                       PtrQueue::byte_offset_of_buf()));


  // Is marking active?
  if (in_bytes(PtrQueue::byte_width_of_active()) == 4) {
    cmpl(in_progress, 0);
  } else {
    assert(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption");
    cmpb(in_progress, 0);
  }
  jcc(Assembler::equal, done);

  // Do we need to load the previous value?
  if (obj != noreg) {
    load_heap_oop(pre_val, Address(obj, 0));
  }

  // Is the previous value null?
  cmpptr(pre_val, (int32_t) NULL_WORD);
  jcc(Assembler::equal, done);

  // Can we store original value in the thread's buffer?
  // Is index == 0?
  // (The index field is typed as size_t.)

  movptr(tmp, index);                   // tmp := *index_adr
  cmpptr(tmp, 0);                       // tmp == 0?
  jcc(Assembler::equal, runtime);       // If yes, goto runtime

  subptr(tmp, wordSize);                // tmp := tmp - wordSize
  movptr(index, tmp);                   // *index_adr := tmp
  addptr(tmp, buffer);                  // tmp := tmp + *buffer_adr

  // Record the previous value
  movptr(Address(tmp, 0), pre_val);
  jmp(done);

  bind(runtime);
  // save the live input values
  if(tosca_live) push(rax);

  if (obj != noreg && obj != rax)
    push(obj);

  if (pre_val != rax)
    push(pre_val);

  // Calling the runtime using the regular call_VM_leaf mechanism generates
  // code (generated by InterpreterMacroAssembler::call_VM_leaf_base)
  // that checks that the *(ebp+frame::interpreter_frame_last_sp) == NULL.
  //
  // If we are generating the pre-barrier without a frame (e.g. in the
  // intrinsified Reference.get() routine) then ebp might be pointing to
  // the caller frame and so this check will most likely fail at runtime.
  //
  // Expanding the call directly bypasses the generation of the check.
  // So when we do not have a full interpreter frame on the stack,
  // expand_call should be passed true.

  NOT_LP64( push(thread); )

  if (expand_call) {
    LP64_ONLY( assert(pre_val != c_rarg1, "smashed arg"); )
    pass_arg1(this, thread);
    pass_arg0(this, pre_val);
    MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), 2);
  } else {
    call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), pre_val, thread);
  }

  NOT_LP64( pop(thread); )

  // restore the live input values
  if (pre_val != rax)
    pop(pre_val);

  if (obj != noreg && obj != rax)
    pop(obj);

  if(tosca_live) pop(rax);

  bind(done);
}
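// Worked example for the SATB queue fast path above (illustration
// only): the index field counts bytes and moves downward from the
// buffer size toward 0. With a 1KB buffer and wordSize == 8, the first
// enqueue sees index == 1024, stores the previous value at
// buffer + 1016, and leaves index at 1016. Once index reaches 0 the
// buffer is full and the code falls through to the g1_wb_pre runtime
// call, which hands the filled buffer off and resets the index. (The
// buffer size here is made up for the example.)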
void MacroAssembler::g1_write_barrier_post(Register store_addr,
                                           Register new_val,
                                           Register thread,
                                           Register tmp,
                                           Register tmp2) {
#ifdef _LP64
  assert(thread == r15_thread, "must be");
#endif // _LP64

  Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
                                       PtrQueue::byte_offset_of_index()));
  Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
                                       PtrQueue::byte_offset_of_buf()));

  BarrierSet* bs = Universe::heap()->barrier_set();
  CardTableModRefBS* ct = (CardTableModRefBS*)bs;
  Label done;
  Label runtime;

  // Does store cross heap regions?

  movptr(tmp, store_addr);
  xorptr(tmp, new_val);
  shrptr(tmp, HeapRegion::LogOfHRGrainBytes);
  jcc(Assembler::equal, done);

  // crosses regions, storing NULL?

  cmpptr(new_val, (int32_t) NULL_WORD);
  jcc(Assembler::equal, done);

  // storing region crossing non-NULL, is card already dirty?

  ExternalAddress cardtable((address) ct->byte_map_base);
  assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
#ifdef _LP64
  const Register card_addr = tmp;

  movq(card_addr, store_addr);
  shrq(card_addr, CardTableModRefBS::card_shift);

  lea(tmp2, cardtable);

  // get the address of the card
  addq(card_addr, tmp2);
#else
  const Register card_index = tmp;

  movl(card_index, store_addr);
  shrl(card_index, CardTableModRefBS::card_shift);

  Address index(noreg, card_index, Address::times_1);
  const Register card_addr = tmp;
  lea(card_addr, as_Address(ArrayAddress(cardtable, index)));
#endif
  cmpb(Address(card_addr, 0), (int)G1SATBCardTableModRefBS::g1_young_card_val());
  jcc(Assembler::equal, done);

  membar(Assembler::Membar_mask_bits(Assembler::StoreLoad));
  cmpb(Address(card_addr, 0), (int)CardTableModRefBS::dirty_card_val());
  jcc(Assembler::equal, done);


  // storing a region crossing, non-NULL oop, card is clean.
  // dirty card and log.

  movb(Address(card_addr, 0), (int)CardTableModRefBS::dirty_card_val());

  cmpl(queue_index, 0);
  jcc(Assembler::equal, runtime);
  subl(queue_index, wordSize);
  movptr(tmp2, buffer);
#ifdef _LP64
  movslq(rscratch1, queue_index);
  addq(tmp2, rscratch1);
  movq(Address(tmp2, 0), card_addr);
#else
  addl(tmp2, queue_index);
  movl(Address(tmp2, 0), card_index);
#endif
  jmp(done);

  bind(runtime);
  // save the live input values
  push(store_addr);
  push(new_val);
#ifdef _LP64
  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, r15_thread);
#else
  push(thread);
  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, thread);
  pop(thread);
#endif
  pop(new_val);
  pop(store_addr);

  bind(done);
}

#endif // INCLUDE_ALL_GCS
//////////////////////////////////////////////////////////////////////////////////


void MacroAssembler::store_check(Register obj) {
  // Does a store check for the oop in register obj. The content of
  // register obj is destroyed afterwards.
  store_check_part_1(obj);
  store_check_part_2(obj);
}

void MacroAssembler::store_check(Register obj, Address dst) {
  store_check(obj);
}


// split the store check operation so that other instructions can be scheduled in between
void MacroAssembler::store_check_part_1(Register obj) {
  BarrierSet* bs = Universe::heap()->barrier_set();
  assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
  shrptr(obj, CardTableModRefBS::card_shift);
}
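// Worked example for the card mark math in store_check_part_1 above and
// store_check_part_2 below (illustration only): with the usual 512-byte
// cards, card_shift is 9, so an object at 0x20001234 maps to card index
// 0x20001234 >> 9 = 0x100009, and its flag byte lives at
// byte_map_base + 0x100009. Every address inside the same 512-byte
// window shares that byte, which is why a single dirtied byte can
// summarize many stores.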
void MacroAssembler::store_check_part_2(Register obj) {
  BarrierSet* bs = Universe::heap()->barrier_set();
  assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
  CardTableModRefBS* ct = (CardTableModRefBS*)bs;
  assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");

  // The calculation for byte_map_base is as follows:
  // byte_map_base = _byte_map - (uintptr_t(low_bound) >> card_shift);
  // So this essentially converts an address to a displacement and
  // it will never need to be relocated. On 64bit, however, the value may be
  // too large for a 32bit displacement.

  intptr_t disp = (intptr_t) ct->byte_map_base;
  if (is_simm32(disp)) {
    Address cardtable(noreg, obj, Address::times_1, disp);
    movb(cardtable, 0);
  } else {
    // By doing it as an ExternalAddress, 'disp' could be converted to a rip-relative
    // displacement and done in a single instruction given favorable mapping and
    // a smarter version of as_Address. Worst case it is two instructions, which
    // is no worse off than loading disp into a register and doing it as a simple
    // Address() as above.
    // We can't do ExternalAddress as the only style since if disp == 0 we'll
    // assert, since NULL isn't acceptable in a reloc info (see 6644928). In any
    // case in some cases we'll get a single instruction version.

    ExternalAddress cardtable((address)disp);
    Address index(noreg, obj, Address::times_1);
    movb(as_Address(ArrayAddress(cardtable, index)), 0);
  }
}

void MacroAssembler::subptr(Register dst, int32_t imm32) {
  LP64_ONLY(subq(dst, imm32)) NOT_LP64(subl(dst, imm32));
}

// Force generation of a 4 byte immediate value even if it fits into 8bit
void MacroAssembler::subptr_imm32(Register dst, int32_t imm32) {
  LP64_ONLY(subq_imm32(dst, imm32)) NOT_LP64(subl_imm32(dst, imm32));
}

void MacroAssembler::subptr(Register dst, Register src) {
  LP64_ONLY(subq(dst, src)) NOT_LP64(subl(dst, src));
}

// C++ bool manipulation
void MacroAssembler::testbool(Register dst) {
  if(sizeof(bool) == 1)
    testb(dst, 0xff);
  else if(sizeof(bool) == 2) {
    // testw implementation needed for two byte bools
    ShouldNotReachHere();
  } else if(sizeof(bool) == 4)
    testl(dst, dst);
  else
    // unsupported
    ShouldNotReachHere();
}

void MacroAssembler::testptr(Register dst, Register src) {
  LP64_ONLY(testq(dst, src)) NOT_LP64(testl(dst, src));
}

// Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes.
void MacroAssembler::tlab_allocate(Register obj,
                                   Register var_size_in_bytes,
                                   int con_size_in_bytes,
                                   Register t1,
                                   Register t2,
                                   Label& slow_case) {
  assert_different_registers(obj, t1, t2);
  assert_different_registers(obj, var_size_in_bytes, t1);
  Register end = t2;
  Register thread = NOT_LP64(t1) LP64_ONLY(r15_thread);

  verify_tlab();

  NOT_LP64(get_thread(thread));

  movptr(obj, Address(thread, JavaThread::tlab_top_offset()));
  if (var_size_in_bytes == noreg) {
    lea(end, Address(obj, con_size_in_bytes));
  } else {
    lea(end, Address(obj, var_size_in_bytes, Address::times_1));
  }
  cmpptr(end, Address(thread, JavaThread::tlab_end_offset()));
  jcc(Assembler::above, slow_case);

  // update the tlab top pointer
  movptr(Address(thread, JavaThread::tlab_top_offset()), end);

  // recover var_size_in_bytes if necessary
  if (var_size_in_bytes == end) {
    subptr(var_size_in_bytes, obj);
  }
  verify_tlab();
}
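// Worked example for the TLAB fast path above (illustration only):
// suppose the thread's tlab_top is 0x1000 and tlab_end is 0x2000. A
// 24-byte allocation computes obj = 0x1000 and end = 0x1018; end is not
// above tlab_end, so tlab_top is bumped to 0x1018 and obj is handed out
// without any atomic operation -- the TLAB is thread-private, which is
// the whole point. A request that would push end past 0x2000 branches
// to slow_case instead (refill or shared-eden allocation).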
3560 Register MacroAssembler::tlab_refill(Label& retry, 3561 Label& try_eden, 3562 Label& slow_case) { 3563 Register top = rax; 3564 Register t1 = rcx; 3565 Register t2 = rsi; 3566 Register thread_reg = NOT_LP64(rdi) LP64_ONLY(r15_thread); 3567 assert_different_registers(top, thread_reg, t1, t2, /* preserve: */ rbx, rdx); 3568 Label do_refill, discard_tlab; 3569 3570 if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) { 3571 // No allocation in the shared eden. 3572 jmp(slow_case); 3573 } 3574 3575 NOT_LP64(get_thread(thread_reg)); 3576 3577 movptr(top, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset()))); 3578 movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_end_offset()))); 3579 3580 // calculate amount of free space 3581 subptr(t1, top); 3582 shrptr(t1, LogHeapWordSize); 3583 3584 // Retain tlab and allocate object in shared space if 3585 // the amount free in the tlab is too large to discard. 3586 cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset()))); 3587 jcc(Assembler::lessEqual, discard_tlab); 3588 3589 // Retain 3590 // %%% yuck as movptr... 3591 movptr(t2, (int32_t) ThreadLocalAllocBuffer::refill_waste_limit_increment()); 3592 addptr(Address(thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset())), t2); 3593 if (TLABStats) { 3594 // increment number of slow_allocations 3595 addl(Address(thread_reg, in_bytes(JavaThread::tlab_slow_allocations_offset())), 1); 3596 } 3597 jmp(try_eden); 3598 3599 bind(discard_tlab); 3600 if (TLABStats) { 3601 // increment number of refills 3602 addl(Address(thread_reg, in_bytes(JavaThread::tlab_number_of_refills_offset())), 1); 3603 // accumulate wastage -- t1 is amount free in tlab 3604 addl(Address(thread_reg, in_bytes(JavaThread::tlab_fast_refill_waste_offset())), t1); 3605 } 3606 3607 // if tlab is currently allocated (top or end != null) then 3608 // fill [top, end + alignment_reserve) with array object 3609 testptr(top, top); 3610 jcc(Assembler::zero, do_refill); 3611 3612 // set up the mark word 3613 movptr(Address(top, oopDesc::mark_offset_in_bytes()), (intptr_t)markOopDesc::prototype()->copy_set_hash(0x2)); 3614 // set the length to the remaining space 3615 subptr(t1, typeArrayOopDesc::header_size(T_INT)); 3616 addptr(t1, (int32_t)ThreadLocalAllocBuffer::alignment_reserve()); 3617 shlptr(t1, log2_intptr(HeapWordSize/sizeof(jint))); 3618 movl(Address(top, arrayOopDesc::length_offset_in_bytes()), t1); 3619 // set klass to intArrayKlass 3620 // dubious reloc why not an oop reloc? 3621 movptr(t1, ExternalAddress((address)Universe::intArrayKlassObj_addr())); 3622 // store klass last. concurrent GCs assume klass length is valid if 3623 // klass field is not null. 3624 store_klass(top, t1); 3625 3626 movptr(t1, top); 3627 subptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_start_offset()))); 3628 incr_allocated_bytes(thread_reg, t1, 0); 3629 3630 // refill the tlab with an eden allocation 3631 bind(do_refill); 3632 movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_size_offset()))); 3633 shlptr(t1, LogHeapWordSize); 3634 // allocate new tlab, address returned in top 3635 eden_allocate(top, t1, 0, t2, slow_case); 3636 3637 // Check that t1 was preserved in eden_allocate.
3638 #ifdef ASSERT 3639 if (UseTLAB) { 3640 Label ok; 3641 Register tsize = rsi; 3642 assert_different_registers(tsize, thread_reg, t1); 3643 push(tsize); 3644 movptr(tsize, Address(thread_reg, in_bytes(JavaThread::tlab_size_offset()))); 3645 shlptr(tsize, LogHeapWordSize); 3646 cmpptr(t1, tsize); 3647 jcc(Assembler::equal, ok); 3648 STOP("assert(t1 != tlab size)"); 3649 should_not_reach_here(); 3650 3651 bind(ok); 3652 pop(tsize); 3653 } 3654 #endif 3655 movptr(Address(thread_reg, in_bytes(JavaThread::tlab_start_offset())), top); 3656 movptr(Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())), top); 3657 addptr(top, t1); 3658 subptr(top, (int32_t)ThreadLocalAllocBuffer::alignment_reserve_in_bytes()); 3659 movptr(Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())), top); 3660 verify_tlab(); 3661 jmp(retry); 3662 3663 return thread_reg; // for use by caller 3664 } 3665 3666 void MacroAssembler::incr_allocated_bytes(Register thread, 3667 Register var_size_in_bytes, 3668 int con_size_in_bytes, 3669 Register t1) { 3670 if (!thread->is_valid()) { 3671 #ifdef _LP64 3672 thread = r15_thread; 3673 #else 3674 assert(t1->is_valid(), "need temp reg"); 3675 thread = t1; 3676 get_thread(thread); 3677 #endif 3678 } 3679 3680 #ifdef _LP64 3681 if (var_size_in_bytes->is_valid()) { 3682 addq(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), var_size_in_bytes); 3683 } else { 3684 addq(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), con_size_in_bytes); 3685 } 3686 #else 3687 if (var_size_in_bytes->is_valid()) { 3688 addl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), var_size_in_bytes); 3689 } else { 3690 addl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), con_size_in_bytes); 3691 } 3692 adcl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())+4), 0); 3693 #endif 3694 } 3695 3696 void MacroAssembler::fp_runtime_fallback(address runtime_entry, int nb_args, int num_fpu_regs_in_use) { 3697 pusha(); 3698 3699 // if we are coming from c1, xmm registers may be live 3700 int off = 0; 3701 if (UseSSE == 1) { 3702 subptr(rsp, sizeof(jdouble)*8); 3703 movflt(Address(rsp,off++*sizeof(jdouble)),xmm0); 3704 movflt(Address(rsp,off++*sizeof(jdouble)),xmm1); 3705 movflt(Address(rsp,off++*sizeof(jdouble)),xmm2); 3706 movflt(Address(rsp,off++*sizeof(jdouble)),xmm3); 3707 movflt(Address(rsp,off++*sizeof(jdouble)),xmm4); 3708 movflt(Address(rsp,off++*sizeof(jdouble)),xmm5); 3709 movflt(Address(rsp,off++*sizeof(jdouble)),xmm6); 3710 movflt(Address(rsp,off++*sizeof(jdouble)),xmm7); 3711 } else if (UseSSE >= 2) { 3712 #ifdef COMPILER2 3713 if (MaxVectorSize > 16) { 3714 assert(UseAVX > 0, "256bit vectors are supported only with AVX"); 3715 // Save upper half of YMM registers 3716 subptr(rsp, 16 * LP64_ONLY(16) NOT_LP64(8)); 3717 vextractf128h(Address(rsp, 0),xmm0); 3718 vextractf128h(Address(rsp, 16),xmm1); 3719 vextractf128h(Address(rsp, 32),xmm2); 3720 vextractf128h(Address(rsp, 48),xmm3); 3721 vextractf128h(Address(rsp, 64),xmm4); 3722 vextractf128h(Address(rsp, 80),xmm5); 3723 vextractf128h(Address(rsp, 96),xmm6); 3724 vextractf128h(Address(rsp,112),xmm7); 3725 #ifdef _LP64 3726 vextractf128h(Address(rsp,128),xmm8); 3727 vextractf128h(Address(rsp,144),xmm9); 3728 vextractf128h(Address(rsp,160),xmm10); 3729 vextractf128h(Address(rsp,176),xmm11); 3730 vextractf128h(Address(rsp,192),xmm12); 3731 vextractf128h(Address(rsp,208),xmm13); 3732 vextractf128h(Address(rsp,224),xmm14); 3733 vextractf128h(Address(rsp,240),xmm15); 3734 #endif
3735 } 3736 #endif 3737 // Save whole 128bit (16 bytes) XMM registers 3738 subptr(rsp, 16 * LP64_ONLY(16) NOT_LP64(8)); 3739 movdqu(Address(rsp,off++*16),xmm0); 3740 movdqu(Address(rsp,off++*16),xmm1); 3741 movdqu(Address(rsp,off++*16),xmm2); 3742 movdqu(Address(rsp,off++*16),xmm3); 3743 movdqu(Address(rsp,off++*16),xmm4); 3744 movdqu(Address(rsp,off++*16),xmm5); 3745 movdqu(Address(rsp,off++*16),xmm6); 3746 movdqu(Address(rsp,off++*16),xmm7); 3747 #ifdef _LP64 3748 movdqu(Address(rsp,off++*16),xmm8); 3749 movdqu(Address(rsp,off++*16),xmm9); 3750 movdqu(Address(rsp,off++*16),xmm10); 3751 movdqu(Address(rsp,off++*16),xmm11); 3752 movdqu(Address(rsp,off++*16),xmm12); 3753 movdqu(Address(rsp,off++*16),xmm13); 3754 movdqu(Address(rsp,off++*16),xmm14); 3755 movdqu(Address(rsp,off++*16),xmm15); 3756 #endif 3757 } 3758 3759 // Preserve registers across runtime call 3760 int incoming_argument_and_return_value_offset = -1; 3761 if (num_fpu_regs_in_use > 1) { 3762 // Must preserve all other FPU regs (could alternatively convert 3763 // SharedRuntime::dsin, dcos etc. into assembly routines known not to trash 3764 // FPU state, but cannot trust the C compiler) 3765 NEEDS_CLEANUP; 3766 // NOTE that in this case we also push the incoming argument(s) to 3767 // the stack and restore it later; we also use this stack slot to 3768 // hold the return value from dsin, dcos etc. 3769 for (int i = 0; i < num_fpu_regs_in_use; i++) { 3770 subptr(rsp, sizeof(jdouble)); 3771 fstp_d(Address(rsp, 0)); 3772 } 3773 incoming_argument_and_return_value_offset = sizeof(jdouble)*(num_fpu_regs_in_use-1); 3774 for (int i = nb_args-1; i >= 0; i--) { 3775 fld_d(Address(rsp, incoming_argument_and_return_value_offset-i*sizeof(jdouble))); 3776 } 3777 } 3778 3779 subptr(rsp, nb_args*sizeof(jdouble)); 3780 for (int i = 0; i < nb_args; i++) { 3781 fstp_d(Address(rsp, i*sizeof(jdouble))); 3782 } 3783 3784 #ifdef _LP64 3785 if (nb_args > 0) { 3786 movdbl(xmm0, Address(rsp, 0)); 3787 } 3788 if (nb_args > 1) { 3789 movdbl(xmm1, Address(rsp, sizeof(jdouble))); 3790 } 3791 assert(nb_args <= 2, "unsupported number of args"); 3792 #endif // _LP64 3793 3794 // NOTE: we must not use call_VM_leaf here because that requires a 3795 // complete interpreter frame in debug mode -- same bug as 4387334 3796 // MacroAssembler::call_VM_leaf_base is perfectly safe and will 3797 // do proper 64bit ABI 3798 3799 NEEDS_CLEANUP; 3800 // Need to add stack banging before this runtime call if it needs to 3801 // be taken; however, there is no generic stack banging routine at 3802 // the MacroAssembler level 3803 3804 MacroAssembler::call_VM_leaf_base(runtime_entry, 0); 3805 3806 #ifdef _LP64 3807 movsd(Address(rsp, 0), xmm0); 3808 fld_d(Address(rsp, 0)); 3809 #endif // _LP64 3810 addptr(rsp, sizeof(jdouble) * nb_args); 3811 if (num_fpu_regs_in_use > 1) { 3812 // Must save return value to stack and then restore entire FPU 3813 // stack except incoming arguments 3814 fstp_d(Address(rsp, incoming_argument_and_return_value_offset)); 3815 for (int i = 0; i < num_fpu_regs_in_use - nb_args; i++) { 3816 fld_d(Address(rsp, 0)); 3817 addptr(rsp, sizeof(jdouble)); 3818 } 3819 fld_d(Address(rsp, (nb_args-1)*sizeof(jdouble))); 3820 addptr(rsp, sizeof(jdouble) * nb_args); 3821 } 3822 3823 off = 0; 3824 if (UseSSE == 1) { 3825 movflt(xmm0, Address(rsp,off++*sizeof(jdouble))); 3826 movflt(xmm1, Address(rsp,off++*sizeof(jdouble))); 3827 movflt(xmm2, Address(rsp,off++*sizeof(jdouble))); 3828 movflt(xmm3, Address(rsp,off++*sizeof(jdouble))); 3829 movflt(xmm4, Address(rsp,off++*sizeof(jdouble))); 3830 movflt(xmm5, Address(rsp,off++*sizeof(jdouble))); 3831 movflt(xmm6, Address(rsp,off++*sizeof(jdouble))); 3832 movflt(xmm7, Address(rsp,off++*sizeof(jdouble))); 3833 addptr(rsp, sizeof(jdouble)*8); 3834 } else if (UseSSE >= 2) { 3835 // Restore whole 128bit (16 bytes) XMM registers 3836 movdqu(xmm0, Address(rsp,off++*16)); 3837 movdqu(xmm1, Address(rsp,off++*16)); 3838 movdqu(xmm2, Address(rsp,off++*16)); 3839 movdqu(xmm3, Address(rsp,off++*16)); 3840 movdqu(xmm4, Address(rsp,off++*16)); 3841 movdqu(xmm5, Address(rsp,off++*16)); 3842 movdqu(xmm6, Address(rsp,off++*16)); 3843 movdqu(xmm7, Address(rsp,off++*16)); 3844 #ifdef _LP64 3845 movdqu(xmm8, Address(rsp,off++*16)); 3846 movdqu(xmm9, Address(rsp,off++*16)); 3847 movdqu(xmm10, Address(rsp,off++*16)); 3848 movdqu(xmm11, Address(rsp,off++*16)); 3849 movdqu(xmm12, Address(rsp,off++*16)); 3850 movdqu(xmm13, Address(rsp,off++*16)); 3851 movdqu(xmm14, Address(rsp,off++*16)); 3852 movdqu(xmm15, Address(rsp,off++*16)); 3853 #endif 3854 addptr(rsp, 16 * LP64_ONLY(16) NOT_LP64(8)); 3855 #ifdef COMPILER2 3856 if (MaxVectorSize > 16) { 3857 // Restore upper half of YMM registers. 3858 vinsertf128h(xmm0, Address(rsp, 0)); 3859 vinsertf128h(xmm1, Address(rsp, 16)); 3860 vinsertf128h(xmm2, Address(rsp, 32)); 3861 vinsertf128h(xmm3, Address(rsp, 48)); 3862 vinsertf128h(xmm4, Address(rsp, 64)); 3863 vinsertf128h(xmm5, Address(rsp, 80)); 3864 vinsertf128h(xmm6, Address(rsp, 96)); 3865 vinsertf128h(xmm7, Address(rsp,112)); 3866 #ifdef _LP64 3867 vinsertf128h(xmm8, Address(rsp,128)); 3868 vinsertf128h(xmm9, Address(rsp,144)); 3869 vinsertf128h(xmm10, Address(rsp,160)); 3870 vinsertf128h(xmm11, Address(rsp,176)); 3871 vinsertf128h(xmm12, Address(rsp,192)); 3872 vinsertf128h(xmm13, Address(rsp,208)); 3873 vinsertf128h(xmm14, Address(rsp,224)); 3874 vinsertf128h(xmm15, Address(rsp,240)); 3875 #endif 3876 addptr(rsp, 16 * LP64_ONLY(16) NOT_LP64(8)); 3877 } 3878 #endif 3879 } 3880 popa(); 3881 } 3882 3883 static const double pi_4 = 0.7853981633974483; 3884 3885 void MacroAssembler::trigfunc(char trig, int num_fpu_regs_in_use) { 3886 // A hand-coded argument reduction for values in fabs(pi/4, pi/2) 3887 // was attempted in this code; unfortunately it appears that the 3888 // switch to 80-bit precision and back causes this to be 3889 // unprofitable compared with simply performing a runtime call if 3890 // the argument is out of the (-pi/4, pi/4) range.
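// In outline, the code below behaves like this sketch (argument in ST0):
//   if (|x| <= pi/4)  use fsin/fcos/ftan on the x87 stack directly;   // fast path
//   else              call SharedRuntime::dsin/dcos/dtan via fp_runtime_fallback.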
3891 3892 Register tmp = noreg; 3893 if (!VM_Version::supports_cmov()) { 3894 // fcmp needs a temporary so preserve rbx, 3895 tmp = rbx; 3896 push(tmp); 3897 } 3898 3899 Label slow_case, done; 3900 3901 ExternalAddress pi4_adr = (address)&pi_4; 3902 if (reachable(pi4_adr)) { 3903 // x ?<= pi/4 3904 fld_d(pi4_adr); 3905 fld_s(1); // Stack: X PI/4 X 3906 fabs(); // Stack: |X| PI/4 X 3907 fcmp(tmp); 3908 jcc(Assembler::above, slow_case); 3909 3910 // fastest case: -pi/4 <= x <= pi/4 3911 switch(trig) { 3912 case 's': 3913 fsin(); 3914 break; 3915 case 'c': 3916 fcos(); 3917 break; 3918 case 't': 3919 ftan(); 3920 break; 3921 default: 3922 assert(false, "bad intrinsic"); 3923 break; 3924 } 3925 jmp(done); 3926 } 3927 3928 // slow case: runtime call 3929 bind(slow_case); 3930 3931 switch(trig) { 3932 case 's': 3933 { 3934 fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dsin), 1, num_fpu_regs_in_use); 3935 } 3936 break; 3937 case 'c': 3938 { 3939 fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dcos), 1, num_fpu_regs_in_use); 3940 } 3941 break; 3942 case 't': 3943 { 3944 fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dtan), 1, num_fpu_regs_in_use); 3945 } 3946 break; 3947 default: 3948 assert(false, "bad intrinsic"); 3949 break; 3950 } 3951 3952 // Come here with result in F-TOS 3953 bind(done); 3954 3955 if (tmp != noreg) { 3956 pop(tmp); 3957 } 3958 } 3959 3960 3961 // Look up the method for a megamorphic invokeinterface call. 3962 // The target method is determined by <intf_klass, itable_index>. 3963 // The receiver klass is in recv_klass. 3964 // On success, the result will be in method_result, and execution falls through. 3965 // On failure, execution transfers to the given label. 3966 void MacroAssembler::lookup_interface_method(Register recv_klass, 3967 Register intf_klass, 3968 RegisterOrConstant itable_index, 3969 Register method_result, 3970 Register scan_temp, 3971 Label& L_no_such_interface) { 3972 assert_different_registers(recv_klass, intf_klass, method_result, scan_temp); 3973 assert(itable_index.is_constant() || itable_index.as_register() == method_result, 3974 "caller must use same register for non-constant itable index as for method"); 3975 3976 // Compute start of first itableOffsetEntry (which is at the end of the vtable) 3977 int vtable_base = InstanceKlass::vtable_start_offset() * wordSize; 3978 int itentry_off = itableMethodEntry::method_offset_in_bytes(); 3979 int scan_step = itableOffsetEntry::size() * wordSize; 3980 int vte_size = vtableEntry::size() * wordSize; 3981 Address::ScaleFactor times_vte_scale = Address::times_ptr; 3982 assert(vte_size == wordSize, "else adjust times_vte_scale"); 3983 3984 movl(scan_temp, Address(recv_klass, InstanceKlass::vtable_length_offset() * wordSize)); 3985 3986 // %%% Could store the aligned, prescaled offset in the klassoop. 3987 lea(scan_temp, Address(recv_klass, scan_temp, times_vte_scale, vtable_base)); 3988 if (HeapWordsPerLong > 1) { 3989 // Round up to align_object_offset boundary 3990 // see code for InstanceKlass::start_of_itable! 3991 round_to(scan_temp, BytesPerLong); 3992 } 3993 3994 // Adjust recv_klass by scaled itable_index, so we can free itable_index. 
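// After this adjustment recv_klass doubles as a base pointer: once the scan
// below finds the matching itableOffsetEntry, adding its offset to recv_klass
// yields the itableMethodEntry for itable_index (a sketch of the intent only).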
3995 assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below"); 3996 lea(recv_klass, Address(recv_klass, itable_index, Address::times_ptr, itentry_off)); 3997 3998 // for (scan = klass->itable(); scan->interface() != NULL; scan += scan_step) { 3999 // if (scan->interface() == intf) { 4000 // result = (klass + scan->offset() + itable_index); 4001 // } 4002 // } 4003 Label search, found_method; 4004 4005 for (int peel = 1; peel >= 0; peel--) { 4006 movptr(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes())); 4007 cmpptr(intf_klass, method_result); 4008 4009 if (peel) { 4010 jccb(Assembler::equal, found_method); 4011 } else { 4012 jccb(Assembler::notEqual, search); 4013 // (invert the test to fall through to found_method...) 4014 } 4015 4016 if (!peel) break; 4017 4018 bind(search); 4019 4020 // Check that the previous entry is non-null. A null entry means that 4021 // the receiver class doesn't implement the interface, and wasn't the 4022 // same as when the caller was compiled. 4023 testptr(method_result, method_result); 4024 jcc(Assembler::zero, L_no_such_interface); 4025 addptr(scan_temp, scan_step); 4026 } 4027 4028 bind(found_method); 4029 4030 // Got a hit. 4031 movl(scan_temp, Address(scan_temp, itableOffsetEntry::offset_offset_in_bytes())); 4032 movptr(method_result, Address(recv_klass, scan_temp, Address::times_1)); 4033 } 4034 4035 4036 // virtual method calling 4037 void MacroAssembler::lookup_virtual_method(Register recv_klass, 4038 RegisterOrConstant vtable_index, 4039 Register method_result) { 4040 const int base = InstanceKlass::vtable_start_offset() * wordSize; 4041 assert(vtableEntry::size() * wordSize == wordSize, "else adjust the scaling in the code below"); 4042 Address vtable_entry_addr(recv_klass, 4043 vtable_index, Address::times_ptr, 4044 base + vtableEntry::method_offset_in_bytes()); 4045 movptr(method_result, vtable_entry_addr); 4046 } 4047 4048 4049 void MacroAssembler::check_klass_subtype(Register sub_klass, 4050 Register super_klass, 4051 Register temp_reg, 4052 Label& L_success) { 4053 Label L_failure; 4054 check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg, &L_success, &L_failure, NULL); 4055 check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, noreg, &L_success, NULL); 4056 bind(L_failure); 4057 } 4058 4059 4060 void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass, 4061 Register super_klass, 4062 Register temp_reg, 4063 Label* L_success, 4064 Label* L_failure, 4065 Label* L_slow_path, 4066 RegisterOrConstant super_check_offset) { 4067 assert_different_registers(sub_klass, super_klass, temp_reg); 4068 bool must_load_sco = (super_check_offset.constant_or_zero() == -1); 4069 if (super_check_offset.is_register()) { 4070 assert_different_registers(sub_klass, super_klass, 4071 super_check_offset.as_register()); 4072 } else if (must_load_sco) { 4073 assert(temp_reg != noreg, "supply either a temp or a register offset"); 4074 } 4075 4076 Label L_fallthrough; 4077 int label_nulls = 0; 4078 if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } 4079 if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } 4080 if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; } 4081 assert(label_nulls <= 1, "at most one NULL in the batch"); 4082 4083 int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); 4084 int sco_offset = in_bytes(Klass::super_check_offset_offset()); 4085 Address super_check_offset_addr(super_klass, 
sco_offset); 4086 4087 // Hacked jcc, which "knows" that L_fallthrough, at least, is in 4088 // range of a jccb. If this routine grows larger, reconsider at 4089 // least some of these. 4090 #define local_jcc(assembler_cond, label) \ 4091 if (&(label) == &L_fallthrough) jccb(assembler_cond, label); \ 4092 else jcc( assembler_cond, label) /*omit semi*/ 4093 4094 // Hacked jmp, which may only be used just before L_fallthrough. 4095 #define final_jmp(label) \ 4096 if (&(label) == &L_fallthrough) { /*do nothing*/ } \ 4097 else jmp(label) /*omit semi*/ 4098 4099 // If the pointers are equal, we are done (e.g., String[] elements). 4100 // This self-check enables sharing of secondary supertype arrays among 4101 // non-primary types such as array-of-interface. Otherwise, each such 4102 // type would need its own customized SSA. 4103 // We move this check to the front of the fast path because many 4104 // type checks are in fact trivially successful in this manner, 4105 // so we get a nicely predicted branch right at the start of the check. 4106 cmpptr(sub_klass, super_klass); 4107 local_jcc(Assembler::equal, *L_success); 4108 4109 // Check the supertype display: 4110 if (must_load_sco) { 4111 // Positive movl does right thing on LP64. 4112 movl(temp_reg, super_check_offset_addr); 4113 super_check_offset = RegisterOrConstant(temp_reg); 4114 } 4115 Address super_check_addr(sub_klass, super_check_offset, Address::times_1, 0); 4116 cmpptr(super_klass, super_check_addr); // load displayed supertype 4117 4118 // This check has worked decisively for primary supers. 4119 // Secondary supers are sought in the super_cache ('super_cache_addr'). 4120 // (Secondary supers are interfaces and very deeply nested subtypes.) 4121 // This works in the same check above because of a tricky aliasing 4122 // between the super_cache and the primary super display elements. 4123 // (The 'super_check_addr' can address either, as the case requires.) 4124 // Note that the cache is updated below if it does not help us find 4125 // what we need immediately. 4126 // So if it was a primary super, we can just fail immediately. 4127 // Otherwise, it's the slow path for us (no success at this point). 4128 4129 if (super_check_offset.is_register()) { 4130 local_jcc(Assembler::equal, *L_success); 4131 cmpl(super_check_offset.as_register(), sc_offset); 4132 if (L_failure == &L_fallthrough) { 4133 local_jcc(Assembler::equal, *L_slow_path); 4134 } else { 4135 local_jcc(Assembler::notEqual, *L_failure); 4136 final_jmp(*L_slow_path); 4137 } 4138 } else if (super_check_offset.as_constant() == sc_offset) { 4139 // Need a slow path; fast failure is impossible. 4140 if (L_slow_path == &L_fallthrough) { 4141 local_jcc(Assembler::equal, *L_success); 4142 } else { 4143 local_jcc(Assembler::notEqual, *L_slow_path); 4144 final_jmp(*L_success); 4145 } 4146 } else { 4147 // No slow path; it's a fast decision. 
4148 if (L_failure == &L_fallthrough) { 4149 local_jcc(Assembler::equal, *L_success); 4150 } else { 4151 local_jcc(Assembler::notEqual, *L_failure); 4152 final_jmp(*L_success); 4153 } 4154 } 4155 4156 bind(L_fallthrough); 4157 4158 #undef local_jcc 4159 #undef final_jmp 4160 } 4161 4162 4163 void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass, 4164 Register super_klass, 4165 Register temp_reg, 4166 Register temp2_reg, 4167 Label* L_success, 4168 Label* L_failure, 4169 bool set_cond_codes) { 4170 assert_different_registers(sub_klass, super_klass, temp_reg); 4171 if (temp2_reg != noreg) 4172 assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg); 4173 #define IS_A_TEMP(reg) ((reg) == temp_reg || (reg) == temp2_reg) 4174 4175 Label L_fallthrough; 4176 int label_nulls = 0; 4177 if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } 4178 if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } 4179 assert(label_nulls <= 1, "at most one NULL in the batch"); 4180 4181 // a couple of useful fields in sub_klass: 4182 int ss_offset = in_bytes(Klass::secondary_supers_offset()); 4183 int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); 4184 Address secondary_supers_addr(sub_klass, ss_offset); 4185 Address super_cache_addr( sub_klass, sc_offset); 4186 4187 // Do a linear scan of the secondary super-klass chain. 4188 // This code is rarely used, so simplicity is a virtue here. 4189 // The repne_scan instruction uses fixed registers, which we must spill. 4190 // Don't worry too much about pre-existing connections with the input regs. 4191 4192 assert(sub_klass != rax, "killed reg"); // killed by mov(rax, super) 4193 assert(sub_klass != rcx, "killed reg"); // killed by lea(rcx, &pst_counter) 4194 4195 // Get super_klass value into rax (even if it was in rdi or rcx). 4196 bool pushed_rax = false, pushed_rcx = false, pushed_rdi = false; 4197 if (super_klass != rax || UseCompressedOops) { 4198 if (!IS_A_TEMP(rax)) { push(rax); pushed_rax = true; } 4199 mov(rax, super_klass); 4200 } 4201 if (!IS_A_TEMP(rcx)) { push(rcx); pushed_rcx = true; } 4202 if (!IS_A_TEMP(rdi)) { push(rdi); pushed_rdi = true; } 4203 4204 #ifndef PRODUCT 4205 int* pst_counter = &SharedRuntime::_partial_subtype_ctr; 4206 ExternalAddress pst_counter_addr((address) pst_counter); 4207 NOT_LP64( incrementl(pst_counter_addr) ); 4208 LP64_ONLY( lea(rcx, pst_counter_addr) ); 4209 LP64_ONLY( incrementl(Address(rcx, 0)) ); 4210 #endif //PRODUCT 4211 4212 // We will consult the secondary-super array. 4213 movptr(rdi, secondary_supers_addr); 4214 // Load the array length. (Positive movl does right thing on LP64.) 4215 movl(rcx, Address(rdi, Array<Klass*>::length_offset_in_bytes())); 4216 // Skip to start of data. 4217 addptr(rdi, Array<Klass*>::base_offset_in_bytes()); 4218 4219 // Scan RCX words at [RDI] for an occurrence of RAX. 4220 // Set NZ/Z based on last compare. 4221 // Z flag value will not be set by 'repne' if RCX == 0 since 'repne' does 4222 // not change flags (only scas instruction which is repeated sets flags). 4223 // Set Z = 0 (not equal) before 'repne' to indicate that class was not found. 4224 4225 testptr(rax,rax); // Set Z = 0 4226 repne_scan(); 4227 4228 // Unspill the temp. registers: 4229 if (pushed_rdi) pop(rdi); 4230 if (pushed_rcx) pop(rcx); 4231 if (pushed_rax) pop(rax); 4232 4233 if (set_cond_codes) { 4234 // Special hack for the AD files: rdi is guaranteed non-zero. 
4235 assert(!pushed_rdi, "rdi must be left non-NULL"); 4236 // Also, the condition codes are properly set Z/NZ on succeed/failure. 4237 } 4238 4239 if (L_failure == &L_fallthrough) 4240 jccb(Assembler::notEqual, *L_failure); 4241 else jcc(Assembler::notEqual, *L_failure); 4242 4243 // Success. Cache the super we found and proceed in triumph. 4244 movptr(super_cache_addr, super_klass); 4245 4246 if (L_success != &L_fallthrough) { 4247 jmp(*L_success); 4248 } 4249 4250 #undef IS_A_TEMP 4251 4252 bind(L_fallthrough); 4253 } 4254 4255 4256 void MacroAssembler::cmov32(Condition cc, Register dst, Address src) { 4257 if (VM_Version::supports_cmov()) { 4258 cmovl(cc, dst, src); 4259 } else { 4260 Label L; 4261 jccb(negate_condition(cc), L); 4262 movl(dst, src); 4263 bind(L); 4264 } 4265 } 4266 4267 void MacroAssembler::cmov32(Condition cc, Register dst, Register src) { 4268 if (VM_Version::supports_cmov()) { 4269 cmovl(cc, dst, src); 4270 } else { 4271 Label L; 4272 jccb(negate_condition(cc), L); 4273 movl(dst, src); 4274 bind(L); 4275 } 4276 } 4277 4278 void MacroAssembler::verify_oop(Register reg, const char* s) { 4279 if (!VerifyOops) return; 4280 4281 // Pass register number to verify_oop_subroutine 4282 const char* b = NULL; 4283 { 4284 ResourceMark rm; 4285 stringStream ss; 4286 ss.print("verify_oop: %s: %s", reg->name(), s); 4287 b = code_string(ss.as_string()); 4288 } 4289 BLOCK_COMMENT("verify_oop {"); 4290 #ifdef _LP64 4291 push(rscratch1); // save r10, trashed by movptr() 4292 #endif 4293 push(rax); // save rax, 4294 push(reg); // pass register argument 4295 ExternalAddress buffer((address) b); 4296 // avoid using pushptr, as it modifies scratch registers 4297 // and our contract is not to modify anything 4298 movptr(rax, buffer.addr()); 4299 push(rax); 4300 // call indirectly to solve generation ordering problem 4301 movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address())); 4302 call(rax); 4303 // Caller pops the arguments (oop, message) and restores rax, r10 4304 BLOCK_COMMENT("} verify_oop"); 4305 } 4306 4307 4308 RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr, 4309 Register tmp, 4310 int offset) { 4311 intptr_t value = *delayed_value_addr; 4312 if (value != 0) 4313 return RegisterOrConstant(value + offset); 4314 4315 // load indirectly to solve generation ordering problem 4316 movptr(tmp, ExternalAddress((address) delayed_value_addr)); 4317 4318 #ifdef ASSERT 4319 { Label L; 4320 testptr(tmp, tmp); 4321 if (WizardMode) { 4322 const char* buf = NULL; 4323 { 4324 ResourceMark rm; 4325 stringStream ss; 4326 ss.print("DelayedValue="INTPTR_FORMAT, delayed_value_addr[1]); 4327 buf = code_string(ss.as_string()); 4328 } 4329 jcc(Assembler::notZero, L); 4330 STOP(buf); 4331 } else { 4332 jccb(Assembler::notZero, L); 4333 hlt(); 4334 } 4335 bind(L); 4336 } 4337 #endif 4338 4339 if (offset != 0) 4340 addptr(tmp, offset); 4341 4342 return RegisterOrConstant(tmp); 4343 } 4344 4345 4346 Address MacroAssembler::argument_address(RegisterOrConstant arg_slot, 4347 int extra_slot_offset) { 4348 // cf. TemplateTable::prepare_invoke(), if (load_receiver). 
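// The effective address built here is, roughly (a reader's sketch, not normative):
//   rsp + wordSize /* return PC */ + expr_offset_in_bytes(extra_slot_offset)
//       + arg_slot * stackElementSize
// where a constant arg_slot folds into the displacement and a register
// arg_slot becomes the scaled index.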
4349 int stackElementSize = Interpreter::stackElementSize; 4350 int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0); 4351 #ifdef ASSERT 4352 int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1); 4353 assert(offset1 - offset == stackElementSize, "correct arithmetic"); 4354 #endif 4355 Register scale_reg = noreg; 4356 Address::ScaleFactor scale_factor = Address::no_scale; 4357 if (arg_slot.is_constant()) { 4358 offset += arg_slot.as_constant() * stackElementSize; 4359 } else { 4360 scale_reg = arg_slot.as_register(); 4361 scale_factor = Address::times(stackElementSize); 4362 } 4363 offset += wordSize; // return PC is on stack 4364 return Address(rsp, scale_reg, scale_factor, offset); 4365 } 4366 4367 4368 void MacroAssembler::verify_oop_addr(Address addr, const char* s) { 4369 if (!VerifyOops) return; 4370 4371 // Address adjust(addr.base(), addr.index(), addr.scale(), addr.disp() + BytesPerWord); 4372 // Pass register number to verify_oop_subroutine 4373 const char* b = NULL; 4374 { 4375 ResourceMark rm; 4376 stringStream ss; 4377 ss.print("verify_oop_addr: %s", s); 4378 b = code_string(ss.as_string()); 4379 } 4380 #ifdef _LP64 4381 push(rscratch1); // save r10, trashed by movptr() 4382 #endif 4383 push(rax); // save rax, 4384 // addr may contain rsp so we will have to adjust it based on the push 4385 // we just did (and on 64 bit we do two pushes) 4386 // NOTE: 64bit seemed to have had a bug in that it did movq(addr, rax); which 4387 // stores rax into addr which is backwards of what was intended. 4388 if (addr.uses(rsp)) { 4389 lea(rax, addr); 4390 pushptr(Address(rax, LP64_ONLY(2 *) BytesPerWord)); 4391 } else { 4392 pushptr(addr); 4393 } 4394 4395 ExternalAddress buffer((address) b); 4396 // pass msg argument 4397 // avoid using pushptr, as it modifies scratch registers 4398 // and our contract is not to modify anything 4399 movptr(rax, buffer.addr()); 4400 push(rax); 4401 4402 // call indirectly to solve generation ordering problem 4403 movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address())); 4404 call(rax); 4405 // Caller pops the arguments (addr, message) and restores rax, r10. 
4406 } 4407 4408 void MacroAssembler::verify_tlab() { 4409 #ifdef ASSERT 4410 if (UseTLAB && VerifyOops) { 4411 Label next, ok; 4412 Register t1 = rsi; 4413 Register thread_reg = NOT_LP64(rbx) LP64_ONLY(r15_thread); 4414 4415 push(t1); 4416 NOT_LP64(push(thread_reg)); 4417 NOT_LP64(get_thread(thread_reg)); 4418 4419 movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset()))); 4420 cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_start_offset()))); 4421 jcc(Assembler::aboveEqual, next); 4422 STOP("assert(top >= start)"); 4423 should_not_reach_here(); 4424 4425 bind(next); 4426 movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_end_offset()))); 4427 cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset()))); 4428 jcc(Assembler::aboveEqual, ok); 4429 STOP("assert(top <= end)"); 4430 should_not_reach_here(); 4431 4432 bind(ok); 4433 NOT_LP64(pop(thread_reg)); 4434 pop(t1); 4435 } 4436 #endif 4437 } 4438 4439 class ControlWord { 4440 public: 4441 int32_t _value; 4442 4443 int rounding_control() const { return (_value >> 10) & 3 ; } 4444 int precision_control() const { return (_value >> 8) & 3 ; } 4445 bool precision() const { return ((_value >> 5) & 1) != 0; } 4446 bool underflow() const { return ((_value >> 4) & 1) != 0; } 4447 bool overflow() const { return ((_value >> 3) & 1) != 0; } 4448 bool zero_divide() const { return ((_value >> 2) & 1) != 0; } 4449 bool denormalized() const { return ((_value >> 1) & 1) != 0; } 4450 bool invalid() const { return ((_value >> 0) & 1) != 0; } 4451 4452 void print() const { 4453 // rounding control 4454 const char* rc; 4455 switch (rounding_control()) { 4456 case 0: rc = "round near"; break; 4457 case 1: rc = "round down"; break; 4458 case 2: rc = "round up "; break; 4459 case 3: rc = "chop "; break; 4460 }; 4461 // precision control 4462 const char* pc; 4463 switch (precision_control()) { 4464 case 0: pc = "24 bits "; break; 4465 case 1: pc = "reserved"; break; 4466 case 2: pc = "53 bits "; break; 4467 case 3: pc = "64 bits "; break; 4468 }; 4469 // flags 4470 char f[9]; 4471 f[0] = ' '; 4472 f[1] = ' '; 4473 f[2] = (precision ()) ? 'P' : 'p'; 4474 f[3] = (underflow ()) ? 'U' : 'u'; 4475 f[4] = (overflow ()) ? 'O' : 'o'; 4476 f[5] = (zero_divide ()) ? 'Z' : 'z'; 4477 f[6] = (denormalized()) ? 'D' : 'd'; 4478 f[7] = (invalid ()) ? 'I' : 'i'; 4479 f[8] = '\x0'; 4480 // output 4481 printf("%04x masks = %s, %s, %s", _value & 0xFFFF, f, rc, pc); 4482 } 4483 4484 }; 4485 4486 class StatusWord { 4487 public: 4488 int32_t _value; 4489 4490 bool busy() const { return ((_value >> 15) & 1) != 0; } 4491 bool C3() const { return ((_value >> 14) & 1) != 0; } 4492 bool C2() const { return ((_value >> 10) & 1) != 0; } 4493 bool C1() const { return ((_value >> 9) & 1) != 0; } 4494 bool C0() const { return ((_value >> 8) & 1) != 0; } 4495 int top() const { return (_value >> 11) & 7 ; } 4496 bool error_status() const { return ((_value >> 7) & 1) != 0; } 4497 bool stack_fault() const { return ((_value >> 6) & 1) != 0; } 4498 bool precision() const { return ((_value >> 5) & 1) != 0; } 4499 bool underflow() const { return ((_value >> 4) & 1) != 0; } 4500 bool overflow() const { return ((_value >> 3) & 1) != 0; } 4501 bool zero_divide() const { return ((_value >> 2) & 1) != 0; } 4502 bool denormalized() const { return ((_value >> 1) & 1) != 0; } 4503 bool invalid() const { return ((_value >> 0) & 1) != 0; } 4504 4505 void print() const { 4506 // condition codes 4507 char c[5]; 4508 c[0] = (C3()) ? '3' : '-'; 4509 c[1] = (C2()) ? 
'2' : '-'; 4510 c[2] = (C1()) ? '1' : '-'; 4511 c[3] = (C0()) ? '0' : '-'; 4512 c[4] = '\x0'; 4513 // flags 4514 char f[9]; 4515 f[0] = (error_status()) ? 'E' : '-'; 4516 f[1] = (stack_fault ()) ? 'S' : '-'; 4517 f[2] = (precision ()) ? 'P' : '-'; 4518 f[3] = (underflow ()) ? 'U' : '-'; 4519 f[4] = (overflow ()) ? 'O' : '-'; 4520 f[5] = (zero_divide ()) ? 'Z' : '-'; 4521 f[6] = (denormalized()) ? 'D' : '-'; 4522 f[7] = (invalid ()) ? 'I' : '-'; 4523 f[8] = '\x0'; 4524 // output 4525 printf("%04x flags = %s, cc = %s, top = %d", _value & 0xFFFF, f, c, top()); 4526 } 4527 4528 }; 4529 4530 class TagWord { 4531 public: 4532 int32_t _value; 4533 4534 int tag_at(int i) const { return (_value >> (i*2)) & 3; } 4535 4536 void print() const { 4537 printf("%04x", _value & 0xFFFF); 4538 } 4539 4540 }; 4541 4542 class FPU_Register { 4543 public: 4544 int32_t _m0; 4545 int32_t _m1; 4546 int16_t _ex; 4547 4548 bool is_indefinite() const { 4549 return _ex == -1 && _m1 == (int32_t)0xC0000000 && _m0 == 0; 4550 } 4551 4552 void print() const { 4553 char sign = (_ex < 0) ? '-' : '+'; 4554 const char* kind = (_ex == 0x7FFF || _ex == (int16_t)-1) ? "NaN" : " "; 4555 printf("%c%04hx.%08x%08x %s", sign, _ex, _m1, _m0, kind); 4556 }; 4557 4558 }; 4559 4560 class FPU_State { 4561 public: 4562 enum { 4563 register_size = 10, 4564 number_of_registers = 8, 4565 register_mask = 7 4566 }; 4567 4568 ControlWord _control_word; 4569 StatusWord _status_word; 4570 TagWord _tag_word; 4571 int32_t _error_offset; 4572 int32_t _error_selector; 4573 int32_t _data_offset; 4574 int32_t _data_selector; 4575 int8_t _register[register_size * number_of_registers]; 4576 4577 int tag_for_st(int i) const { return _tag_word.tag_at((_status_word.top() + i) & register_mask); } 4578 FPU_Register* st(int i) const { return (FPU_Register*)&_register[register_size * i]; } 4579 4580 const char* tag_as_string(int tag) const { 4581 switch (tag) { 4582 case 0: return "valid"; 4583 case 1: return "zero"; 4584 case 2: return "special"; 4585 case 3: return "empty"; 4586 } 4587 ShouldNotReachHere(); 4588 return NULL; 4589 } 4590 4591 void print() const { 4592 // print computation registers 4593 { int t = _status_word.top(); 4594 for (int i = 0; i < number_of_registers; i++) { 4595 int j = (i - t) & register_mask; 4596 printf("%c r%d = ST%d = ", (j == 0 ? '*' : ' '), i, j); 4597 st(j)->print(); 4598 printf(" %s\n", tag_as_string(_tag_word.tag_at(i))); 4599 } 4600 } 4601 printf("\n"); 4602 // print control registers 4603 printf("ctrl = "); _control_word.print(); printf("\n"); 4604 printf("stat = "); _status_word .print(); printf("\n"); 4605 printf("tags = "); _tag_word .print(); printf("\n"); 4606 } 4607 4608 }; 4609 4610 class Flag_Register { 4611 public: 4612 int32_t _value; 4613 4614 bool overflow() const { return ((_value >> 11) & 1) != 0; } 4615 bool direction() const { return ((_value >> 10) & 1) != 0; } 4616 bool sign() const { return ((_value >> 7) & 1) != 0; } 4617 bool zero() const { return ((_value >> 6) & 1) != 0; } 4618 bool auxiliary_carry() const { return ((_value >> 4) & 1) != 0; } 4619 bool parity() const { return ((_value >> 2) & 1) != 0; } 4620 bool carry() const { return ((_value >> 0) & 1) != 0; } 4621 4622 void print() const { 4623 // flags 4624 char f[8]; 4625 f[0] = (overflow ()) ? 'O' : '-'; 4626 f[1] = (direction ()) ? 'D' : '-'; 4627 f[2] = (sign ()) ? 'S' : '-'; 4628 f[3] = (zero ()) ? 'Z' : '-'; 4629 f[4] = (auxiliary_carry()) ? 'A' : '-'; 4630 f[5] = (parity ()) ? 'P' : '-'; 4631 f[6] = (carry ()) ? 
'C' : '-'; 4632 f[7] = '\x0'; 4633 // output 4634 printf("%08x flags = %s", _value, f); 4635 } 4636 4637 }; 4638 4639 class IU_Register { 4640 public: 4641 int32_t _value; 4642 4643 void print() const { 4644 printf("%08x %11d", _value, _value); 4645 } 4646 4647 }; 4648 4649 class IU_State { 4650 public: 4651 Flag_Register _eflags; 4652 IU_Register _rdi; 4653 IU_Register _rsi; 4654 IU_Register _rbp; 4655 IU_Register _rsp; 4656 IU_Register _rbx; 4657 IU_Register _rdx; 4658 IU_Register _rcx; 4659 IU_Register _rax; 4660 4661 void print() const { 4662 // computation registers 4663 printf("rax, = "); _rax.print(); printf("\n"); 4664 printf("rbx, = "); _rbx.print(); printf("\n"); 4665 printf("rcx = "); _rcx.print(); printf("\n"); 4666 printf("rdx = "); _rdx.print(); printf("\n"); 4667 printf("rdi = "); _rdi.print(); printf("\n"); 4668 printf("rsi = "); _rsi.print(); printf("\n"); 4669 printf("rbp, = "); _rbp.print(); printf("\n"); 4670 printf("rsp = "); _rsp.print(); printf("\n"); 4671 printf("\n"); 4672 // control registers 4673 printf("flgs = "); _eflags.print(); printf("\n"); 4674 } 4675 }; 4676 4677 4678 class CPU_State { 4679 public: 4680 FPU_State _fpu_state; 4681 IU_State _iu_state; 4682 4683 void print() const { 4684 printf("--------------------------------------------------\n"); 4685 _iu_state .print(); 4686 printf("\n"); 4687 _fpu_state.print(); 4688 printf("--------------------------------------------------\n"); 4689 } 4690 4691 }; 4692 4693 4694 static void _print_CPU_state(CPU_State* state) { 4695 state->print(); 4696 }; 4697 4698 4699 void MacroAssembler::print_CPU_state() { 4700 push_CPU_state(); 4701 push(rsp); // pass CPU state 4702 call(RuntimeAddress(CAST_FROM_FN_PTR(address, _print_CPU_state))); 4703 addptr(rsp, wordSize); // discard argument 4704 pop_CPU_state(); 4705 } 4706 4707 4708 static bool _verify_FPU(int stack_depth, char* s, CPU_State* state) { 4709 static int counter = 0; 4710 FPU_State* fs = &state->_fpu_state; 4711 counter++; 4712 // For leaf calls, only verify that the top few elements remain empty. 4713 // We only need 1 empty at the top for C2 code. 
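// Reminder of the x87 tag encoding used below (cf. tag_as_string above):
// 0 = valid, 1 = zero, 2 = special, 3 = empty; so tag_for_st(i) == 3 means
// ST(i) is empty.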
4714 if( stack_depth < 0 ) { 4715 if( fs->tag_for_st(7) != 3 ) { 4716 printf("FPR7 not empty\n"); 4717 state->print(); 4718 assert(false, "error"); 4719 return false; 4720 } 4721 return true; // All other stack states do not matter 4722 } 4723 4724 assert((fs->_control_word._value & 0xffff) == StubRoutines::_fpu_cntrl_wrd_std, 4725 "bad FPU control word"); 4726 4727 // compute stack depth 4728 int i = 0; 4729 while (i < FPU_State::number_of_registers && fs->tag_for_st(i) < 3) i++; 4730 int d = i; 4731 while (i < FPU_State::number_of_registers && fs->tag_for_st(i) == 3) i++; 4732 // verify findings 4733 if (i != FPU_State::number_of_registers) { 4734 // stack not contiguous 4735 printf("%s: stack not contiguous at ST%d\n", s, i); 4736 state->print(); 4737 assert(false, "error"); 4738 return false; 4739 } 4740 // check if computed stack depth corresponds to expected stack depth 4741 if (stack_depth < 0) { 4742 // expected stack depth is -stack_depth or less 4743 if (d > -stack_depth) { 4744 // too many elements on the stack 4745 printf("%s: <= %d stack elements expected but found %d\n", s, -stack_depth, d); 4746 state->print(); 4747 assert(false, "error"); 4748 return false; 4749 } 4750 } else { 4751 // expected stack depth is stack_depth 4752 if (d != stack_depth) { 4753 // wrong stack depth 4754 printf("%s: %d stack elements expected but found %d\n", s, stack_depth, d); 4755 state->print(); 4756 assert(false, "error"); 4757 return false; 4758 } 4759 } 4760 // everything is cool 4761 return true; 4762 } 4763 4764 4765 void MacroAssembler::verify_FPU(int stack_depth, const char* s) { 4766 if (!VerifyFPU) return; 4767 push_CPU_state(); 4768 push(rsp); // pass CPU state 4769 ExternalAddress msg((address) s); 4770 // pass message string s 4771 pushptr(msg.addr()); 4772 push(stack_depth); // pass stack depth 4773 call(RuntimeAddress(CAST_FROM_FN_PTR(address, _verify_FPU))); 4774 addptr(rsp, 3 * wordSize); // discard arguments 4775 // check for error 4776 { Label L; 4777 testl(rax, rax); 4778 jcc(Assembler::notZero, L); 4779 int3(); // break if error condition 4780 bind(L); 4781 } 4782 pop_CPU_state(); 4783 } 4784 4785 void MacroAssembler::restore_cpu_control_state_after_jni() { 4786 // Either restore the MXCSR register after returning from the JNI Call 4787 // or verify that it wasn't changed (with -Xcheck:jni flag). 4788 if (VM_Version::supports_sse()) { 4789 if (RestoreMXCSROnJNICalls) { 4790 ldmxcsr(ExternalAddress(StubRoutines::addr_mxcsr_std())); 4791 } else if (CheckJNICalls) { 4792 call(RuntimeAddress(StubRoutines::x86::verify_mxcsr_entry())); 4793 } 4794 } 4795 if (VM_Version::supports_avx()) { 4796 // Clear upper bits of YMM registers to avoid SSE <-> AVX transition penalty. 4797 vzeroupper(); 4798 } 4799 4800 #ifndef _LP64 4801 // Either restore the x87 floating point control word after returning 4802 // from the JNI call or verify that it wasn't changed.
4803 if (CheckJNICalls) { 4804 call(RuntimeAddress(StubRoutines::x86::verify_fpu_cntrl_wrd_entry())); 4805 } 4806 #endif // _LP64 4807 } 4808 4809 4810 void MacroAssembler::load_klass(Register dst, Register src) { 4811 #ifdef _LP64 4812 if (UseCompressedClassPointers) { 4813 movl(dst, Address(src, oopDesc::klass_offset_in_bytes())); 4814 decode_klass_not_null(dst); 4815 } else 4816 #endif 4817 movptr(dst, Address(src, oopDesc::klass_offset_in_bytes())); 4818 } 4819 4820 void MacroAssembler::load_prototype_header(Register dst, Register src) { 4821 load_klass(dst, src); 4822 movptr(dst, Address(dst, Klass::prototype_header_offset())); 4823 } 4824 4825 void MacroAssembler::store_klass(Register dst, Register src) { 4826 #ifdef _LP64 4827 if (UseCompressedClassPointers) { 4828 encode_klass_not_null(src); 4829 movl(Address(dst, oopDesc::klass_offset_in_bytes()), src); 4830 } else 4831 #endif 4832 movptr(Address(dst, oopDesc::klass_offset_in_bytes()), src); 4833 } 4834 4835 void MacroAssembler::load_heap_oop(Register dst, Address src) { 4836 #ifdef _LP64 4837 // FIXME: Must change all places where we try to load the klass. 4838 if (UseCompressedOops) { 4839 movl(dst, src); 4840 decode_heap_oop(dst); 4841 } else 4842 #endif 4843 movptr(dst, src); 4844 } 4845 4846 // Doesn't do verification, generates fixed-size code 4847 void MacroAssembler::load_heap_oop_not_null(Register dst, Address src) { 4848 #ifdef _LP64 4849 if (UseCompressedOops) { 4850 movl(dst, src); 4851 decode_heap_oop_not_null(dst); 4852 } else 4853 #endif 4854 movptr(dst, src); 4855 } 4856 4857 void MacroAssembler::store_heap_oop(Address dst, Register src) { 4858 #ifdef _LP64 4859 if (UseCompressedOops) { 4860 assert(!dst.uses(src), "not enough registers"); 4861 encode_heap_oop(src); 4862 movl(dst, src); 4863 } else 4864 #endif 4865 movptr(dst, src); 4866 } 4867 4868 void MacroAssembler::cmp_heap_oop(Register src1, Address src2, Register tmp) { 4869 assert_different_registers(src1, tmp); 4870 #ifdef _LP64 4871 if (UseCompressedOops) { 4872 bool did_push = false; 4873 if (tmp == noreg) { 4874 tmp = rax; 4875 push(tmp); 4876 did_push = true; 4877 assert(!src2.uses(rsp), "can't push"); 4878 } 4879 load_heap_oop(tmp, src2); 4880 cmpptr(src1, tmp); 4881 if (did_push) pop(tmp); 4882 } else 4883 #endif 4884 cmpptr(src1, src2); 4885 } 4886 4887 // Used for storing NULLs. 4888 void MacroAssembler::store_heap_oop_null(Address dst) { 4889 #ifdef _LP64 4890 if (UseCompressedOops) { 4891 movl(dst, (int32_t)NULL_WORD); 4892 } else { 4893 movslq(dst, (int32_t)NULL_WORD); 4894 } 4895 #else 4896 movl(dst, (int32_t)NULL_WORD); 4897 #endif 4898 } 4899 4900 #ifdef _LP64 4901 void MacroAssembler::store_klass_gap(Register dst, Register src) { 4902 if (UseCompressedClassPointers) { 4903 // Store to klass gap in destination 4904 movl(Address(dst, oopDesc::klass_gap_offset_in_bytes()), src); 4905 } 4906 } 4907 4908 #ifdef ASSERT 4909 void MacroAssembler::verify_heapbase(const char* msg) { 4910 assert (UseCompressedOops, "should be compressed"); 4911 assert (Universe::heap() != NULL, "java heap should be initialized"); 4912 if (CheckCompressedOops) { 4913 Label ok; 4914 push(rscratch1); // cmpptr trashes rscratch1 4915 cmpptr(r12_heapbase, ExternalAddress((address)Universe::narrow_ptrs_base_addr())); 4916 jcc(Assembler::equal, ok); 4917 STOP(msg); 4918 bind(ok); 4919 pop(rscratch1); 4920 } 4921 } 4922 #endif 4923 4924 // Algorithm must match oop.inline.hpp encode_heap_oop.
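// In C-like terms the encoding below computes (sketch):
//   narrow = (r == NULL) ? 0 : (r - narrow_oop_base) >> narrow_oop_shift;
// degenerating to a bare shift (or a no-op) when the base is NULL and/or
// the shift is zero.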
4925 void MacroAssembler::encode_heap_oop(Register r) { 4926 #ifdef ASSERT 4927 verify_heapbase("MacroAssembler::encode_heap_oop: heap base corrupted?"); 4928 #endif 4929 verify_oop(r, "broken oop in encode_heap_oop"); 4930 if (Universe::narrow_oop_base() == NULL) { 4931 if (Universe::narrow_oop_shift() != 0) { 4932 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); 4933 shrq(r, LogMinObjAlignmentInBytes); 4934 } 4935 return; 4936 } 4937 testq(r, r); 4938 cmovq(Assembler::equal, r, r12_heapbase); 4939 subq(r, r12_heapbase); 4940 shrq(r, LogMinObjAlignmentInBytes); 4941 } 4942 4943 void MacroAssembler::encode_heap_oop_not_null(Register r) { 4944 #ifdef ASSERT 4945 verify_heapbase("MacroAssembler::encode_heap_oop_not_null: heap base corrupted?"); 4946 if (CheckCompressedOops) { 4947 Label ok; 4948 testq(r, r); 4949 jcc(Assembler::notEqual, ok); 4950 STOP("null oop passed to encode_heap_oop_not_null"); 4951 bind(ok); 4952 } 4953 #endif 4954 verify_oop(r, "broken oop in encode_heap_oop_not_null"); 4955 if (Universe::narrow_oop_base() != NULL) { 4956 subq(r, r12_heapbase); 4957 } 4958 if (Universe::narrow_oop_shift() != 0) { 4959 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); 4960 shrq(r, LogMinObjAlignmentInBytes); 4961 } 4962 } 4963 4964 void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) { 4965 #ifdef ASSERT 4966 verify_heapbase("MacroAssembler::encode_heap_oop_not_null2: heap base corrupted?"); 4967 if (CheckCompressedOops) { 4968 Label ok; 4969 testq(src, src); 4970 jcc(Assembler::notEqual, ok); 4971 STOP("null oop passed to encode_heap_oop_not_null2"); 4972 bind(ok); 4973 } 4974 #endif 4975 verify_oop(src, "broken oop in encode_heap_oop_not_null2"); 4976 if (dst != src) { 4977 movq(dst, src); 4978 } 4979 if (Universe::narrow_oop_base() != NULL) { 4980 subq(dst, r12_heapbase); 4981 } 4982 if (Universe::narrow_oop_shift() != 0) { 4983 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); 4984 shrq(dst, LogMinObjAlignmentInBytes); 4985 } 4986 } 4987 4988 void MacroAssembler::decode_heap_oop(Register r) { 4989 #ifdef ASSERT 4990 verify_heapbase("MacroAssembler::decode_heap_oop: heap base corrupted?"); 4991 #endif 4992 if (Universe::narrow_oop_base() == NULL) { 4993 if (Universe::narrow_oop_shift() != 0) { 4994 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); 4995 shlq(r, LogMinObjAlignmentInBytes); 4996 } 4997 } else { 4998 Label done; 4999 shlq(r, LogMinObjAlignmentInBytes); 5000 jccb(Assembler::equal, done); 5001 addq(r, r12_heapbase); 5002 bind(done); 5003 } 5004 verify_oop(r, "broken oop in decode_heap_oop"); 5005 } 5006 5007 void MacroAssembler::decode_heap_oop_not_null(Register r) { 5008 // Note: it will change flags 5009 assert (UseCompressedOops, "should only be used for compressed headers"); 5010 assert (Universe::heap() != NULL, "java heap should be initialized"); 5011 // Cannot assert, unverified entry point counts instructions (see .ad file) 5012 // vtableStubs also counts instructions in pd_code_size_limit. 5013 // Also do not verify_oop as this is called by verify_oop. 
5014 if (Universe::narrow_oop_shift() != 0) { 5015 assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); 5016 shlq(r, LogMinObjAlignmentInBytes); 5017 if (Universe::narrow_oop_base() != NULL) { 5018 addq(r, r12_heapbase); 5019 } 5020 } else { 5021 assert (Universe::narrow_oop_base() == NULL, "sanity"); 5022 } 5023 } 5024 5025 void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) { 5026 // Note: it will change flags 5027 assert (UseCompressedOops, "should only be used for compressed headers"); 5028 assert (Universe::heap() != NULL, "java heap should be initialized"); 5029 // Cannot assert, unverified entry point counts instructions (see .ad file) 5030 // vtableStubs also counts instructions in pd_code_size_limit. 5031 // Also do not verify_oop as this is called by verify_oop. 5032 if (Universe::narrow_oop_shift() != 0) { 5033 assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); 5034 if (LogMinObjAlignmentInBytes == Address::times_8) { 5035 leaq(dst, Address(r12_heapbase, src, Address::times_8, 0)); 5036 } else { 5037 if (dst != src) { 5038 movq(dst, src); 5039 } 5040 shlq(dst, LogMinObjAlignmentInBytes); 5041 if (Universe::narrow_oop_base() != NULL) { 5042 addq(dst, r12_heapbase); 5043 } 5044 } 5045 } else { 5046 assert (Universe::narrow_oop_base() == NULL, "sanity"); 5047 if (dst != src) { 5048 movq(dst, src); 5049 } 5050 } 5051 } 5052 5053 void MacroAssembler::encode_klass_not_null(Register r) { 5054 assert(Universe::narrow_klass_base() != NULL, "Base should be initialized"); 5055 // Use r12 as a scratch register in which to temporarily load the narrow_klass_base. 5056 assert(r != r12_heapbase, "Encoding a klass in r12"); 5057 mov64(r12_heapbase, (int64_t)Universe::narrow_klass_base()); 5058 subq(r, r12_heapbase); 5059 if (Universe::narrow_klass_shift() != 0) { 5060 assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); 5061 shrq(r, LogKlassAlignmentInBytes); 5062 } 5063 reinit_heapbase(); 5064 } 5065 5066 void MacroAssembler::encode_klass_not_null(Register dst, Register src) { 5067 if (dst == src) { 5068 encode_klass_not_null(src); 5069 } else { 5070 mov64(dst, (int64_t)Universe::narrow_klass_base()); 5071 negq(dst); 5072 addq(dst, src); 5073 if (Universe::narrow_klass_shift() != 0) { 5074 assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); 5075 shrq(dst, LogKlassAlignmentInBytes); 5076 } 5077 } 5078 } 5079 5080 // Function instr_size_for_decode_klass_not_null() counts the instructions 5081 // generated by decode_klass_not_null(register r) and reinit_heapbase(), 5082 // when (Universe::heap() != NULL). Hence, if the instructions they 5083 // generate change, then this method needs to be updated. 5084 int MacroAssembler::instr_size_for_decode_klass_not_null() { 5085 assert (UseCompressedClassPointers, "only for compressed klass ptrs"); 5086 // mov64 + addq + shlq? + mov64 (for reinit_heapbase()). 5087 return (Universe::narrow_klass_shift() == 0 ? 20 : 24); 5088 } 5089 5090 // !!! If the instructions that get generated here change then function 5091 // instr_size_for_decode_klass_not_null() needs to get updated. 
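// In outline (sketch): klass = narrow_klass_base + (narrow << narrow_klass_shift),
// with r12 borrowed as scratch for the base and restored via reinit_heapbase().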
5092 void MacroAssembler::decode_klass_not_null(Register r) { 5093 // Note: it will change flags 5094 assert(Universe::narrow_klass_base() != NULL, "Base should be initialized"); 5095 assert (UseCompressedClassPointers, "should only be used for compressed headers"); 5096 assert(r != r12_heapbase, "Decoding a klass in r12"); 5097 // Cannot assert, unverified entry point counts instructions (see .ad file) 5098 // vtableStubs also counts instructions in pd_code_size_limit. 5099 // Also do not verify_oop as this is called by verify_oop. 5100 if (Universe::narrow_klass_shift() != 0) { 5101 assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); 5102 shlq(r, LogKlassAlignmentInBytes); 5103 } 5104 // Use r12 as a scratch register in which to temporarily load the narrow_klass_base. 5105 mov64(r12_heapbase, (int64_t)Universe::narrow_klass_base()); 5106 addq(r, r12_heapbase); 5107 reinit_heapbase(); 5108 } 5109 5110 void MacroAssembler::decode_klass_not_null(Register dst, Register src) { 5111 // Note: it will change flags 5112 assert(Universe::narrow_klass_base() != NULL, "Base should be initialized"); 5113 assert (UseCompressedClassPointers, "should only be used for compressed headers"); 5114 if (dst == src) { 5115 decode_klass_not_null(dst); 5116 } else { 5117 // Cannot assert, unverified entry point counts instructions (see .ad file) 5118 // vtableStubs also counts instructions in pd_code_size_limit. 5119 // Also do not verify_oop as this is called by verify_oop. 5120 5121 mov64(dst, (int64_t)Universe::narrow_klass_base()); 5122 if (Universe::narrow_klass_shift() != 0) { 5123 assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); 5124 assert(LogKlassAlignmentInBytes == Address::times_8, "klass not aligned on 64bits?"); 5125 leaq(dst, Address(dst, src, Address::times_8, 0)); 5126 } else { 5127 addq(dst, src); 5128 } 5129 } 5130 } 5131 5132 void MacroAssembler::set_narrow_oop(Register dst, jobject obj) { 5133 assert (UseCompressedOops, "should only be used for compressed headers"); 5134 assert (Universe::heap() != NULL, "java heap should be initialized"); 5135 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); 5136 int oop_index = oop_recorder()->find_index(obj); 5137 RelocationHolder rspec = oop_Relocation::spec(oop_index); 5138 mov_narrow_oop(dst, oop_index, rspec); 5139 } 5140 5141 void MacroAssembler::set_narrow_oop(Address dst, jobject obj) { 5142 assert (UseCompressedOops, "should only be used for compressed headers"); 5143 assert (Universe::heap() != NULL, "java heap should be initialized"); 5144 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); 5145 int oop_index = oop_recorder()->find_index(obj); 5146 RelocationHolder rspec = oop_Relocation::spec(oop_index); 5147 mov_narrow_oop(dst, oop_index, rspec); 5148 } 5149 5150 void MacroAssembler::set_narrow_klass(Register dst, Klass* k) { 5151 assert (UseCompressedClassPointers, "should only be used for compressed headers"); 5152 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); 5153 int klass_index = oop_recorder()->find_index(k); 5154 RelocationHolder rspec = metadata_Relocation::spec(klass_index); 5155 mov_narrow_oop(dst, Klass::encode_klass(k), rspec); 5156 } 5157 5158 void MacroAssembler::set_narrow_klass(Address dst, Klass* k) { 5159 assert (UseCompressedClassPointers, "should only be used for compressed headers"); 5160 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); 5161 int klass_index = 
oop_recorder()->find_index(k); 5162 RelocationHolder rspec = metadata_Relocation::spec(klass_index); 5163 mov_narrow_oop(dst, Klass::encode_klass(k), rspec); 5164 } 5165 5166 void MacroAssembler::cmp_narrow_oop(Register dst, jobject obj) { 5167 assert (UseCompressedOops, "should only be used for compressed headers"); 5168 assert (Universe::heap() != NULL, "java heap should be initialized"); 5169 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); 5170 int oop_index = oop_recorder()->find_index(obj); 5171 RelocationHolder rspec = oop_Relocation::spec(oop_index); 5172 Assembler::cmp_narrow_oop(dst, oop_index, rspec); 5173 } 5174 5175 void MacroAssembler::cmp_narrow_oop(Address dst, jobject obj) { 5176 assert (UseCompressedOops, "should only be used for compressed headers"); 5177 assert (Universe::heap() != NULL, "java heap should be initialized"); 5178 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); 5179 int oop_index = oop_recorder()->find_index(obj); 5180 RelocationHolder rspec = oop_Relocation::spec(oop_index); 5181 Assembler::cmp_narrow_oop(dst, oop_index, rspec); 5182 } 5183 5184 void MacroAssembler::cmp_narrow_klass(Register dst, Klass* k) { 5185 assert (UseCompressedClassPointers, "should only be used for compressed headers"); 5186 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); 5187 int klass_index = oop_recorder()->find_index(k); 5188 RelocationHolder rspec = metadata_Relocation::spec(klass_index); 5189 Assembler::cmp_narrow_oop(dst, Klass::encode_klass(k), rspec); 5190 } 5191 5192 void MacroAssembler::cmp_narrow_klass(Address dst, Klass* k) { 5193 assert (UseCompressedClassPointers, "should only be used for compressed headers"); 5194 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); 5195 int klass_index = oop_recorder()->find_index(k); 5196 RelocationHolder rspec = metadata_Relocation::spec(klass_index); 5197 Assembler::cmp_narrow_oop(dst, Klass::encode_klass(k), rspec); 5198 } 5199 5200 void MacroAssembler::reinit_heapbase() { 5201 if (UseCompressedOops || UseCompressedClassPointers) { 5202 if (Universe::heap() != NULL) { 5203 if (Universe::narrow_oop_base() == NULL) { 5204 MacroAssembler::xorptr(r12_heapbase, r12_heapbase); 5205 } else { 5206 mov64(r12_heapbase, (int64_t)Universe::narrow_ptrs_base()); 5207 } 5208 } else { 5209 movptr(r12_heapbase, ExternalAddress((address)Universe::narrow_ptrs_base_addr())); 5210 } 5211 } 5212 } 5213 5214 #endif // _LP64 5215 5216 5217 // C2 compiled method's prolog code. 5218 void MacroAssembler::verified_entry(int framesize, bool stack_bang, bool fp_mode_24b) { 5219 5220 // WARNING: Initial instruction MUST be 5 bytes or longer so that 5221 // NativeJump::patch_verified_entry will be able to patch out the entry 5222 // code safely. The push to verify stack depth is ok at 5 bytes, 5223 // the frame allocation can be either 3 or 6 bytes. So if we don't do 5224 // stack bang then we must use the 6 byte frame allocation even if 5225 // we have no frame. :-( 5226 5227 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); 5228 // Remove word for return addr 5229 framesize -= wordSize; 5230 5231 // Calls to C2R adapters often do not accept exceptional returns. 5232 // We require that their callers must bang for them. But be careful, because 5233 // some VM calls (such as call site linkage) can use several kilobytes of 5234 // stack. But the stack safety zone should account for that. 5235 // See bugs 4446381, 4468289, 4497237. 
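// Frame shape produced below (sketch; framesize already excludes the return
// address word at this point):
//   [return address]   <- pushed by the caller
//   [saved rbp]
//   [framesize - wordSize bytes of frame body]
// with a 0xbadb100d cookie in the top word of the body when VerifyStackAtCalls.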
  if (stack_bang) {
    generate_stack_overflow_check(framesize);

    // We always push rbp so that on return to the interpreter rbp will be
    // restored correctly and we can correct the stack.
    push(rbp);
    // Remove word for ebp
    framesize -= wordSize;

    // Create frame
    if (framesize) {
      subptr(rsp, framesize);
    }
  } else {
    // Create frame (force generation of a 4 byte immediate value)
    subptr_imm32(rsp, framesize);

    // Save RBP register now.
    framesize -= wordSize;
    movptr(Address(rsp, framesize), rbp);
  }

  if (VerifyStackAtCalls) { // Majik cookie to verify stack depth
    framesize -= wordSize;
    movptr(Address(rsp, framesize), (int32_t)0xbadb100d);
  }

#ifndef _LP64
  // If method sets FPU control word do it now
  if (fp_mode_24b) {
    fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
  }
  if (UseSSE >= 2 && VerifyFPU) {
    verify_FPU(0, "FPU stack must be clean on entry");
  }
#endif

#ifdef ASSERT
  if (VerifyStackAtCalls) {
    Label L;
    push(rax);
    mov(rax, rsp);
    andptr(rax, StackAlignmentInBytes-1);
    cmpptr(rax, StackAlignmentInBytes-wordSize);
    pop(rax);
    jcc(Assembler::equal, L);
    STOP("Stack is not properly aligned!");
    bind(L);
  }
#endif

}

void MacroAssembler::clear_mem(Register base, Register cnt, Register tmp) {
  // cnt - number of qwords (8-byte words).
  // base - start address, qword aligned.
  assert(base==rdi, "base register must be edi for rep stos");
  assert(tmp==rax, "tmp register must be eax for rep stos");
  assert(cnt==rcx, "cnt register must be ecx for rep stos");

  xorptr(tmp, tmp);
  if (UseFastStosb) {
    shlptr(cnt, 3); // convert to number of bytes
    rep_stosb();
  } else {
    NOT_LP64(shlptr(cnt, 1);) // convert to number of dwords for 32-bit VM
    rep_stos();
  }
}

// IndexOf for constant substrings with size >= 8 chars
// which don't need to be loaded through stack.
void MacroAssembler::string_indexofC8(Register str1, Register str2,
                                      Register cnt1, Register cnt2,
                                      int int_cnt2, Register result,
                                      XMMRegister vec, Register tmp) {
  ShortBranchVerifier sbv(this);
  assert(UseSSE42Intrinsics, "SSE4.2 is required");

  // This method uses the pcmpestri instruction with bound registers
  //   inputs:
  //     xmm - substring
  //     rax - substring length (elements count)
  //     mem - scanned string
  //     rdx - string length (elements count)
  //     0xd - mode: 1100 (substring search) + 01 (unsigned shorts)
  //   outputs:
  //     rcx - matched index in string
  assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri");

  Label RELOAD_SUBSTR, SCAN_TO_SUBSTR, SCAN_SUBSTR,
        RET_FOUND, RET_NOT_FOUND, EXIT, FOUND_SUBSTR,
        MATCH_SUBSTR_HEAD, RELOAD_STR, FOUND_CANDIDATE;

  // Note, inline_string_indexOf() generates checks:
  // if (substr.count > string.count) return -1;
  // if (substr.count == 0) return 0;
  assert(int_cnt2 >= 8, "this code is used only for cnt2 >= 8 chars");
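  // For orientation, a scalar sketch of what the vectorized scan below
  // computes over UTF-16 code units (s/s_len and sub/sub_len are
  // hypothetical plain arrays and lengths):
  //
  //   for (int i = 0; i + sub_len <= s_len; i++) {
  //     if (memcmp(s + i, sub, sub_len * sizeof(jchar)) == 0) return i;
  //   }
  //   return -1;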
  // Load substring.
  movdqu(vec, Address(str2, 0));
  movl(cnt2, int_cnt2);
  movptr(result, str1); // string addr

  if (int_cnt2 > 8) {
    jmpb(SCAN_TO_SUBSTR);

    // Reload substr for rescan, this code
    // is executed only for large substrings (> 8 chars)
    bind(RELOAD_SUBSTR);
    movdqu(vec, Address(str2, 0));
    negptr(cnt2); // Jumped here with negative cnt2, convert to positive

    bind(RELOAD_STR);
    // We came here after the beginning of the substring was
    // matched but the rest of it was not so we need to search
    // again. Start from the next element after the previous match.

    // cnt2 is the number of remaining substring elements and
    // cnt1 is the number of remaining string elements when cmp failed.
    // Restored cnt1 = cnt1 - cnt2 + int_cnt2
    subl(cnt1, cnt2);
    addl(cnt1, int_cnt2);
    movl(cnt2, int_cnt2); // Now restore cnt2

    decrementl(cnt1);     // Shift to next element
    cmpl(cnt1, cnt2);
    jccb(Assembler::negative, RET_NOT_FOUND);  // Left less than substring

    addptr(result, 2);

  } // (int_cnt2 > 8)

  // Scan string for start of substr in 16-byte vectors
  bind(SCAN_TO_SUBSTR);
  pcmpestri(vec, Address(result, 0), 0x0d);
  jccb(Assembler::below, FOUND_CANDIDATE);   // CF == 1
  subl(cnt1, 8);
  jccb(Assembler::lessEqual, RET_NOT_FOUND); // Scanned full string
  cmpl(cnt1, cnt2);
  jccb(Assembler::negative, RET_NOT_FOUND);  // Left less than substring
  addptr(result, 16);
  jmpb(SCAN_TO_SUBSTR);

  // Found a potential substr
  bind(FOUND_CANDIDATE);
  // Matched whole vector if first element matched (tmp(rcx) == 0).
  if (int_cnt2 == 8) {
    jccb(Assembler::overflow, RET_FOUND);    // OF == 1
  } else { // int_cnt2 > 8
    jccb(Assembler::overflow, FOUND_SUBSTR);
  }
  // After pcmpestri tmp(rcx) contains matched element index
  // Compute start addr of substr
  lea(result, Address(result, tmp, Address::times_2));

  // Make sure string is still long enough
  subl(cnt1, tmp);
  cmpl(cnt1, cnt2);
  if (int_cnt2 == 8) {
    jccb(Assembler::greaterEqual, SCAN_TO_SUBSTR);
  } else { // int_cnt2 > 8
    jccb(Assembler::greaterEqual, MATCH_SUBSTR_HEAD);
  }
  // Left less than substring.

  bind(RET_NOT_FOUND);
  movl(result, -1);
  jmpb(EXIT);

  if (int_cnt2 > 8) {
    // This code is optimized for the case when the whole substring
    // is matched if its head is matched.
    bind(MATCH_SUBSTR_HEAD);
    pcmpestri(vec, Address(result, 0), 0x0d);
    // Reload only string if it does not match
    jccb(Assembler::noOverflow, RELOAD_STR); // OF == 0

    Label CONT_SCAN_SUBSTR;
    // Compare the rest of substring (> 8 chars).
    bind(FOUND_SUBSTR);
    // First 8 chars are already matched.
    negptr(cnt2);
    addptr(cnt2, 8);

    bind(SCAN_SUBSTR);
    subl(cnt1, 8);
    cmpl(cnt2, -8); // Do not read beyond substring
    jccb(Assembler::lessEqual, CONT_SCAN_SUBSTR);
    // Back-up strings to avoid reading beyond substring:
    // cnt1 = cnt1 - cnt2 + 8
    addl(cnt1, cnt2); // cnt2 is negative
    addl(cnt1, 8);
    movl(cnt2, 8);  negptr(cnt2);
    bind(CONT_SCAN_SUBSTR);
    if (int_cnt2 < (int)G) {
      movdqu(vec, Address(str2, cnt2, Address::times_2, int_cnt2*2));
      pcmpestri(vec, Address(result, cnt2, Address::times_2, int_cnt2*2), 0x0d);
    } else {
      // calculate index in register to avoid integer overflow (int_cnt2*2)
      movl(tmp, int_cnt2);
      addptr(tmp, cnt2);
      movdqu(vec, Address(str2, tmp, Address::times_2, 0));
      pcmpestri(vec, Address(result, tmp, Address::times_2, 0), 0x0d);
    }
    // Need to reload string pointers if not matched whole vector
    jcc(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0
    addptr(cnt2, 8);
    jcc(Assembler::negative, SCAN_SUBSTR);
    // Fall through if found full substring

  } // (int_cnt2 > 8)

  bind(RET_FOUND);
  // Found result if we matched full small substring.
  // Compute substr offset
  subptr(result, str1);
  shrl(result, 1); // index
  bind(EXIT);

} // string_indexofC8

// Small strings are loaded through stack if they cross page boundary.
void MacroAssembler::string_indexof(Register str1, Register str2,
                                    Register cnt1, Register cnt2,
                                    int int_cnt2, Register result,
                                    XMMRegister vec, Register tmp) {
  ShortBranchVerifier sbv(this);
  assert(UseSSE42Intrinsics, "SSE4.2 is required");
  //
  // int_cnt2 is the length of a small (< 8 chars) constant substring
  // or (-1) for a non-constant substring, in which case its length
  // is in the cnt2 register.
  //
  // Note, inline_string_indexOf() generates checks:
  // if (substr.count > string.count) return -1;
  // if (substr.count == 0) return 0;
  //
  assert(int_cnt2 == -1 || (0 < int_cnt2 && int_cnt2 < 8), "should be != 0");

  // This method uses the pcmpestri instruction with bound registers
  //   inputs:
  //     xmm - substring
  //     rax - substring length (elements count)
  //     mem - scanned string
  //     rdx - string length (elements count)
  //     0xd - mode: 1100 (substring search) + 01 (unsigned shorts)
  //   outputs:
  //     rcx - matched index in string
  assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri");

  Label RELOAD_SUBSTR, SCAN_TO_SUBSTR, SCAN_SUBSTR, ADJUST_STR,
        RET_FOUND, RET_NOT_FOUND, CLEANUP, FOUND_SUBSTR,
        FOUND_CANDIDATE;

  { //========================================================
    // We don't know where these strings are located
    // and we can't read beyond them. Load them through stack.
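    // The page test used below, as a scalar sketch: a 16-byte read starting
    // at p stays within one page iff (p & (page_size - 1)) <= page_size - 16,
    // given that page_size is a power of two:
    //
    //   bool safe_16byte_read(uintptr_t p, uintptr_t page_size) {
    //     return (p & (page_size - 1)) <= page_size - 16;
    //   }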
    Label BIG_STRINGS, CHECK_STR, COPY_SUBSTR, COPY_STR;

    movptr(tmp, rsp); // save old SP

    if (int_cnt2 > 0) {     // small (< 8 chars) constant substring
      if (int_cnt2 == 1) {  // One char
        load_unsigned_short(result, Address(str2, 0));
        movdl(vec, result); // move 32 bits
      } else if (int_cnt2 == 2) { // Two chars
        movdl(vec, Address(str2, 0)); // move 32 bits
      } else if (int_cnt2 == 4) { // Four chars
        movq(vec, Address(str2, 0)); // move 64 bits
      } else { // cnt2 = { 3, 5, 6, 7 }
        // Array header size is 12 bytes in 32-bit VM
        // + 6 bytes for 3 chars == 18 bytes,
        // enough space to load vec and shift.
        assert(HeapWordSize*TypeArrayKlass::header_size() >= 12, "sanity");
        movdqu(vec, Address(str2, (int_cnt2*2)-16));
        psrldq(vec, 16-(int_cnt2*2));
      }
    } else { // not constant substring
      cmpl(cnt2, 8);
      jccb(Assembler::aboveEqual, BIG_STRINGS); // Both strings are big enough

      // We can read beyond string if str+16 does not cross page boundary
      // since heaps are aligned and mapped by pages.
      assert(os::vm_page_size() < (int)G, "default page should be small");
      movl(result, str2); // We need only low 32 bits
      andl(result, (os::vm_page_size()-1));
      cmpl(result, (os::vm_page_size()-16));
      jccb(Assembler::belowEqual, CHECK_STR);

      // Move small strings to stack to allow loading 16 bytes into vec.
      subptr(rsp, 16);
      int stk_offset = wordSize-2;
      push(cnt2);

      bind(COPY_SUBSTR);
      load_unsigned_short(result, Address(str2, cnt2, Address::times_2, -2));
      movw(Address(rsp, cnt2, Address::times_2, stk_offset), result);
      decrement(cnt2);
      jccb(Assembler::notZero, COPY_SUBSTR);

      pop(cnt2);
      movptr(str2, rsp); // New substring address
    } // non constant

    bind(CHECK_STR);
    cmpl(cnt1, 8);
    jccb(Assembler::aboveEqual, BIG_STRINGS);

    // Check cross page boundary.
    movl(result, str1); // We need only low 32 bits
    andl(result, (os::vm_page_size()-1));
    cmpl(result, (os::vm_page_size()-16));
    jccb(Assembler::belowEqual, BIG_STRINGS);

    subptr(rsp, 16);
    int stk_offset = -2;
    if (int_cnt2 < 0) { // not constant
      push(cnt2);
      stk_offset += wordSize;
    }
    movl(cnt2, cnt1);

    bind(COPY_STR);
    load_unsigned_short(result, Address(str1, cnt2, Address::times_2, -2));
    movw(Address(rsp, cnt2, Address::times_2, stk_offset), result);
    decrement(cnt2);
    jccb(Assembler::notZero, COPY_STR);

    if (int_cnt2 < 0) { // not constant
      pop(cnt2);
    }
    movptr(str1, rsp); // New string address

    bind(BIG_STRINGS);
    // Load substring.
    if (int_cnt2 < 0) { // -1
      movdqu(vec, Address(str2, 0));
      push(cnt2); // substr count
      push(str2); // substr addr
      push(str1); // string addr
    } else {
      // Small (< 8 chars) constant substrings are loaded already.
      movl(cnt2, int_cnt2);
    }
    push(tmp); // original SP

  } // Finished loading

  //========================================================
  // Start search
  //

  movptr(result, str1); // string addr

  if (int_cnt2 < 0) { // Only for non-constant substring
    jmpb(SCAN_TO_SUBSTR);

    // SP saved at sp+0
    // String saved at sp+1*wordSize
    // Substr saved at sp+2*wordSize
    // Substr count saved at sp+3*wordSize

    // Reload substr for rescan, this code
    // is executed only for large substrings (> 8 chars)
    bind(RELOAD_SUBSTR);
    movptr(str2, Address(rsp, 2*wordSize));
    movl(cnt2, Address(rsp, 3*wordSize));
    movdqu(vec, Address(str2, 0));
    // We came here after the beginning of the substring was
    // matched but the rest of it was not so we need to search
    // again. Start from the next element after the previous match.
    subptr(str1, result); // Restore counter
    shrl(str1, 1);
    addl(cnt1, str1);
    decrementl(cnt1); // Shift to next element
    cmpl(cnt1, cnt2);
    jccb(Assembler::negative, RET_NOT_FOUND); // Left less than substring

    addptr(result, 2);
  } // non constant
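  // A note on the pcmpestri flags tested in the scan below (hedged reading
  // of the equal-ordered mode 0xd): CF == 1 means some candidate match was
  // found and rcx holds its element index; OF mirrors the element-0 result,
  // so OF == 1 means the loaded vector matched starting at its first element.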
  // Scan string for start of substr in 16-byte vectors
  bind(SCAN_TO_SUBSTR);
  assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri");
  pcmpestri(vec, Address(result, 0), 0x0d);
  jccb(Assembler::below, FOUND_CANDIDATE);   // CF == 1
  subl(cnt1, 8);
  jccb(Assembler::lessEqual, RET_NOT_FOUND); // Scanned full string
  cmpl(cnt1, cnt2);
  jccb(Assembler::negative, RET_NOT_FOUND);  // Left less than substring
  addptr(result, 16);

  bind(ADJUST_STR);
  cmpl(cnt1, 8); // Do not read beyond string
  jccb(Assembler::greaterEqual, SCAN_TO_SUBSTR);
  // Back-up string to avoid reading beyond string.
  lea(result, Address(result, cnt1, Address::times_2, -16));
  movl(cnt1, 8);
  jmpb(SCAN_TO_SUBSTR);

  // Found a potential substr
  bind(FOUND_CANDIDATE);
  // After pcmpestri tmp(rcx) contains matched element index

  // Make sure string is still long enough
  subl(cnt1, tmp);
  cmpl(cnt1, cnt2);
  jccb(Assembler::greaterEqual, FOUND_SUBSTR);
  // Left less than substring.

  bind(RET_NOT_FOUND);
  movl(result, -1);
  jmpb(CLEANUP);

  bind(FOUND_SUBSTR);
  // Compute start addr of substr
  lea(result, Address(result, tmp, Address::times_2));

  if (int_cnt2 > 0) { // Constant substring
    // Repeat search for small substring (< 8 chars)
    // from new point without reloading substring.
    // Have to check that we don't read beyond string.
    cmpl(tmp, 8-int_cnt2);
    jccb(Assembler::greater, ADJUST_STR);
    // Fall through if matched whole substring.
  } else { // non constant
    assert(int_cnt2 == -1, "should be != 0");

    addl(tmp, cnt2);
    // Found result if we matched whole substring.
    cmpl(tmp, 8);
    jccb(Assembler::lessEqual, RET_FOUND);

    // Repeat search for small substring (<= 8 chars)
    // from new point 'str1' without reloading substring.
    cmpl(cnt2, 8);
    // Have to check that we don't read beyond string.
    jccb(Assembler::lessEqual, ADJUST_STR);

    Label CHECK_NEXT, CONT_SCAN_SUBSTR, RET_FOUND_LONG;
    // Compare the rest of substring (> 8 chars).
    movptr(str1, result);

    cmpl(tmp, cnt2);
    // First 8 chars are already matched.
    jccb(Assembler::equal, CHECK_NEXT);

    bind(SCAN_SUBSTR);
    pcmpestri(vec, Address(str1, 0), 0x0d);
    // Need to reload string pointers if not matched whole vector
    jcc(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0

    bind(CHECK_NEXT);
    subl(cnt2, 8);
    jccb(Assembler::lessEqual, RET_FOUND_LONG); // Found full substring
    addptr(str1, 16);
    addptr(str2, 16);
    subl(cnt1, 8);
    cmpl(cnt2, 8); // Do not read beyond substring
    jccb(Assembler::greaterEqual, CONT_SCAN_SUBSTR);
    // Back-up strings to avoid reading beyond substring.
    lea(str2, Address(str2, cnt2, Address::times_2, -16));
    lea(str1, Address(str1, cnt2, Address::times_2, -16));
    subl(cnt1, cnt2);
    movl(cnt2, 8);
    addl(cnt1, 8);
    bind(CONT_SCAN_SUBSTR);
    movdqu(vec, Address(str2, 0));
    jmpb(SCAN_SUBSTR);

    bind(RET_FOUND_LONG);
    movptr(str1, Address(rsp, wordSize));
  } // non constant

  bind(RET_FOUND);
  // Compute substr offset
  subptr(result, str1);
  shrl(result, 1); // index

  bind(CLEANUP);
  pop(rsp); // restore SP

} // string_indexof

// Compare strings.
void MacroAssembler::string_compare(Register str1, Register str2,
                                    Register cnt1, Register cnt2, Register result,
                                    XMMRegister vec1) {
  ShortBranchVerifier sbv(this);
  Label LENGTH_DIFF_LABEL, POP_LABEL, DONE_LABEL, WHILE_HEAD_LABEL;
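  // Scalar sketch of the contract implemented below (String.compareTo
  // semantics over UTF-16 code units):
  //
  //   int n = MIN2(len1, len2);
  //   for (int i = 0; i < n; i++) {
  //     if (s1[i] != s2[i]) return (int)s1[i] - (int)s2[i];
  //   }
  //   return len1 - len2;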
  // Compute the minimum of the string lengths and the
  // difference of the string lengths (stack).
  // Do the conditional move stuff
  movl(result, cnt1);
  subl(cnt1, cnt2);
  push(cnt1);
  cmov32(Assembler::lessEqual, cnt2, result);

  // Is the minimum length zero?
  testl(cnt2, cnt2);
  jcc(Assembler::zero, LENGTH_DIFF_LABEL);

  // Compare first characters
  load_unsigned_short(result, Address(str1, 0));
  load_unsigned_short(cnt1, Address(str2, 0));
  subl(result, cnt1);
  jcc(Assembler::notZero, POP_LABEL);
  cmpl(cnt2, 1);
  jcc(Assembler::equal, LENGTH_DIFF_LABEL);

  // Check if the strings start at the same location.
  cmpptr(str1, str2);
  jcc(Assembler::equal, LENGTH_DIFF_LABEL);

  Address::ScaleFactor scale = Address::times_2;
  int stride = 8;

  if (UseAVX >= 2 && UseSSE42Intrinsics) {
    Label COMPARE_WIDE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_WIDE_TAIL, COMPARE_SMALL_STR;
    Label COMPARE_WIDE_VECTORS_LOOP, COMPARE_16_CHARS, COMPARE_INDEX_CHAR;
    Label COMPARE_TAIL_LONG;
    int pcmpmask = 0x19;

    // Setup to compare 16-chars (32-bytes) vectors,
    // start from first character again because it has aligned address.
    int stride2 = 16;
    int adr_stride  = stride  << scale;
    int adr_stride2 = stride2 << scale;

    assert(result == rax && cnt2 == rdx && cnt1 == rcx, "pcmpestri");
    // rax and rdx are used by pcmpestri as elements counters
    movl(result, cnt2);
    andl(cnt2, ~(stride2-1)); // cnt2 holds the vector count
    jcc(Assembler::zero, COMPARE_TAIL_LONG);

    // fast path : compare first 2 8-char vectors.
    bind(COMPARE_16_CHARS);
    movdqu(vec1, Address(str1, 0));
    pcmpestri(vec1, Address(str2, 0), pcmpmask);
    jccb(Assembler::below, COMPARE_INDEX_CHAR);

    movdqu(vec1, Address(str1, adr_stride));
    pcmpestri(vec1, Address(str2, adr_stride), pcmpmask);
    jccb(Assembler::aboveEqual, COMPARE_WIDE_VECTORS);
    addl(cnt1, stride);

    // Compare the characters at index in cnt1
    bind(COMPARE_INDEX_CHAR); // cnt1 has the offset of the mismatching character
    load_unsigned_short(result, Address(str1, cnt1, scale));
    load_unsigned_short(cnt2, Address(str2, cnt1, scale));
    subl(result, cnt2);
    jmp(POP_LABEL);

    // Setup the registers to start vector comparison loop
    bind(COMPARE_WIDE_VECTORS);
    lea(str1, Address(str1, result, scale));
    lea(str2, Address(str2, result, scale));
    subl(result, stride2);
    subl(cnt2, stride2);
    jccb(Assembler::zero, COMPARE_WIDE_TAIL);
    negptr(result);

    // In a loop, compare 16-chars (32-bytes) at once using (vpxor+vptest)
    bind(COMPARE_WIDE_VECTORS_LOOP);
    vmovdqu(vec1, Address(str1, result, scale));
    vpxor(vec1, Address(str2, result, scale));
    vptest(vec1, vec1);
    jccb(Assembler::notZero, VECTOR_NOT_EQUAL);
    addptr(result, stride2);
    subl(cnt2, stride2);
    jccb(Assembler::notZero, COMPARE_WIDE_VECTORS_LOOP);
    // clean upper bits of YMM registers
    vzeroupper();

    // compare wide vectors tail
    bind(COMPARE_WIDE_TAIL);
    testptr(result, result);
    jccb(Assembler::zero, LENGTH_DIFF_LABEL);

    movl(result, stride2);
    movl(cnt2, result);
    negptr(result);
    jmpb(COMPARE_WIDE_VECTORS_LOOP);

    // Identifies the mismatching (higher or lower) 16 bytes in the 32-byte vectors.
    bind(VECTOR_NOT_EQUAL);
    // clean upper bits of YMM registers
    vzeroupper();
    lea(str1, Address(str1, result, scale));
    lea(str2, Address(str2, result, scale));
    jmp(COMPARE_16_CHARS);

    // Compare tail chars, length between 1 and 15 chars
    bind(COMPARE_TAIL_LONG);
    movl(cnt2, result);
    cmpl(cnt2, stride);
    jccb(Assembler::less, COMPARE_SMALL_STR);

    movdqu(vec1, Address(str1, 0));
    pcmpestri(vec1, Address(str2, 0), pcmpmask);
    jcc(Assembler::below, COMPARE_INDEX_CHAR);
    subptr(cnt2, stride);
    jccb(Assembler::zero, LENGTH_DIFF_LABEL);
    lea(str1, Address(str1, result, scale));
    lea(str2, Address(str2, result, scale));
    negptr(cnt2);
    jmpb(WHILE_HEAD_LABEL);

    bind(COMPARE_SMALL_STR);
  } else if (UseSSE42Intrinsics) {
    Label COMPARE_WIDE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_TAIL;
    int pcmpmask = 0x19;
    // Setup to compare 8-char (16-byte) vectors,
    // start from first character again because it has aligned address.
    movl(result, cnt2);
    andl(cnt2, ~(stride - 1)); // cnt2 holds the vector count
    jccb(Assembler::zero, COMPARE_TAIL);

    lea(str1, Address(str1, result, scale));
    lea(str2, Address(str2, result, scale));
    negptr(result);

    // pcmpestri
    //   inputs:
    //     vec1 - substring
    //     rax  - negative string length (elements count)
    //     mem  - scanned string
    //     rdx  - string length (elements count)
    //     pcmpmask - cmp mode: 11000 (string compare with negated result)
    //                + 00 (unsigned bytes) or + 01 (unsigned shorts)
    //   outputs:
    //     rcx - first mismatched element index
    assert(result == rax && cnt2 == rdx && cnt1 == rcx, "pcmpestri");

    bind(COMPARE_WIDE_VECTORS);
    movdqu(vec1, Address(str1, result, scale));
    pcmpestri(vec1, Address(str2, result, scale), pcmpmask);
    // After pcmpestri cnt1(rcx) contains mismatched element index

    jccb(Assembler::below, VECTOR_NOT_EQUAL); // CF==1
    addptr(result, stride);
    subptr(cnt2, stride);
    jccb(Assembler::notZero, COMPARE_WIDE_VECTORS);

    // compare wide vectors tail
    testptr(result, result);
    jccb(Assembler::zero, LENGTH_DIFF_LABEL);

    movl(cnt2, stride);
    movl(result, stride);
    negptr(result);
    movdqu(vec1, Address(str1, result, scale));
    pcmpestri(vec1, Address(str2, result, scale), pcmpmask);
    jccb(Assembler::aboveEqual, LENGTH_DIFF_LABEL);

    // Mismatched characters in the vectors
    bind(VECTOR_NOT_EQUAL);
    addptr(cnt1, result);
    load_unsigned_short(result, Address(str1, cnt1, scale));
    load_unsigned_short(cnt2, Address(str2, cnt1, scale));
    subl(result, cnt2);
    jmpb(POP_LABEL);

    bind(COMPARE_TAIL); // limit is zero
    movl(cnt2, result);
    // Fallthru to tail compare
  }
  // Shift str2 and str1 to the end of the arrays, negate min
  lea(str1, Address(str1, cnt2, scale));
  lea(str2, Address(str2, cnt2, scale));
  decrementl(cnt2); // first character was compared already
  negptr(cnt2);

  // Compare the rest of the elements
  bind(WHILE_HEAD_LABEL);
  load_unsigned_short(result, Address(str1, cnt2, scale, 0));
  load_unsigned_short(cnt1, Address(str2, cnt2, scale, 0));
  subl(result, cnt1);
  jccb(Assembler::notZero, POP_LABEL);
  increment(cnt2);
  jccb(Assembler::notZero, WHILE_HEAD_LABEL);

  // Strings are equal up to min length. Return the length difference.
  bind(LENGTH_DIFF_LABEL);
  pop(result);
  jmpb(DONE_LABEL);

  // Discard the stored length difference
  bind(POP_LABEL);
  pop(cnt1);

  // That's it
  bind(DONE_LABEL);
}

// Compare char[] arrays aligned to 4 bytes or substrings.
void MacroAssembler::char_arrays_equals(bool is_array_equ, Register ary1, Register ary2,
                                        Register limit, Register result, Register chr,
                                        XMMRegister vec1, XMMRegister vec2) {
  ShortBranchVerifier sbv(this);
  Label TRUE_LABEL, FALSE_LABEL, DONE, COMPARE_VECTORS, COMPARE_CHAR;

  int length_offset = arrayOopDesc::length_offset_in_bytes();
  int base_offset   = arrayOopDesc::base_offset_in_bytes(T_CHAR);
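  // Scalar sketch of the equality check emitted below (array case; the
  // substring case arrives with 'limit' preset and skips the null and
  // length checks; ary*_chars stand for the array bodies):
  //
  //   if (ary1 == ary2) return true;
  //   if (ary1 == NULL || ary2 == NULL) return false;
  //   if (ary1_length != ary2_length) return false;
  //   return memcmp(ary1_chars, ary2_chars, ary1_length * sizeof(jchar)) == 0;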
  // Check the input args
  cmpptr(ary1, ary2);
  jcc(Assembler::equal, TRUE_LABEL);

  if (is_array_equ) {
    // Need additional checks for arrays_equals.
    testptr(ary1, ary1);
    jcc(Assembler::zero, FALSE_LABEL);
    testptr(ary2, ary2);
    jcc(Assembler::zero, FALSE_LABEL);

    // Check the lengths
    movl(limit, Address(ary1, length_offset));
    cmpl(limit, Address(ary2, length_offset));
    jcc(Assembler::notEqual, FALSE_LABEL);
  }

  // count == 0
  testl(limit, limit);
  jcc(Assembler::zero, TRUE_LABEL);

  if (is_array_equ) {
    // Load array address
    lea(ary1, Address(ary1, base_offset));
    lea(ary2, Address(ary2, base_offset));
  }

  shll(limit, 1);      // byte count != 0
  movl(result, limit); // copy

  if (UseAVX >= 2) {
    // With AVX2, use 32-byte vector compare
    Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;

    // Compare 32-byte vectors
    andl(result, 0x0000001e); //   tail count (in bytes)
    andl(limit, 0xffffffe0);  // vector count (in bytes)
    jccb(Assembler::zero, COMPARE_TAIL);

    lea(ary1, Address(ary1, limit, Address::times_1));
    lea(ary2, Address(ary2, limit, Address::times_1));
    negptr(limit);

    bind(COMPARE_WIDE_VECTORS);
    vmovdqu(vec1, Address(ary1, limit, Address::times_1));
    vmovdqu(vec2, Address(ary2, limit, Address::times_1));
    vpxor(vec1, vec2);

    vptest(vec1, vec1);
    jccb(Assembler::notZero, FALSE_LABEL);
    addptr(limit, 32);
    jcc(Assembler::notZero, COMPARE_WIDE_VECTORS);

    testl(result, result);
    jccb(Assembler::zero, TRUE_LABEL);

    vmovdqu(vec1, Address(ary1, result, Address::times_1, -32));
    vmovdqu(vec2, Address(ary2, result, Address::times_1, -32));
    vpxor(vec1, vec2);

    vptest(vec1, vec1);
    jccb(Assembler::notZero, FALSE_LABEL);
    jmpb(TRUE_LABEL);

    bind(COMPARE_TAIL); // limit is zero
    movl(limit, result);
    // Fallthru to tail compare
  } else if (UseSSE42Intrinsics) {
    // With SSE4.2, use double quad vector compare
    Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;

    // Compare 16-byte vectors
    andl(result, 0x0000000e); //   tail count (in bytes)
    andl(limit, 0xfffffff0);  // vector count (in bytes)
    jccb(Assembler::zero, COMPARE_TAIL);

    lea(ary1, Address(ary1, limit, Address::times_1));
    lea(ary2, Address(ary2, limit, Address::times_1));
    negptr(limit);

    bind(COMPARE_WIDE_VECTORS);
    movdqu(vec1, Address(ary1, limit, Address::times_1));
    movdqu(vec2, Address(ary2, limit, Address::times_1));
    pxor(vec1, vec2);

    ptest(vec1, vec1);
    jccb(Assembler::notZero, FALSE_LABEL);
    addptr(limit, 16);
    jcc(Assembler::notZero, COMPARE_WIDE_VECTORS);

    testl(result, result);
    jccb(Assembler::zero, TRUE_LABEL);

    movdqu(vec1, Address(ary1, result, Address::times_1, -16));
    movdqu(vec2, Address(ary2, result, Address::times_1, -16));
    pxor(vec1, vec2);

    ptest(vec1, vec1);
    jccb(Assembler::notZero, FALSE_LABEL);
    jmpb(TRUE_LABEL);

    bind(COMPARE_TAIL); // limit is zero
    movl(limit, result);
    // Fallthru to tail compare
  }

  // Compare 4-byte vectors
  andl(limit, 0xfffffffc); // vector count (in bytes)
  jccb(Assembler::zero, COMPARE_CHAR);

  lea(ary1, Address(ary1, limit, Address::times_1));
  lea(ary2, Address(ary2, limit, Address::times_1));
  negptr(limit);

  bind(COMPARE_VECTORS);
  movl(chr, Address(ary1, limit, Address::times_1));
  cmpl(chr, Address(ary2, limit, Address::times_1));
  jccb(Assembler::notEqual, FALSE_LABEL);
  addptr(limit, 4);
  jcc(Assembler::notZero, COMPARE_VECTORS);

  // Compare trailing char (final 2 bytes), if any
  bind(COMPARE_CHAR);
  testl(result, 0x2); // tail char
  jccb(Assembler::zero, TRUE_LABEL);
  load_unsigned_short(chr, Address(ary1, 0));
  load_unsigned_short(limit, Address(ary2, 0));
  cmpl(chr, limit);
  jccb(Assembler::notEqual, FALSE_LABEL);

  bind(TRUE_LABEL);
  movl(result, 1); // return true
  jmpb(DONE);

  bind(FALSE_LABEL);
  xorl(result, result); // return false

  // That's it
  bind(DONE);
  if (UseAVX >= 2) {
    // clean upper bits of YMM registers
    vzeroupper();
  }
}

void MacroAssembler::generate_fill(BasicType t, bool aligned,
                                   Register to, Register value, Register count,
                                   Register rtmp, XMMRegister xtmp) {
  ShortBranchVerifier sbv(this);
  assert_different_registers(to, value, count, rtmp);
  Label L_exit, L_skip_align1, L_skip_align2, L_fill_byte;
  Label L_fill_2_bytes, L_fill_4_bytes;

  int shift = -1;
  switch (t) {
    case T_BYTE:
      shift = 2;
      break;
    case T_SHORT:
      shift = 1;
      break;
    case T_INT:
      shift = 0;
      break;
    default: ShouldNotReachHere();
  }

  if (t == T_BYTE) {
    andl(value, 0xff);
    movl(rtmp, value);
    shll(rtmp, 8);
    orl(value, rtmp);
  }
  if (t == T_SHORT) {
    andl(value, 0xffff);
  }
  if (t == T_BYTE || t == T_SHORT) {
    movl(rtmp, value);
    shll(rtmp, 16);
    orl(value, rtmp);
  }
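  // Note on 'shift': count is in elements, and shift is chosen so that
  // (2 << shift), equivalently (1 << (shift + 1)), is the element count of
  // one qword (8 bytes) and (8 << shift) that of a 32-byte chunk, for every
  // element size; the loop bounds below are formed from these quantities.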
  cmpl(count, 2<<shift); // Short arrays (< 8 bytes) fill by element
  jcc(Assembler::below, L_fill_4_bytes); // use unsigned cmp
  if (!UseUnalignedLoadStores && !aligned && (t == T_BYTE || t == T_SHORT)) {
    // align source address at 4 bytes address boundary
    if (t == T_BYTE) {
      // One byte misalignment happens only for byte arrays
      testptr(to, 1);
      jccb(Assembler::zero, L_skip_align1);
      movb(Address(to, 0), value);
      increment(to);
      decrement(count);
      BIND(L_skip_align1);
    }
    // Two bytes misalignment happens only for byte and short (char) arrays
    testptr(to, 2);
    jccb(Assembler::zero, L_skip_align2);
    movw(Address(to, 0), value);
    addptr(to, 2);
    subl(count, 1<<(shift-1));
    BIND(L_skip_align2);
  }
  if (UseSSE < 2) {
    Label L_fill_32_bytes_loop, L_check_fill_8_bytes, L_fill_8_bytes_loop, L_fill_8_bytes;
    // Fill 32-byte chunks
    subl(count, 8 << shift);
    jcc(Assembler::less, L_check_fill_8_bytes);
    align(16);

    BIND(L_fill_32_bytes_loop);

    for (int i = 0; i < 32; i += 4) {
      movl(Address(to, i), value);
    }

    addptr(to, 32);
    subl(count, 8 << shift);
    jcc(Assembler::greaterEqual, L_fill_32_bytes_loop);
    BIND(L_check_fill_8_bytes);
    addl(count, 8 << shift);
    jccb(Assembler::zero, L_exit);
    jmpb(L_fill_8_bytes);

    //
    // length is too short, just fill qwords
    //
    BIND(L_fill_8_bytes_loop);
    movl(Address(to, 0), value);
    movl(Address(to, 4), value);
    addptr(to, 8);
    BIND(L_fill_8_bytes);
    subl(count, 1 << (shift + 1));
    jcc(Assembler::greaterEqual, L_fill_8_bytes_loop);
    // fall through to fill 4 bytes
  } else {
    Label L_fill_32_bytes;
    if (!UseUnalignedLoadStores) {
      // align to 8 bytes, we know we are 4 byte aligned to start
      testptr(to, 4);
      jccb(Assembler::zero, L_fill_32_bytes);
      movl(Address(to, 0), value);
      addptr(to, 4);
      subl(count, 1<<shift);
    }
    BIND(L_fill_32_bytes);
    {
      assert(UseSSE >= 2, "supported cpu only");
      Label L_fill_32_bytes_loop, L_check_fill_8_bytes, L_fill_8_bytes_loop, L_fill_8_bytes;
      movdl(xtmp, value);
      if (UseAVX >= 2 && UseUnalignedLoadStores) {
        // Fill 64-byte chunks
        Label L_fill_64_bytes_loop, L_check_fill_32_bytes;
        vpbroadcastd(xtmp, xtmp);

        subl(count, 16 << shift);
        jcc(Assembler::less, L_check_fill_32_bytes);
        align(16);

        BIND(L_fill_64_bytes_loop);
        vmovdqu(Address(to, 0), xtmp);
        vmovdqu(Address(to, 32), xtmp);
        addptr(to, 64);
        subl(count, 16 << shift);
        jcc(Assembler::greaterEqual, L_fill_64_bytes_loop);

        BIND(L_check_fill_32_bytes);
        addl(count, 8 << shift);
        jccb(Assembler::less, L_check_fill_8_bytes);
        vmovdqu(Address(to, 0), xtmp);
        addptr(to, 32);
        subl(count, 8 << shift);

        BIND(L_check_fill_8_bytes);
        // clean upper bits of YMM registers
        vzeroupper();
      } else {
        // Fill 32-byte chunks
        pshufd(xtmp, xtmp, 0);

        subl(count, 8 << shift);
        jcc(Assembler::less, L_check_fill_8_bytes);
        align(16);

        BIND(L_fill_32_bytes_loop);

        if (UseUnalignedLoadStores) {
          movdqu(Address(to, 0), xtmp);
          movdqu(Address(to, 16), xtmp);
        } else {
          movq(Address(to, 0), xtmp);
          movq(Address(to, 8), xtmp);
          movq(Address(to, 16), xtmp);
          movq(Address(to, 24), xtmp);
        }

        addptr(to, 32);
        subl(count, 8 << shift);
        jcc(Assembler::greaterEqual, L_fill_32_bytes_loop);

        BIND(L_check_fill_8_bytes);
      }
      addl(count, 8 << shift);
      jccb(Assembler::zero, L_exit);
      jmpb(L_fill_8_bytes);

      //
      // length is too short, just fill qwords
      //
      BIND(L_fill_8_bytes_loop);
      movq(Address(to, 0), xtmp);
      addptr(to, 8);
      BIND(L_fill_8_bytes);
      subl(count, 1 << (shift + 1));
      jcc(Assembler::greaterEqual, L_fill_8_bytes_loop);
    }
  }
  // fill trailing 4 bytes
  BIND(L_fill_4_bytes);
  testl(count, 1<<shift);
  jccb(Assembler::zero, L_fill_2_bytes);
  movl(Address(to, 0), value);
  if (t == T_BYTE || t == T_SHORT) {
    addptr(to, 4);
    BIND(L_fill_2_bytes);
    // fill trailing 2 bytes
    testl(count, 1<<(shift-1));
    jccb(Assembler::zero, L_fill_byte);
    movw(Address(to, 0), value);
    if (t == T_BYTE) {
      addptr(to, 2);
      BIND(L_fill_byte);
      // fill trailing byte
      testl(count, 1);
      jccb(Assembler::zero, L_exit);
      movb(Address(to, 0), value);
    } else {
      BIND(L_fill_byte);
    }
  } else {
    BIND(L_fill_2_bytes);
  }
  BIND(L_exit);
}

// encode char[] to byte[] in ISO_8859_1
void MacroAssembler::encode_iso_array(Register src, Register dst, Register len,
                                      XMMRegister tmp1Reg, XMMRegister tmp2Reg,
                                      XMMRegister tmp3Reg, XMMRegister tmp4Reg,
                                      Register tmp5, Register result) {
  // rsi: src
  // rdi: dst
  // rdx: len
  // rcx: tmp5
  // rax: result
  ShortBranchVerifier sbv(this);
  assert_different_registers(src, dst, len, tmp5, result);
  Label L_done, L_copy_1_char, L_copy_1_char_exit;
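  // Scalar sketch of the conversion below: copy leading chars that fit in
  // ISO-8859-1, stop at the first char with any of bits 0xff00 set, and
  // return the number of chars actually encoded:
  //
  //   int i;
  //   for (i = 0; i < len; i++) {
  //     jchar c = src[i];
  //     if (c & 0xff00) break; // not ISO-8859-1
  //     dst[i] = (jbyte)c;
  //   }
  //   return i;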
  // set result
  xorl(result, result);
  // check for zero length
  testl(len, len);
  jcc(Assembler::zero, L_done);
  movl(result, len);

  // Setup pointers
  lea(src, Address(src, len, Address::times_2)); // char[]
  lea(dst, Address(dst, len, Address::times_1)); // byte[]
  negptr(len);

  if (UseSSE42Intrinsics || UseAVX >= 2) {
    Label L_chars_8_check, L_copy_8_chars, L_copy_8_chars_exit;
    Label L_chars_16_check, L_copy_16_chars, L_copy_16_chars_exit;

    if (UseAVX >= 2) {
      Label L_chars_32_check, L_copy_32_chars, L_copy_32_chars_exit;
      movl(tmp5, 0xff00ff00); // create mask to test for Unicode chars in vector
      movdl(tmp1Reg, tmp5);
      vpbroadcastd(tmp1Reg, tmp1Reg);
      jmpb(L_chars_32_check);

      bind(L_copy_32_chars);
      vmovdqu(tmp3Reg, Address(src, len, Address::times_2, -64));
      vmovdqu(tmp4Reg, Address(src, len, Address::times_2, -32));
      vpor(tmp2Reg, tmp3Reg, tmp4Reg, /* vector256 */ true);
      vptest(tmp2Reg, tmp1Reg); // check for Unicode chars in vector
      jccb(Assembler::notZero, L_copy_32_chars_exit);
      vpackuswb(tmp3Reg, tmp3Reg, tmp4Reg, /* vector256 */ true);
      vpermq(tmp4Reg, tmp3Reg, 0xD8, /* vector256 */ true);
      vmovdqu(Address(dst, len, Address::times_1, -32), tmp4Reg);

      bind(L_chars_32_check);
      addptr(len, 32);
      jccb(Assembler::lessEqual, L_copy_32_chars);

      bind(L_copy_32_chars_exit);
      subptr(len, 16);
      jccb(Assembler::greater, L_copy_16_chars_exit);

    } else if (UseSSE42Intrinsics) {
      movl(tmp5, 0xff00ff00); // create mask to test for Unicode chars in vector
      movdl(tmp1Reg, tmp5);
      pshufd(tmp1Reg, tmp1Reg, 0);
      jmpb(L_chars_16_check);
    }

    bind(L_copy_16_chars);
    if (UseAVX >= 2) {
      vmovdqu(tmp2Reg, Address(src, len, Address::times_2, -32));
      vptest(tmp2Reg, tmp1Reg);
      jccb(Assembler::notZero, L_copy_16_chars_exit);
      vpackuswb(tmp2Reg, tmp2Reg, tmp1Reg, /* vector256 */ true);
      vpermq(tmp3Reg, tmp2Reg, 0xD8, /* vector256 */ true);
    } else {
      if (UseAVX > 0) {
        movdqu(tmp3Reg, Address(src, len, Address::times_2, -32));
        movdqu(tmp4Reg, Address(src, len, Address::times_2, -16));
        vpor(tmp2Reg, tmp3Reg, tmp4Reg, /* vector256 */ false);
      } else {
        movdqu(tmp3Reg, Address(src, len, Address::times_2, -32));
        por(tmp2Reg, tmp3Reg);
        movdqu(tmp4Reg, Address(src, len, Address::times_2, -16));
        por(tmp2Reg, tmp4Reg);
      }
      ptest(tmp2Reg, tmp1Reg); // check for Unicode chars in vector
      jccb(Assembler::notZero, L_copy_16_chars_exit);
      packuswb(tmp3Reg, tmp4Reg);
    }
    movdqu(Address(dst, len, Address::times_1, -16), tmp3Reg);

    bind(L_chars_16_check);
    addptr(len, 16);
    jccb(Assembler::lessEqual, L_copy_16_chars);

    bind(L_copy_16_chars_exit);
    if (UseAVX >= 2) {
      // clean upper bits of YMM registers
      vzeroupper();
    }
    subptr(len, 8);
    jccb(Assembler::greater, L_copy_8_chars_exit);

    bind(L_copy_8_chars);
    movdqu(tmp3Reg, Address(src, len, Address::times_2, -16));
    ptest(tmp3Reg, tmp1Reg);
    jccb(Assembler::notZero, L_copy_8_chars_exit);
    packuswb(tmp3Reg, tmp1Reg);
    movq(Address(dst, len, Address::times_1, -8), tmp3Reg);
    addptr(len, 8);
    jccb(Assembler::lessEqual, L_copy_8_chars);

    bind(L_copy_8_chars_exit);
    subptr(len, 8);
    jccb(Assembler::zero, L_done);
  }

  bind(L_copy_1_char);
  load_unsigned_short(tmp5, Address(src, len, Address::times_2, 0));
  testl(tmp5, 0xff00); // check if Unicode char
  jccb(Assembler::notZero, L_copy_1_char_exit);
  movb(Address(dst, len, Address::times_1, 0), tmp5);
  addptr(len, 1);
  jccb(Assembler::less, L_copy_1_char);
  bind(L_copy_1_char_exit);
  addptr(result, len); // len is the negative count of unprocessed elements
  bind(L_done);
}

/**
 * Emits code to update CRC-32 with a byte value according to constants in table
 *
 * @param [in,out]crc   Register containing the crc.
 * @param [in]val       Register containing the byte to fold into the CRC.
 * @param [in]table     Register containing the table of crc constants.
 *
 * uint32_t crc;
 * val = crc_table[(val ^ crc) & 0xFF];
 * crc = val ^ (crc >> 8);
 */
void MacroAssembler::update_byte_crc32(Register crc, Register val, Register table) {
  xorl(val, crc);
  andl(val, 0xFF);
  shrl(crc, 8); // unsigned shift
  xorl(crc, Address(table, val, Address::times_4, 0));
}

/**
 * Fold 128-bit data chunk
 */
void MacroAssembler::fold_128bit_crc32(XMMRegister xcrc, XMMRegister xK, XMMRegister xtmp, Register buf, int offset) {
  vpclmulhdq(xtmp, xK, xcrc); // [123:64]
  vpclmulldq(xcrc, xK, xcrc); // [63:0]
  vpxor(xcrc, xcrc, Address(buf, offset), false /* vector256 */);
  pxor(xcrc, xtmp);
}

void MacroAssembler::fold_128bit_crc32(XMMRegister xcrc, XMMRegister xK, XMMRegister xtmp, XMMRegister xbuf) {
  vpclmulhdq(xtmp, xK, xcrc);
  vpclmulldq(xcrc, xK, xcrc);
  pxor(xcrc, xbuf);
  pxor(xcrc, xtmp);
}
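/**
 * For orientation (hedged summary): one 128-bit fold computes, over GF(2),
 *
 *   xcrc' = clmul(xcrc[63:0], xK_lo) ^ clmul(xcrc[127:64], xK_hi) ^ next_chunk
 *
 * where clmul is the carry-less multiply performed by vpclmulldq/vpclmulhdq,
 * keeping the running value congruent, modulo the CRC polynomial, to the
 * CRC of all data folded in so far.
 */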
/**
 * 8-bit folds to compute 32-bit CRC
 *
 * uint64_t xcrc;
 * timesXtoThe32[xcrc & 0xFF] ^ (xcrc >> 8);
 */
void MacroAssembler::fold_8bit_crc32(XMMRegister xcrc, Register table, XMMRegister xtmp, Register tmp) {
  movdl(tmp, xcrc);
  andl(tmp, 0xFF);
  movdl(xtmp, Address(table, tmp, Address::times_4, 0));
  psrldq(xcrc, 1); // unsigned shift one byte
  pxor(xcrc, xtmp);
}

/**
 * uint32_t crc;
 * timesXtoThe32[crc & 0xFF] ^ (crc >> 8);
 */
void MacroAssembler::fold_8bit_crc32(Register crc, Register table, Register tmp) {
  movl(tmp, crc);
  andl(tmp, 0xFF);
  shrl(crc, 8);
  xorl(crc, Address(table, tmp, Address::times_4, 0));
}

/**
 * @param crc   register containing existing CRC (32-bit)
 * @param buf   register pointing to input byte buffer (byte*)
 * @param len   register containing number of bytes
 * @param table register that will contain address of CRC table
 * @param tmp   scratch register
 */
void MacroAssembler::kernel_crc32(Register crc, Register buf, Register len, Register table, Register tmp) {
  assert_different_registers(crc, buf, len, table, tmp, rax);

  Label L_tail, L_tail_restore, L_tail_loop, L_exit, L_align_loop, L_aligned;
  Label L_fold_tail, L_fold_128b, L_fold_512b, L_fold_512b_loop, L_fold_tail_loop;

  lea(table, ExternalAddress(StubRoutines::crc_table_addr()));
  notl(crc); // ~crc
  cmpl(len, 16);
  jcc(Assembler::less, L_tail);

  // Align buffer to 16 bytes
  movl(tmp, buf);
  andl(tmp, 0xF);
  jccb(Assembler::zero, L_aligned);
  subl(tmp, 16);
  addl(len, tmp);

  align(4);
  BIND(L_align_loop);
  movsbl(rax, Address(buf, 0)); // load byte with sign extension
  update_byte_crc32(crc, rax, table);
  increment(buf);
  incrementl(tmp);
  jccb(Assembler::less, L_align_loop);

  BIND(L_aligned);
  movl(tmp, len); // save
  shrl(len, 4);
  jcc(Assembler::zero, L_tail_restore);

  // Fold crc into first bytes of vector
  movdqa(xmm1, Address(buf, 0));
  movdl(rax, xmm1);
  xorl(crc, rax);
  pinsrd(xmm1, crc, 0);
  addptr(buf, 16);
  subl(len, 4); // len > 0
  jcc(Assembler::less, L_fold_tail);

  movdqa(xmm2, Address(buf,  0));
  movdqa(xmm3, Address(buf, 16));
  movdqa(xmm4, Address(buf, 32));
  addptr(buf, 48);
  subl(len, 3);
  jcc(Assembler::lessEqual, L_fold_512b);

  // Fold total 512 bits of polynomial on each iteration,
  // 128 bits per each of 4 parallel streams.
  movdqu(xmm0, ExternalAddress(StubRoutines::x86::crc_by128_masks_addr() + 32));

  align(32);
  BIND(L_fold_512b_loop);
  fold_128bit_crc32(xmm1, xmm0, xmm5, buf,  0);
  fold_128bit_crc32(xmm2, xmm0, xmm5, buf, 16);
  fold_128bit_crc32(xmm3, xmm0, xmm5, buf, 32);
  fold_128bit_crc32(xmm4, xmm0, xmm5, buf, 48);
  addptr(buf, 64);
  subl(len, 4);
  jcc(Assembler::greater, L_fold_512b_loop);

  // Fold 512 bits to 128 bits.
  BIND(L_fold_512b);
  movdqu(xmm0, ExternalAddress(StubRoutines::x86::crc_by128_masks_addr() + 16));
  fold_128bit_crc32(xmm1, xmm0, xmm5, xmm2);
  fold_128bit_crc32(xmm1, xmm0, xmm5, xmm3);
  fold_128bit_crc32(xmm1, xmm0, xmm5, xmm4);

  // Fold the rest of the 128-bit data chunks
  BIND(L_fold_tail);
  addl(len, 3);
  jccb(Assembler::lessEqual, L_fold_128b);
  movdqu(xmm0, ExternalAddress(StubRoutines::x86::crc_by128_masks_addr() + 16));

  BIND(L_fold_tail_loop);
  fold_128bit_crc32(xmm1, xmm0, xmm5, buf, 0);
  addptr(buf, 16);
  decrementl(len);
  jccb(Assembler::greater, L_fold_tail_loop);

  // Fold 128 bits in xmm1 down into 32 bits in crc register.
  BIND(L_fold_128b);
  movdqu(xmm0, ExternalAddress(StubRoutines::x86::crc_by128_masks_addr()));
  vpclmulqdq(xmm2, xmm0, xmm1, 0x1);
  vpand(xmm3, xmm0, xmm2, false /* vector256 */);
  vpclmulqdq(xmm0, xmm0, xmm3, 0x1);
  psrldq(xmm1, 8);
  psrldq(xmm2, 4);
  pxor(xmm0, xmm1);
  pxor(xmm0, xmm2);

  // 8 8-bit folds to compute 32-bit CRC.
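  // As a scalar sketch (the formulas documented with fold_8bit_crc32 above),
  // with x holding the 64-bit remainder left in xmm0:
  //
  //   for (int j = 0; j < 8; j++)
  //     x = timesXtoThe32[x & 0xFF] ^ (x >> 8); // final CRC in low 32 bits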
  for (int j = 0; j < 4; j++) {
    fold_8bit_crc32(xmm0, table, xmm1, rax);
  }
  movdl(crc, xmm0); // mov 32 bits to general register
  for (int j = 0; j < 4; j++) {
    fold_8bit_crc32(crc, table, rax);
  }

  BIND(L_tail_restore);
  movl(len, tmp); // restore
  BIND(L_tail);
  andl(len, 0xf);
  jccb(Assembler::zero, L_exit);

  // Fold the rest of bytes
  align(4);
  BIND(L_tail_loop);
  movsbl(rax, Address(buf, 0)); // load byte with sign extension
  update_byte_crc32(crc, rax, table);
  increment(buf);
  decrementl(len);
  jccb(Assembler::greater, L_tail_loop);

  BIND(L_exit);
  notl(crc); // ~crc
}

#undef BIND
#undef BLOCK_COMMENT


Assembler::Condition MacroAssembler::negate_condition(Assembler::Condition cond) {
  switch (cond) {
    // Note some conditions are synonyms for others
    case Assembler::zero:         return Assembler::notZero;
    case Assembler::notZero:      return Assembler::zero;
    case Assembler::less:         return Assembler::greaterEqual;
    case Assembler::lessEqual:    return Assembler::greater;
    case Assembler::greater:      return Assembler::lessEqual;
    case Assembler::greaterEqual: return Assembler::less;
    case Assembler::below:        return Assembler::aboveEqual;
    case Assembler::belowEqual:   return Assembler::above;
    case Assembler::above:        return Assembler::belowEqual;
    case Assembler::aboveEqual:   return Assembler::below;
    case Assembler::overflow:     return Assembler::noOverflow;
    case Assembler::noOverflow:   return Assembler::overflow;
    case Assembler::negative:     return Assembler::positive;
    case Assembler::positive:     return Assembler::negative;
    case Assembler::parity:       return Assembler::noParity;
    case Assembler::noParity:     return Assembler::parity;
  }
  ShouldNotReachHere(); return Assembler::overflow;
}

SkipIfEqual::SkipIfEqual(
    MacroAssembler* masm, const bool* flag_addr, bool value) {
  _masm = masm;
  _masm->cmp8(ExternalAddress((address)flag_addr), value);
  _masm->jcc(Assembler::equal, _label);
}

SkipIfEqual::~SkipIfEqual() {
  _masm->bind(_label);
}
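// A hedged usage sketch of SkipIfEqual (the flag name is illustrative): the
// guarded instructions are emitted unconditionally but jumped over at run
// time whenever *flag_addr == value.
//
//   {
//     SkipIfEqual skip_if(masm, &SomeDiagnosticFlag, false);
//     // ... code emitted here executes only when SomeDiagnosticFlag is true
//   } // destructor binds the label targeted by the skip branch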