1 /* 2 * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 
22 * 23 */ 24 25 #include "precompiled.hpp" 26 #include "assembler_x86.inline.hpp" 27 #include "gc_interface/collectedHeap.inline.hpp" 28 #include "interpreter/interpreter.hpp" 29 #include "memory/cardTableModRefBS.hpp" 30 #include "memory/resourceArea.hpp" 31 #include "prims/methodHandles.hpp" 32 #include "runtime/biasedLocking.hpp" 33 #include "runtime/interfaceSupport.hpp" 34 #include "runtime/objectMonitor.hpp" 35 #include "runtime/os.hpp" 36 #include "runtime/sharedRuntime.hpp" 37 #include "runtime/stubRoutines.hpp" 38 #ifndef SERIALGC 39 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp" 40 #include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp" 41 #include "gc_implementation/g1/heapRegion.hpp" 42 #endif 43 44 // Implementation of AddressLiteral 45 46 AddressLiteral::AddressLiteral(address target, relocInfo::relocType rtype) { 47 _is_lval = false; 48 _target = target; 49 switch (rtype) { 50 case relocInfo::oop_type: 51 // Oops are a special case. Normally they would be their own section 52 // but in cases like icBuffer they are literals in the code stream that 53 // we don't have a section for. We use none so that we get a literal address 54 // which is always patchable. 
55 break; 56 case relocInfo::external_word_type: 57 _rspec = external_word_Relocation::spec(target); 58 break; 59 case relocInfo::internal_word_type: 60 _rspec = internal_word_Relocation::spec(target); 61 break; 62 case relocInfo::opt_virtual_call_type: 63 _rspec = opt_virtual_call_Relocation::spec(); 64 break; 65 case relocInfo::static_call_type: 66 _rspec = static_call_Relocation::spec(); 67 break; 68 case relocInfo::runtime_call_type: 69 _rspec = runtime_call_Relocation::spec(); 70 break; 71 case relocInfo::poll_type: 72 case relocInfo::poll_return_type: 73 _rspec = Relocation::spec_simple(rtype); 74 break; 75 case relocInfo::none: 76 break; 77 default: 78 ShouldNotReachHere(); 79 break; 80 } 81 } 82 83 // Implementation of Address 84 85 #ifdef _LP64 86 87 Address Address::make_array(ArrayAddress adr) { 88 // Not implementable on 64bit machines 89 // Should have been handled higher up the call chain. 90 ShouldNotReachHere(); 91 return Address(); 92 } 93 94 // exceedingly dangerous constructor 95 Address::Address(int disp, address loc, relocInfo::relocType rtype) { 96 _base = noreg; 97 _index = noreg; 98 _scale = no_scale; 99 _disp = disp; 100 switch (rtype) { 101 case relocInfo::external_word_type: 102 _rspec = external_word_Relocation::spec(loc); 103 break; 104 case relocInfo::internal_word_type: 105 _rspec = internal_word_Relocation::spec(loc); 106 break; 107 case relocInfo::runtime_call_type: 108 // HMM 109 _rspec = runtime_call_Relocation::spec(); 110 break; 111 case relocInfo::poll_type: 112 case relocInfo::poll_return_type: 113 _rspec = Relocation::spec_simple(rtype); 114 break; 115 case relocInfo::none: 116 break; 117 default: 118 ShouldNotReachHere(); 119 } 120 } 121 #else // LP64 122 123 Address Address::make_array(ArrayAddress adr) { 124 AddressLiteral base = adr.base(); 125 Address index = adr.index(); 126 assert(index._disp == 0, "must not have disp"); // maybe it can? 
127 Address array(index._base, index._index, index._scale, (intptr_t) base.target()); 128 array._rspec = base._rspec; 129 return array; 130 } 131 132 // exceedingly dangerous constructor 133 Address::Address(address loc, RelocationHolder spec) { 134 _base = noreg; 135 _index = noreg; 136 _scale = no_scale; 137 _disp = (intptr_t) loc; 138 _rspec = spec; 139 } 140 141 #endif // _LP64 142 143 144 145 // Convert the raw encoding form into the form expected by the constructor for 146 // Address. An index of 4 (rsp) corresponds to having no index, so convert 147 // that to noreg for the Address constructor. 148 Address Address::make_raw(int base, int index, int scale, int disp, bool disp_is_oop) { 149 RelocationHolder rspec; 150 if (disp_is_oop) { 151 rspec = Relocation::spec_simple(relocInfo::oop_type); 152 } 153 bool valid_index = index != rsp->encoding(); 154 if (valid_index) { 155 Address madr(as_Register(base), as_Register(index), (Address::ScaleFactor)scale, in_ByteSize(disp)); 156 madr._rspec = rspec; 157 return madr; 158 } else { 159 Address madr(as_Register(base), noreg, Address::no_scale, in_ByteSize(disp)); 160 madr._rspec = rspec; 161 return madr; 162 } 163 } 164 165 // Implementation of Assembler 166 167 int AbstractAssembler::code_fill_byte() { 168 return (u_char)'\xF4'; // hlt 169 } 170 171 // make this go away someday 172 void Assembler::emit_data(jint data, relocInfo::relocType rtype, int format) { 173 if (rtype == relocInfo::none) 174 emit_long(data); 175 else emit_data(data, Relocation::spec_simple(rtype), format); 176 } 177 178 void Assembler::emit_data(jint data, RelocationHolder const& rspec, int format) { 179 assert(imm_operand == 0, "default format must be immediate in this file"); 180 assert(inst_mark() != NULL, "must be inside InstructionMark"); 181 if (rspec.type() != relocInfo::none) { 182 #ifdef ASSERT 183 check_relocation(rspec, format); 184 #endif 185 // Do not use AbstractAssembler::relocate, which is not intended for 186 // embedded 
words. Instead, relocate to the enclosing instruction. 187 188 // hack. call32 is too wide for mask so use disp32 189 if (format == call32_operand) 190 code_section()->relocate(inst_mark(), rspec, disp32_operand); 191 else 192 code_section()->relocate(inst_mark(), rspec, format); 193 } 194 emit_long(data); 195 } 196 197 static int encode(Register r) { 198 int enc = r->encoding(); 199 if (enc >= 8) { 200 enc -= 8; 201 } 202 return enc; 203 } 204 205 static int encode(XMMRegister r) { 206 int enc = r->encoding(); 207 if (enc >= 8) { 208 enc -= 8; 209 } 210 return enc; 211 } 212 213 void Assembler::emit_arith_b(int op1, int op2, Register dst, int imm8) { 214 assert(dst->has_byte_register(), "must have byte register"); 215 assert(isByte(op1) && isByte(op2), "wrong opcode"); 216 assert(isByte(imm8), "not a byte"); 217 assert((op1 & 0x01) == 0, "should be 8bit operation"); 218 emit_byte(op1); 219 emit_byte(op2 | encode(dst)); 220 emit_byte(imm8); 221 } 222 223 224 void Assembler::emit_arith(int op1, int op2, Register dst, int32_t imm32) { 225 assert(isByte(op1) && isByte(op2), "wrong opcode"); 226 assert((op1 & 0x01) == 1, "should be 32bit operation"); 227 assert((op1 & 0x02) == 0, "sign-extension bit should not be set"); 228 if (is8bit(imm32)) { 229 emit_byte(op1 | 0x02); // set sign bit 230 emit_byte(op2 | encode(dst)); 231 emit_byte(imm32 & 0xFF); 232 } else { 233 emit_byte(op1); 234 emit_byte(op2 | encode(dst)); 235 emit_long(imm32); 236 } 237 } 238 239 // immediate-to-memory forms 240 void Assembler::emit_arith_operand(int op1, Register rm, Address adr, int32_t imm32) { 241 assert((op1 & 0x01) == 1, "should be 32bit operation"); 242 assert((op1 & 0x02) == 0, "sign-extension bit should not be set"); 243 if (is8bit(imm32)) { 244 emit_byte(op1 | 0x02); // set sign bit 245 emit_operand(rm, adr, 1); 246 emit_byte(imm32 & 0xFF); 247 } else { 248 emit_byte(op1); 249 emit_operand(rm, adr, 4); 250 emit_long(imm32); 251 } 252 } 253 254 void Assembler::emit_arith(int op1, int 
op2, Register dst, jobject obj) { 255 LP64_ONLY(ShouldNotReachHere()); 256 assert(isByte(op1) && isByte(op2), "wrong opcode"); 257 assert((op1 & 0x01) == 1, "should be 32bit operation"); 258 assert((op1 & 0x02) == 0, "sign-extension bit should not be set"); 259 InstructionMark im(this); 260 emit_byte(op1); 261 emit_byte(op2 | encode(dst)); 262 emit_data((intptr_t)obj, relocInfo::oop_type, 0); 263 } 264 265 266 void Assembler::emit_arith(int op1, int op2, Register dst, Register src) { 267 assert(isByte(op1) && isByte(op2), "wrong opcode"); 268 emit_byte(op1); 269 emit_byte(op2 | encode(dst) << 3 | encode(src)); 270 } 271 272 273 void Assembler::emit_operand(Register reg, Register base, Register index, 274 Address::ScaleFactor scale, int disp, 275 RelocationHolder const& rspec, 276 int rip_relative_correction) { 277 relocInfo::relocType rtype = (relocInfo::relocType) rspec.type(); 278 279 // Encode the registers as needed in the fields they are used in 280 281 int regenc = encode(reg) << 3; 282 int indexenc = index->is_valid() ? encode(index) << 3 : 0; 283 int baseenc = base->is_valid() ? 
encode(base) : 0; 284 285 if (base->is_valid()) { 286 if (index->is_valid()) { 287 assert(scale != Address::no_scale, "inconsistent address"); 288 // [base + index*scale + disp] 289 if (disp == 0 && rtype == relocInfo::none && 290 base != rbp LP64_ONLY(&& base != r13)) { 291 // [base + index*scale] 292 // [00 reg 100][ss index base] 293 assert(index != rsp, "illegal addressing mode"); 294 emit_byte(0x04 | regenc); 295 emit_byte(scale << 6 | indexenc | baseenc); 296 } else if (is8bit(disp) && rtype == relocInfo::none) { 297 // [base + index*scale + imm8] 298 // [01 reg 100][ss index base] imm8 299 assert(index != rsp, "illegal addressing mode"); 300 emit_byte(0x44 | regenc); 301 emit_byte(scale << 6 | indexenc | baseenc); 302 emit_byte(disp & 0xFF); 303 } else { 304 // [base + index*scale + disp32] 305 // [10 reg 100][ss index base] disp32 306 assert(index != rsp, "illegal addressing mode"); 307 emit_byte(0x84 | regenc); 308 emit_byte(scale << 6 | indexenc | baseenc); 309 emit_data(disp, rspec, disp32_operand); 310 } 311 } else if (base == rsp LP64_ONLY(|| base == r12)) { 312 // [rsp + disp] 313 if (disp == 0 && rtype == relocInfo::none) { 314 // [rsp] 315 // [00 reg 100][00 100 100] 316 emit_byte(0x04 | regenc); 317 emit_byte(0x24); 318 } else if (is8bit(disp) && rtype == relocInfo::none) { 319 // [rsp + imm8] 320 // [01 reg 100][00 100 100] disp8 321 emit_byte(0x44 | regenc); 322 emit_byte(0x24); 323 emit_byte(disp & 0xFF); 324 } else { 325 // [rsp + imm32] 326 // [10 reg 100][00 100 100] disp32 327 emit_byte(0x84 | regenc); 328 emit_byte(0x24); 329 emit_data(disp, rspec, disp32_operand); 330 } 331 } else { 332 // [base + disp] 333 assert(base != rsp LP64_ONLY(&& base != r12), "illegal addressing mode"); 334 if (disp == 0 && rtype == relocInfo::none && 335 base != rbp LP64_ONLY(&& base != r13)) { 336 // [base] 337 // [00 reg base] 338 emit_byte(0x00 | regenc | baseenc); 339 } else if (is8bit(disp) && rtype == relocInfo::none) { 340 // [base + disp8] 341 // [01 reg 
base] disp8 342 emit_byte(0x40 | regenc | baseenc); 343 emit_byte(disp & 0xFF); 344 } else { 345 // [base + disp32] 346 // [10 reg base] disp32 347 emit_byte(0x80 | regenc | baseenc); 348 emit_data(disp, rspec, disp32_operand); 349 } 350 } 351 } else { 352 if (index->is_valid()) { 353 assert(scale != Address::no_scale, "inconsistent address"); 354 // [index*scale + disp] 355 // [00 reg 100][ss index 101] disp32 356 assert(index != rsp, "illegal addressing mode"); 357 emit_byte(0x04 | regenc); 358 emit_byte(scale << 6 | indexenc | 0x05); 359 emit_data(disp, rspec, disp32_operand); 360 } else if (rtype != relocInfo::none ) { 361 // [disp] (64bit) RIP-RELATIVE (32bit) abs 362 // [00 000 101] disp32 363 364 emit_byte(0x05 | regenc); 365 // Note that the RIP-rel. correction applies to the generated 366 // disp field, but _not_ to the target address in the rspec. 367 368 // disp was created by converting the target address minus the pc 369 // at the start of the instruction. That needs more correction here. 
370 // intptr_t disp = target - next_ip; 371 assert(inst_mark() != NULL, "must be inside InstructionMark"); 372 address next_ip = pc() + sizeof(int32_t) + rip_relative_correction; 373 int64_t adjusted = disp; 374 // Do rip-rel adjustment for 64bit 375 LP64_ONLY(adjusted -= (next_ip - inst_mark())); 376 assert(is_simm32(adjusted), 377 "must be 32bit offset (RIP relative address)"); 378 emit_data((int32_t) adjusted, rspec, disp32_operand); 379 380 } else { 381 // 32bit never did this, did everything as the rip-rel/disp code above 382 // [disp] ABSOLUTE 383 // [00 reg 100][00 100 101] disp32 384 emit_byte(0x04 | regenc); 385 emit_byte(0x25); 386 emit_data(disp, rspec, disp32_operand); 387 } 388 } 389 } 390 391 void Assembler::emit_operand(XMMRegister reg, Register base, Register index, 392 Address::ScaleFactor scale, int disp, 393 RelocationHolder const& rspec) { 394 emit_operand((Register)reg, base, index, scale, disp, rspec); 395 } 396 397 // Secret local extension to Assembler::WhichOperand: 398 #define end_pc_operand (_WhichOperand_limit) 399 400 address Assembler::locate_operand(address inst, WhichOperand which) { 401 // Decode the given instruction, and return the address of 402 // an embedded 32-bit operand word. 403 404 // If "which" is disp32_operand, selects the displacement portion 405 // of an effective address specifier. 406 // If "which" is imm64_operand, selects the trailing immediate constant. 407 // If "which" is call32_operand, selects the displacement of a call or jump. 408 // Caller is responsible for ensuring that there is such an operand, 409 // and that it is 32/64 bits wide. 410 411 // If "which" is end_pc_operand, find the end of the instruction. 412 413 address ip = inst; 414 bool is_64bit = false; 415 416 debug_only(bool has_disp32 = false); 417 int tail_size = 0; // other random bytes (#32, #16, etc.) 
at end of insn 418 419 again_after_prefix: 420 switch (0xFF & *ip++) { 421 422 // These convenience macros generate groups of "case" labels for the switch. 423 #define REP4(x) (x)+0: case (x)+1: case (x)+2: case (x)+3 424 #define REP8(x) (x)+0: case (x)+1: case (x)+2: case (x)+3: \ 425 case (x)+4: case (x)+5: case (x)+6: case (x)+7 426 #define REP16(x) REP8((x)+0): \ 427 case REP8((x)+8) 428 429 case CS_segment: 430 case SS_segment: 431 case DS_segment: 432 case ES_segment: 433 case FS_segment: 434 case GS_segment: 435 // Seems dubious 436 LP64_ONLY(assert(false, "shouldn't have that prefix")); 437 assert(ip == inst+1, "only one prefix allowed"); 438 goto again_after_prefix; 439 440 case 0x67: 441 case REX: 442 case REX_B: 443 case REX_X: 444 case REX_XB: 445 case REX_R: 446 case REX_RB: 447 case REX_RX: 448 case REX_RXB: 449 NOT_LP64(assert(false, "64bit prefixes")); 450 goto again_after_prefix; 451 452 case REX_W: 453 case REX_WB: 454 case REX_WX: 455 case REX_WXB: 456 case REX_WR: 457 case REX_WRB: 458 case REX_WRX: 459 case REX_WRXB: 460 NOT_LP64(assert(false, "64bit prefixes")); 461 is_64bit = true; 462 goto again_after_prefix; 463 464 case 0xFF: // pushq a; decl a; incl a; call a; jmp a 465 case 0x88: // movb a, r 466 case 0x89: // movl a, r 467 case 0x8A: // movb r, a 468 case 0x8B: // movl r, a 469 case 0x8F: // popl a 470 debug_only(has_disp32 = true); 471 break; 472 473 case 0x68: // pushq #32 474 if (which == end_pc_operand) { 475 return ip + 4; 476 } 477 assert(which == imm_operand && !is_64bit, "pushl has no disp32 or 64bit immediate"); 478 return ip; // not produced by emit_operand 479 480 case 0x66: // movw ... 
(size prefix) 481 again_after_size_prefix2: 482 switch (0xFF & *ip++) { 483 case REX: 484 case REX_B: 485 case REX_X: 486 case REX_XB: 487 case REX_R: 488 case REX_RB: 489 case REX_RX: 490 case REX_RXB: 491 case REX_W: 492 case REX_WB: 493 case REX_WX: 494 case REX_WXB: 495 case REX_WR: 496 case REX_WRB: 497 case REX_WRX: 498 case REX_WRXB: 499 NOT_LP64(assert(false, "64bit prefix found")); 500 goto again_after_size_prefix2; 501 case 0x8B: // movw r, a 502 case 0x89: // movw a, r 503 debug_only(has_disp32 = true); 504 break; 505 case 0xC7: // movw a, #16 506 debug_only(has_disp32 = true); 507 tail_size = 2; // the imm16 508 break; 509 case 0x0F: // several SSE/SSE2 variants 510 ip--; // reparse the 0x0F 511 goto again_after_prefix; 512 default: 513 ShouldNotReachHere(); 514 } 515 break; 516 517 case REP8(0xB8): // movl/q r, #32/#64(oop?) 518 if (which == end_pc_operand) return ip + (is_64bit ? 8 : 4); 519 // these asserts are somewhat nonsensical 520 #ifndef _LP64 521 assert(which == imm_operand || which == disp32_operand, ""); 522 #else 523 assert((which == call32_operand || which == imm_operand) && is_64bit || 524 which == narrow_oop_operand && !is_64bit, ""); 525 #endif // _LP64 526 return ip; 527 528 case 0x69: // imul r, a, #32 529 case 0xC7: // movl a, #32(oop?) 530 tail_size = 4; 531 debug_only(has_disp32 = true); // has both kinds of operands! 532 break; 533 534 case 0x0F: // movx..., etc. 
535 switch (0xFF & *ip++) { 536 case 0x12: // movlps 537 case 0x28: // movaps 538 case 0x2E: // ucomiss 539 case 0x2F: // comiss 540 case 0x54: // andps 541 case 0x55: // andnps 542 case 0x56: // orps 543 case 0x57: // xorps 544 case 0x6E: // movd 545 case 0x7E: // movd 546 case 0xAE: // ldmxcsr a 547 // 64bit side says it these have both operands but that doesn't 548 // appear to be true 549 debug_only(has_disp32 = true); 550 break; 551 552 case 0xAD: // shrd r, a, %cl 553 case 0xAF: // imul r, a 554 case 0xBE: // movsbl r, a (movsxb) 555 case 0xBF: // movswl r, a (movsxw) 556 case 0xB6: // movzbl r, a (movzxb) 557 case 0xB7: // movzwl r, a (movzxw) 558 case REP16(0x40): // cmovl cc, r, a 559 case 0xB0: // cmpxchgb 560 case 0xB1: // cmpxchg 561 case 0xC1: // xaddl 562 case 0xC7: // cmpxchg8 563 case REP16(0x90): // setcc a 564 debug_only(has_disp32 = true); 565 // fall out of the switch to decode the address 566 break; 567 568 case 0xAC: // shrd r, a, #8 569 debug_only(has_disp32 = true); 570 tail_size = 1; // the imm8 571 break; 572 573 case REP16(0x80): // jcc rdisp32 574 if (which == end_pc_operand) return ip + 4; 575 assert(which == call32_operand, "jcc has no disp32 or imm"); 576 return ip; 577 default: 578 ShouldNotReachHere(); 579 } 580 break; 581 582 case 0x81: // addl a, #32; addl r, #32 583 // also: orl, adcl, sbbl, andl, subl, xorl, cmpl 584 // on 32bit in the case of cmpl, the imm might be an oop 585 tail_size = 4; 586 debug_only(has_disp32 = true); // has both kinds of operands! 587 break; 588 589 case 0x83: // addl a, #8; addl r, #8 590 // also: orl, adcl, sbbl, andl, subl, xorl, cmpl 591 debug_only(has_disp32 = true); // has both kinds of operands! 
592 tail_size = 1; 593 break; 594 595 case 0x9B: 596 switch (0xFF & *ip++) { 597 case 0xD9: // fnstcw a 598 debug_only(has_disp32 = true); 599 break; 600 default: 601 ShouldNotReachHere(); 602 } 603 break; 604 605 case REP4(0x00): // addb a, r; addl a, r; addb r, a; addl r, a 606 case REP4(0x10): // adc... 607 case REP4(0x20): // and... 608 case REP4(0x30): // xor... 609 case REP4(0x08): // or... 610 case REP4(0x18): // sbb... 611 case REP4(0x28): // sub... 612 case 0xF7: // mull a 613 case 0x8D: // lea r, a 614 case 0x87: // xchg r, a 615 case REP4(0x38): // cmp... 616 case 0x85: // test r, a 617 debug_only(has_disp32 = true); // has both kinds of operands! 618 break; 619 620 case 0xC1: // sal a, #8; sar a, #8; shl a, #8; shr a, #8 621 case 0xC6: // movb a, #8 622 case 0x80: // cmpb a, #8 623 case 0x6B: // imul r, a, #8 624 debug_only(has_disp32 = true); // has both kinds of operands! 625 tail_size = 1; // the imm8 626 break; 627 628 case 0xE8: // call rdisp32 629 case 0xE9: // jmp rdisp32 630 if (which == end_pc_operand) return ip + 4; 631 assert(which == call32_operand, "call has no disp32 or imm"); 632 return ip; 633 634 case 0xD1: // sal a, 1; sar a, 1; shl a, 1; shr a, 1 635 case 0xD3: // sal a, %cl; sar a, %cl; shl a, %cl; shr a, %cl 636 case 0xD9: // fld_s a; fst_s a; fstp_s a; fldcw a 637 case 0xDD: // fld_d a; fst_d a; fstp_d a 638 case 0xDB: // fild_s a; fistp_s a; fld_x a; fstp_x a 639 case 0xDF: // fild_d a; fistp_d a 640 case 0xD8: // fadd_s a; fsubr_s a; fmul_s a; fdivr_s a; fcomp_s a 641 case 0xDC: // fadd_d a; fsubr_d a; fmul_d a; fdivr_d a; fcomp_d a 642 case 0xDE: // faddp_d a; fsubrp_d a; fmulp_d a; fdivrp_d a; fcompp_d a 643 debug_only(has_disp32 = true); 644 break; 645 646 case 0xF0: // Lock 647 assert(os::is_MP(), "only on MP"); 648 goto again_after_prefix; 649 650 case 0xF3: // For SSE 651 case 0xF2: // For SSE2 652 switch (0xFF & *ip++) { 653 case REX: 654 case REX_B: 655 case REX_X: 656 case REX_XB: 657 case REX_R: 658 case REX_RB: 659 
case REX_RX: 660 case REX_RXB: 661 case REX_W: 662 case REX_WB: 663 case REX_WX: 664 case REX_WXB: 665 case REX_WR: 666 case REX_WRB: 667 case REX_WRX: 668 case REX_WRXB: 669 NOT_LP64(assert(false, "found 64bit prefix")); 670 ip++; 671 default: 672 ip++; 673 } 674 debug_only(has_disp32 = true); // has both kinds of operands! 675 break; 676 677 default: 678 ShouldNotReachHere(); 679 680 #undef REP8 681 #undef REP16 682 } 683 684 assert(which != call32_operand, "instruction is not a call, jmp, or jcc"); 685 #ifdef _LP64 686 assert(which != imm_operand, "instruction is not a movq reg, imm64"); 687 #else 688 // assert(which != imm_operand || has_imm32, "instruction has no imm32 field"); 689 assert(which != imm_operand || has_disp32, "instruction has no imm32 field"); 690 #endif // LP64 691 assert(which != disp32_operand || has_disp32, "instruction has no disp32 field"); 692 693 // parse the output of emit_operand 694 int op2 = 0xFF & *ip++; 695 int base = op2 & 0x07; 696 int op3 = -1; 697 const int b100 = 4; 698 const int b101 = 5; 699 if (base == b100 && (op2 >> 6) != 3) { 700 op3 = 0xFF & *ip++; 701 base = op3 & 0x07; // refetch the base 702 } 703 // now ip points at the disp (if any) 704 705 switch (op2 >> 6) { 706 case 0: 707 // [00 reg 100][ss index base] 708 // [00 reg 100][00 100 esp] 709 // [00 reg base] 710 // [00 reg 100][ss index 101][disp32] 711 // [00 reg 101] [disp32] 712 713 if (base == b101) { 714 if (which == disp32_operand) 715 return ip; // caller wants the disp32 716 ip += 4; // skip the disp32 717 } 718 break; 719 720 case 1: 721 // [01 reg 100][ss index base][disp8] 722 // [01 reg 100][00 100 esp][disp8] 723 // [01 reg base] [disp8] 724 ip += 1; // skip the disp8 725 break; 726 727 case 2: 728 // [10 reg 100][ss index base][disp32] 729 // [10 reg 100][00 100 esp][disp32] 730 // [10 reg base] [disp32] 731 if (which == disp32_operand) 732 return ip; // caller wants the disp32 733 ip += 4; // skip the disp32 734 break; 735 736 case 3: 737 // [11 reg 
base] (not a memory addressing mode) 738 break; 739 } 740 741 if (which == end_pc_operand) { 742 return ip + tail_size; 743 } 744 745 #ifdef _LP64 746 assert(which == narrow_oop_operand && !is_64bit, "instruction is not a movl adr, imm32"); 747 #else 748 assert(which == imm_operand, "instruction has only an imm field"); 749 #endif // LP64 750 return ip; 751 } 752 753 address Assembler::locate_next_instruction(address inst) { 754 // Secretly share code with locate_operand: 755 return locate_operand(inst, end_pc_operand); 756 } 757 758 759 #ifdef ASSERT 760 void Assembler::check_relocation(RelocationHolder const& rspec, int format) { 761 address inst = inst_mark(); 762 assert(inst != NULL && inst < pc(), "must point to beginning of instruction"); 763 address opnd; 764 765 Relocation* r = rspec.reloc(); 766 if (r->type() == relocInfo::none) { 767 return; 768 } else if (r->is_call() || format == call32_operand) { 769 // assert(format == imm32_operand, "cannot specify a nonzero format"); 770 opnd = locate_operand(inst, call32_operand); 771 } else if (r->is_data()) { 772 assert(format == imm_operand || format == disp32_operand 773 LP64_ONLY(|| format == narrow_oop_operand), "format ok"); 774 opnd = locate_operand(inst, (WhichOperand)format); 775 } else { 776 assert(format == imm_operand, "cannot specify a format"); 777 return; 778 } 779 assert(opnd == pc(), "must put operand where relocs can find it"); 780 } 781 #endif // ASSERT 782 783 void Assembler::emit_operand32(Register reg, Address adr) { 784 assert(reg->encoding() < 8, "no extended registers"); 785 assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers"); 786 emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp, 787 adr._rspec); 788 } 789 790 void Assembler::emit_operand(Register reg, Address adr, 791 int rip_relative_correction) { 792 emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp, 793 adr._rspec, 794 rip_relative_correction); 795 } 796 797 void 
Assembler::emit_operand(XMMRegister reg, Address adr) { 798 emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp, 799 adr._rspec); 800 } 801 802 // MMX operations 803 void Assembler::emit_operand(MMXRegister reg, Address adr) { 804 assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers"); 805 emit_operand((Register)reg, adr._base, adr._index, adr._scale, adr._disp, adr._rspec); 806 } 807 808 // work around gcc (3.2.1-7a) bug 809 void Assembler::emit_operand(Address adr, MMXRegister reg) { 810 assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers"); 811 emit_operand((Register)reg, adr._base, adr._index, adr._scale, adr._disp, adr._rspec); 812 } 813 814 815 void Assembler::emit_farith(int b1, int b2, int i) { 816 assert(isByte(b1) && isByte(b2), "wrong opcode"); 817 assert(0 <= i && i < 8, "illegal stack offset"); 818 emit_byte(b1); 819 emit_byte(b2 + i); 820 } 821 822 823 // Now the Assembler instructions (identical for 32/64 bits) 824 825 void Assembler::adcl(Address dst, int32_t imm32) { 826 InstructionMark im(this); 827 prefix(dst); 828 emit_arith_operand(0x81, rdx, dst, imm32); 829 } 830 831 void Assembler::adcl(Address dst, Register src) { 832 InstructionMark im(this); 833 prefix(dst, src); 834 emit_byte(0x11); 835 emit_operand(src, dst); 836 } 837 838 void Assembler::adcl(Register dst, int32_t imm32) { 839 prefix(dst); 840 emit_arith(0x81, 0xD0, dst, imm32); 841 } 842 843 void Assembler::adcl(Register dst, Address src) { 844 InstructionMark im(this); 845 prefix(src, dst); 846 emit_byte(0x13); 847 emit_operand(dst, src); 848 } 849 850 void Assembler::adcl(Register dst, Register src) { 851 (void) prefix_and_encode(dst->encoding(), src->encoding()); 852 emit_arith(0x13, 0xC0, dst, src); 853 } 854 855 void Assembler::addl(Address dst, int32_t imm32) { 856 InstructionMark im(this); 857 prefix(dst); 858 emit_arith_operand(0x81, rax, dst, imm32); 859 } 860 861 void Assembler::addl(Address dst, Register src) { 
862 InstructionMark im(this); 863 prefix(dst, src); 864 emit_byte(0x01); 865 emit_operand(src, dst); 866 } 867 868 void Assembler::addl(Register dst, int32_t imm32) { 869 prefix(dst); 870 emit_arith(0x81, 0xC0, dst, imm32); 871 } 872 873 void Assembler::addl(Register dst, Address src) { 874 InstructionMark im(this); 875 prefix(src, dst); 876 emit_byte(0x03); 877 emit_operand(dst, src); 878 } 879 880 void Assembler::addl(Register dst, Register src) { 881 (void) prefix_and_encode(dst->encoding(), src->encoding()); 882 emit_arith(0x03, 0xC0, dst, src); 883 } 884 885 void Assembler::addr_nop_4() { 886 // 4 bytes: NOP DWORD PTR [EAX+0] 887 emit_byte(0x0F); 888 emit_byte(0x1F); 889 emit_byte(0x40); // emit_rm(cbuf, 0x1, EAX_enc, EAX_enc); 890 emit_byte(0); // 8-bits offset (1 byte) 891 } 892 893 void Assembler::addr_nop_5() { 894 // 5 bytes: NOP DWORD PTR [EAX+EAX*0+0] 8-bits offset 895 emit_byte(0x0F); 896 emit_byte(0x1F); 897 emit_byte(0x44); // emit_rm(cbuf, 0x1, EAX_enc, 0x4); 898 emit_byte(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc); 899 emit_byte(0); // 8-bits offset (1 byte) 900 } 901 902 void Assembler::addr_nop_7() { 903 // 7 bytes: NOP DWORD PTR [EAX+0] 32-bits offset 904 emit_byte(0x0F); 905 emit_byte(0x1F); 906 emit_byte(0x80); // emit_rm(cbuf, 0x2, EAX_enc, EAX_enc); 907 emit_long(0); // 32-bits offset (4 bytes) 908 } 909 910 void Assembler::addr_nop_8() { 911 // 8 bytes: NOP DWORD PTR [EAX+EAX*0+0] 32-bits offset 912 emit_byte(0x0F); 913 emit_byte(0x1F); 914 emit_byte(0x84); // emit_rm(cbuf, 0x2, EAX_enc, 0x4); 915 emit_byte(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc); 916 emit_long(0); // 32-bits offset (4 bytes) 917 } 918 919 void Assembler::addsd(XMMRegister dst, XMMRegister src) { 920 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 921 emit_byte(0xF2); 922 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 923 emit_byte(0x0F); 924 emit_byte(0x58); 925 emit_byte(0xC0 | encode); 926 } 927 928 void Assembler::addsd(XMMRegister dst, 
Address src) { 929 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 930 InstructionMark im(this); 931 emit_byte(0xF2); 932 prefix(src, dst); 933 emit_byte(0x0F); 934 emit_byte(0x58); 935 emit_operand(dst, src); 936 } 937 938 void Assembler::addss(XMMRegister dst, XMMRegister src) { 939 NOT_LP64(assert(VM_Version::supports_sse(), "")); 940 emit_byte(0xF3); 941 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 942 emit_byte(0x0F); 943 emit_byte(0x58); 944 emit_byte(0xC0 | encode); 945 } 946 947 void Assembler::addss(XMMRegister dst, Address src) { 948 NOT_LP64(assert(VM_Version::supports_sse(), "")); 949 InstructionMark im(this); 950 emit_byte(0xF3); 951 prefix(src, dst); 952 emit_byte(0x0F); 953 emit_byte(0x58); 954 emit_operand(dst, src); 955 } 956 957 void Assembler::andl(Register dst, int32_t imm32) { 958 prefix(dst); 959 emit_arith(0x81, 0xE0, dst, imm32); 960 } 961 962 void Assembler::andl(Register dst, Address src) { 963 InstructionMark im(this); 964 prefix(src, dst); 965 emit_byte(0x23); 966 emit_operand(dst, src); 967 } 968 969 void Assembler::andl(Register dst, Register src) { 970 (void) prefix_and_encode(dst->encoding(), src->encoding()); 971 emit_arith(0x23, 0xC0, dst, src); 972 } 973 974 void Assembler::andpd(XMMRegister dst, Address src) { 975 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 976 InstructionMark im(this); 977 emit_byte(0x66); 978 prefix(src, dst); 979 emit_byte(0x0F); 980 emit_byte(0x54); 981 emit_operand(dst, src); 982 } 983 984 void Assembler::bsfl(Register dst, Register src) { 985 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 986 emit_byte(0x0F); 987 emit_byte(0xBC); 988 emit_byte(0xC0 | encode); 989 } 990 991 void Assembler::bsrl(Register dst, Register src) { 992 assert(!VM_Version::supports_lzcnt(), "encoding is treated as LZCNT"); 993 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 994 emit_byte(0x0F); 995 emit_byte(0xBD); 996 emit_byte(0xC0 | encode); 997 } 998 999 void 
Assembler::bswapl(Register reg) { // bswap
  // 0F followed by (0xC8 | encode): the register is folded into the opcode byte.
  int encode = prefix_and_encode(reg->encoding());
  emit_byte(0x0F);
  emit_byte(0xC8 | encode);
}

// call L: emits E8 plus a 32-bit displacement.
// If L is already bound the displacement is computed now (offs must be <= 0,
// i.e. the target lies at or before the current pc); otherwise a patch site
// is recorded on the label and a zero placeholder is emitted.
void Assembler::call(Label& L, relocInfo::relocType rtype) {
  // suspect disp32 is always good
  int operand = LP64_ONLY(disp32_operand) NOT_LP64(imm_operand);

  if (L.is_bound()) {
    // Total instruction length: 1 opcode byte + 4 displacement bytes.
    const int long_size = 5;
    int offs = (int)( target(L) - pc() );
    assert(offs <= 0, "assembler error");
    InstructionMark im(this);
    // 1110 1000 #32-bit disp
    emit_byte(0xE8);
    // offs was measured from the start of the instruction; subtracting
    // long_size makes it relative to the end of the instruction.
    emit_data(offs - long_size, rtype, operand);
  } else {
    InstructionMark im(this);
    // 1110 1000 #32-bit disp
    L.add_patch_at(code(), locator());

    emit_byte(0xE8);
    emit_data(int(0), rtype, operand);
  }
}

// call dst (indirect through a register): FF with ModRM (0xD0 | encode).
void Assembler::call(Register dst) {
  // This was originally using a 32bit register encoding
  // and surely we want 64bit!
  // this is a 32bit encoding but in 64bit mode the default
  // operand size is 64bit so there is no need for the
  // wide prefix. So prefix only happens if we use the
  // new registers. Much like push/pop.
  // NOTE(review): x is only referenced by the commented-out check below and
  // is otherwise unused.
  int x = offset();
  // this may be true but dbx disassembles it as if it
  // were 32bits...
  // int encode = prefix_and_encode(dst->encoding());
  // if (offset() != x) assert(dst->encoding() >= 8, "what?");
  int encode = prefixq_and_encode(dst->encoding());

  emit_byte(0xFF);
  emit_byte(0xD0 | encode);
}


// call [adr] (indirect through memory): FF followed by the memory operand.
void Assembler::call(Address adr) {
  InstructionMark im(this);
  prefix(adr);
  emit_byte(0xFF);
  // rdx is not an operand of the call; it only fills the ModRM reg field,
  // which the FF opcode group uses as an opcode extension (/2 = CALL in the
  // x86 instruction reference).
  emit_operand(rdx, adr);
}

// call to an absolute address: E8 plus a pc-relative 32-bit displacement,
// emitted with the relocation described by rspec.
// entry must be non-NULL and within simm32 range of the end of the instruction.
void Assembler::call_literal(address entry, RelocationHolder const& rspec) {
  assert(entry != NULL, "call most probably wrong");
  InstructionMark im(this);
  emit_byte(0xE8);
  // _code_pos now points just past the opcode; adding the 4 displacement
  // bytes gives the address of the next instruction.
  intptr_t disp = entry - (_code_pos + sizeof(int32_t));
  assert(is_simm32(disp), "must be 32bit offset (call2)");
  // Technically, should use call32_operand, but this format is
  // implied by the fact that we're emitting a call instruction.

  int operand = LP64_ONLY(disp32_operand) NOT_LP64(call32_operand);
  emit_data((int) disp, rspec, operand);
}

// cdql: single opcode byte 0x99.
void Assembler::cdql() {
  emit_byte(0x99);
}

// cmovl cc, dst, src (register form): 0F (0x40 | cc) with a register-direct
// ModRM byte. On 32-bit builds guarantees that CMOV is supported.
void Assembler::cmovl(Condition cc, Register dst, Register src) {
  NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction"));
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x40 | cc);
  emit_byte(0xC0 | encode);
}


// cmovl cc, dst, [src] (memory form): 0F (0x40 | cc) plus the memory operand.
// NOTE(review): unlike the other Address-operand emitters in this file, this
// one does not create an InstructionMark before emit_operand() - confirm
// whether that is intentional.
void Assembler::cmovl(Condition cc, Register dst, Address src) {
  NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction"));
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x40 | cc);
  emit_operand(dst, src);
}

// cmpb [dst], imm8: opcode 0x80, memory operand, then the immediate byte.
void Assembler::cmpb(Address dst, int imm8) {
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0x80);
  // rdi only fills the ModRM reg field (opcode extension /7 = CMP in the x86
  // instruction reference). The trailing 1 matches the one immediate byte
  // emitted after the operand - presumably a displacement correction; confirm
  // against emit_operand().
  emit_operand(rdi, dst, 1);
  emit_byte(imm8);
}

// cmpl [dst], imm32: opcode 0x81, memory operand, then the 32-bit immediate.
void Assembler::cmpl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0x81);
  // rdi only fills the ModRM reg field (opcode extension); the trailing 4
  // matches the four immediate bytes emitted after the operand.
  emit_operand(rdi, dst, 4);
  emit_long(imm32);
}

// cmpl dst, imm32: delegates to emit_arith() with opcode 0x81 and ModRM
// base 0xF8.
void Assembler::cmpl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xF8, dst, imm32);
}

// cmpl dst, src (register form): opcode 0x3B via emit_arith().
void Assembler::cmpl(Register dst, Register src) {
  // Called only for the prefix it emits; the returned encoding is discarded.
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x3B, 0xC0, dst, src);
}


// cmpl dst, [src]: opcode 0x3B followed by the memory operand.
void Assembler::cmpl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x3B);
  emit_operand(dst, src);
}

// cmpw [dst], imm16: 66 (operand-size prefix), 81, memory operand, 16-bit
// immediate. No prefix() call is made, so the address must not use registers
// that need a REX prefix (asserted).
void Assembler::cmpw(Address dst, int imm16) {
  InstructionMark im(this);
  assert(!dst.base_needs_rex() && !dst.index_needs_rex(), "no extended registers");
  emit_byte(0x66);
  emit_byte(0x81);
  // rdi only fills the ModRM reg field (opcode extension); the trailing 2
  // matches the two immediate bytes emitted after the operand.
  emit_operand(rdi, dst, 2);
  emit_word(imm16);
}

// The 32-bit cmpxchg compares the value at adr with the contents of rax,
// and stores reg into adr if they are equal; otherwise, the value at adr is
// loaded into rax.
// The ZF is set if the compared values were equal, and cleared otherwise.
void Assembler::cmpxchgl(Register reg, Address adr) { // cmpxchg
  if (Atomics & 2) {
     // caveat: no instructionmark, so this isn't relocatable.
     // Emit a synthetic, non-atomic, CAS equivalent.
     // Beware.  The synthetic form sets all ICCs, not just ZF.
     // cmpxchg r,[m] is equivalent to rax = CAS(m, rax, r)
     cmpl(rax, adr);
     movl(rax, adr);
     // When reg is rax the conditional store is skipped entirely.
     if (reg != rax) {
        Label L ;
        jcc(Assembler::notEqual, L);
        movl(adr, reg);
        bind(L);
     }
  } else {
     // Real instruction: 0F B1 followed by the memory operand.
     InstructionMark im(this);
     prefix(adr, reg);
     emit_byte(0x0F);
     emit_byte(0xB1);
     emit_operand(reg, adr);
  }
}

// comisd dst, [src]: emits the 66 prefix and then reuses the comiss encoder.
// Note that the InstructionMark is created inside comiss(), i.e. after the
// 66 prefix byte has already been emitted.
void Assembler::comisd(XMMRegister dst, Address src) {
  // NOTE: dbx seems to decode this as comiss even though the
  // 0x66 is there. Strangely ucomisd comes out correct
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0x66);
  comiss(dst, src);
}

// comiss dst, [src]: 0F 2F followed by the memory operand.
void Assembler::comiss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));

  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x2F);
  emit_operand(dst, src);
}

// cvtdq2pd dst, src: F3, prefix, 0F E6, register-direct ModRM.
void Assembler::cvtdq2pd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0xF3);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xE6);
  emit_byte(0xC0 | encode);
}

// cvtdq2ps dst, src: prefix, 0F 5B, register-direct ModRM (no mandatory
// prefix byte is emitted).
void Assembler::cvtdq2ps(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x5B);
  emit_byte(0xC0 | encode);
}

// cvtsd2ss dst, src: F2, prefix, 0F 5A, register-direct ModRM.
void Assembler::cvtsd2ss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0xF2);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x5A);
  emit_byte(0xC0 | encode);
}

// cvtsi2sdl dst, src (general register source): F2, prefix, 0F 2A,
// register-direct ModRM.
void Assembler::cvtsi2sdl(XMMRegister dst, Register src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0xF2);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x2A);
  emit_byte(0xC0 | encode);
}

// cvtsi2ssl dst, src (general register source): F3, prefix, 0F 2A,
// register-direct ModRM.
void Assembler::cvtsi2ssl(XMMRegister dst, Register src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_byte(0xF3);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x2A);
  emit_byte(0xC0 | encode);
}

// cvtss2sd dst, src: F3, prefix, 0F 5A, register-direct ModRM.
void Assembler::cvtss2sd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0xF3);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x5A);
  emit_byte(0xC0 | encode);
}

// cvttsd2sil dst, src (general register destination): F2, prefix, 0F 2C,
// register-direct ModRM.
void Assembler::cvttsd2sil(Register dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0xF2);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x2C);
  emit_byte(0xC0 | encode);
}

// cvttss2sil dst, src (general register destination): F3, prefix, 0F 2C,
// register-direct ModRM.
void Assembler::cvttss2sil(Register dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_byte(0xF3);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x2C);
  emit_byte(0xC0 | encode);
}

// decl [dst]: FF followed by the memory operand.
void Assembler::decl(Address dst) {
  // Don't use it directly. Use MacroAssembler::decrement() instead.
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0xFF);
  // rcx only fills the ModRM reg field (opcode extension /1 = DEC in the x86
  // instruction reference).
  emit_operand(rcx, dst);
}

// divsd dst, [src]: F2, prefix, 0F 5E, memory operand.
void Assembler::divsd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0xF2);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x5E);
  emit_operand(dst, src);
}

// divsd dst, src: F2, prefix, 0F 5E, register-direct ModRM.
void Assembler::divsd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0xF2);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x5E);
  emit_byte(0xC0 | encode);
}

// divss dst, [src]: F3, prefix, 0F 5E, memory operand.
void Assembler::divss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  emit_byte(0xF3);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x5E);
  emit_operand(dst, src);
}

// divss dst, src: F3, prefix, 0F 5E, register-direct ModRM.
void Assembler::divss(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_byte(0xF3);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x5E);
  emit_byte(0xC0 | encode);
}

// emms: two-byte opcode 0F 77. On 32-bit builds asserts MMX support.
void Assembler::emms() {
  NOT_LP64(assert(VM_Version::supports_mmx(), ""));
  emit_byte(0x0F);
  emit_byte(0x77);
}

// hlt: single opcode byte 0xF4.
void Assembler::hlt() {
  emit_byte(0xF4);
}

// idivl src (signed divide): F7 with ModRM (0xF8 | encode).
void Assembler::idivl(Register src) {
  int encode = prefix_and_encode(src->encoding());
  emit_byte(0xF7);
  emit_byte(0xF8 | encode);
}

// divl src: F7 with ModRM (0xF0 | encode).
void Assembler::divl(Register src) { // Unsigned
  int encode = prefix_and_encode(src->encoding());
  emit_byte(0xF7);
  emit_byte(0xF0 | encode);
}

// imull dst, src: 0F AF with a register-direct ModRM byte.
void Assembler::imull(Register dst, Register src) {
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xAF);
  emit_byte(0xC0 | encode);
}


// imull dst, src, value: three-operand multiply by an immediate.
// Uses the short form (6B + one immediate byte) when is8bit(value) holds,
// otherwise the long form (69 + four immediate bytes).
void Assembler::imull(Register dst, Register src, int value) {
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  if (is8bit(value)) {
    emit_byte(0x6B);
    emit_byte(0xC0 | encode);
    emit_byte(value & 0xFF);
  } else {
    emit_byte(0x69);
    emit_byte(0xC0 | encode);
    emit_long(value);
  }
}

// incl [dst]: FF followed by the memory operand.
void Assembler::incl(Address dst) {
  // Don't use it directly. Use MacroAssembler::increment() instead.
InstructionMark im(this);
  prefix(dst);
  emit_byte(0xFF);
  // rax only fills the ModRM reg field (opcode extension /0 = INC in the x86
  // instruction reference).
  emit_operand(rax, dst);
}

// jcc cc, L: conditional jump to a label.
// Bound label: picks the 2-byte short form (0x70|cc, disp8) when no
// relocation is requested and the displacement fits in 8 bits, otherwise the
// 6-byte long form (0F, 0x80|cc, disp32).
// Unbound label: always emits the long form with a zero placeholder and
// records a patch site on the label.
void Assembler::jcc(Condition cc, Label& L, relocInfo::relocType rtype) {
  InstructionMark im(this);
  relocate(rtype);
  assert((0 <= cc) && (cc < 16), "illegal cc");
  if (L.is_bound()) {
    address dst = target(L);
    assert(dst != NULL, "jcc most probably wrong");

    // Total instruction lengths of the two encodings.
    const int short_size = 2;
    const int long_size = 6;
    // Distance from the start of this instruction; the encoded displacement
    // is relative to the end, hence the "- size" adjustments below.
    intptr_t offs = (intptr_t)dst - (intptr_t)_code_pos;
    if (rtype == relocInfo::none && is8bit(offs - short_size)) {
      // 0111 tttn #8-bit disp
      emit_byte(0x70 | cc);
      emit_byte((offs - short_size) & 0xFF);
    } else {
      // 0000 1111 1000 tttn #32-bit disp
      assert(is_simm32(offs - long_size),
             "must be 32bit offset (call4)");
      emit_byte(0x0F);
      emit_byte(0x80 | cc);
      emit_long(offs - long_size);
    }
  } else {
    // Note: could eliminate cond. jumps to this jump if condition
    //       is the same however, seems to be rather unlikely case.
    // Note: use jccb() if label to be bound is very close to get
    //       an 8-bit displacement
    L.add_patch_at(code(), locator());
    emit_byte(0x0F);
    emit_byte(0x80 | cc);
    emit_long(0);
  }
}

// jccb cc, L: conditional jump forced to the 2-byte short form
// (0x70|cc, disp8). For a bound label the displacement is asserted to fit in
// 8 bits; for an unbound label a zero placeholder is emitted and a patch site
// recorded.
void Assembler::jccb(Condition cc, Label& L) {
  if (L.is_bound()) {
    const int short_size = 2;
    address entry = target(L);
    // Range check on the displacement relative to the end of the instruction.
    assert(is8bit((intptr_t)entry - ((intptr_t)_code_pos + short_size)),
           "Dispacement too large for a short jmp");
    intptr_t offs = (intptr_t)entry - (intptr_t)_code_pos;
    // 0111 tttn #8-bit disp
    emit_byte(0x70 | cc);
    emit_byte((offs - short_size) & 0xFF);
  } else {
    InstructionMark im(this);
    L.add_patch_at(code(), locator());
    emit_byte(0x70 | cc);
    emit_byte(0);
  }
}

// jmp [adr] (indirect through memory): FF followed by the memory operand.
void Assembler::jmp(Address adr) {
  InstructionMark im(this);
  prefix(adr);
  emit_byte(0xFF);
  // rsp only fills the ModRM reg field (opcode extension /4 = JMP in the x86
  // instruction reference).
  emit_operand(rsp, adr);
}

// jmp L: unconditional jump to a label.
// Bound label: short form (EB, disp8) when no relocation is requested and the
// displacement fits in 8 bits, otherwise long form (E9, disp32).
// Unbound label: always the long form with a zero placeholder.
void Assembler::jmp(Label& L, relocInfo::relocType rtype) {
  if (L.is_bound()) {
    address entry = target(L);
    assert(entry != NULL, "jmp most probably wrong");
    InstructionMark im(this);
    // Total instruction lengths of the two encodings.
    const int short_size = 2;
    const int long_size = 5;
    // Distance from the start of this instruction; the encoded displacement
    // is relative to the end, hence the "- size" adjustments below.
    intptr_t offs = entry - _code_pos;
    if (rtype == relocInfo::none && is8bit(offs - short_size)) {
      emit_byte(0xEB);
      emit_byte((offs - short_size) & 0xFF);
    } else {
      emit_byte(0xE9);
      emit_long(offs - long_size);
    }
  } else {
    // By default, forward jumps are always 32-bit displacements, since
    // we can't yet know where the label will be bound.  If you're sure that
    // the forward jump will not run beyond 256 bytes, use jmpb to
    // force an 8-bit displacement.
1422 InstructionMark im(this); 1423 relocate(rtype); 1424 L.add_patch_at(code(), locator()); 1425 emit_byte(0xE9); 1426 emit_long(0); 1427 } 1428 } 1429 1430 void Assembler::jmp(Register entry) { 1431 int encode = prefix_and_encode(entry->encoding()); 1432 emit_byte(0xFF); 1433 emit_byte(0xE0 | encode); 1434 } 1435 1436 void Assembler::jmp_literal(address dest, RelocationHolder const& rspec) { 1437 InstructionMark im(this); 1438 emit_byte(0xE9); 1439 assert(dest != NULL, "must have a target"); 1440 intptr_t disp = dest - (_code_pos + sizeof(int32_t)); 1441 assert(is_simm32(disp), "must be 32bit offset (jmp)"); 1442 emit_data(disp, rspec.reloc(), call32_operand); 1443 } 1444 1445 void Assembler::jmpb(Label& L) { 1446 if (L.is_bound()) { 1447 const int short_size = 2; 1448 address entry = target(L); 1449 assert(is8bit((entry - _code_pos) + short_size), 1450 "Dispacement too large for a short jmp"); 1451 assert(entry != NULL, "jmp most probably wrong"); 1452 intptr_t offs = entry - _code_pos; 1453 emit_byte(0xEB); 1454 emit_byte((offs - short_size) & 0xFF); 1455 } else { 1456 InstructionMark im(this); 1457 L.add_patch_at(code(), locator()); 1458 emit_byte(0xEB); 1459 emit_byte(0); 1460 } 1461 } 1462 1463 void Assembler::ldmxcsr( Address src) { 1464 NOT_LP64(assert(VM_Version::supports_sse(), "")); 1465 InstructionMark im(this); 1466 prefix(src); 1467 emit_byte(0x0F); 1468 emit_byte(0xAE); 1469 emit_operand(as_Register(2), src); 1470 } 1471 1472 void Assembler::leal(Register dst, Address src) { 1473 InstructionMark im(this); 1474 #ifdef _LP64 1475 emit_byte(0x67); // addr32 1476 prefix(src, dst); 1477 #endif // LP64 1478 emit_byte(0x8D); 1479 emit_operand(dst, src); 1480 } 1481 1482 void Assembler::lock() { 1483 if (Atomics & 1) { 1484 // Emit either nothing, a NOP, or a NOP: prefix 1485 emit_byte(0x90) ; 1486 } else { 1487 emit_byte(0xF0); 1488 } 1489 } 1490 1491 void Assembler::lzcntl(Register dst, Register src) { 1492 assert(VM_Version::supports_lzcnt(), "encoding 
is treated as BSR"); 1493 emit_byte(0xF3); 1494 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 1495 emit_byte(0x0F); 1496 emit_byte(0xBD); 1497 emit_byte(0xC0 | encode); 1498 } 1499 1500 // Emit mfence instruction 1501 void Assembler::mfence() { 1502 NOT_LP64(assert(VM_Version::supports_sse2(), "unsupported");) 1503 emit_byte( 0x0F ); 1504 emit_byte( 0xAE ); 1505 emit_byte( 0xF0 ); 1506 } 1507 1508 void Assembler::mov(Register dst, Register src) { 1509 LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src)); 1510 } 1511 1512 void Assembler::movapd(XMMRegister dst, XMMRegister src) { 1513 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1514 int dstenc = dst->encoding(); 1515 int srcenc = src->encoding(); 1516 emit_byte(0x66); 1517 if (dstenc < 8) { 1518 if (srcenc >= 8) { 1519 prefix(REX_B); 1520 srcenc -= 8; 1521 } 1522 } else { 1523 if (srcenc < 8) { 1524 prefix(REX_R); 1525 } else { 1526 prefix(REX_RB); 1527 srcenc -= 8; 1528 } 1529 dstenc -= 8; 1530 } 1531 emit_byte(0x0F); 1532 emit_byte(0x28); 1533 emit_byte(0xC0 | dstenc << 3 | srcenc); 1534 } 1535 1536 void Assembler::movaps(XMMRegister dst, XMMRegister src) { 1537 NOT_LP64(assert(VM_Version::supports_sse(), "")); 1538 int dstenc = dst->encoding(); 1539 int srcenc = src->encoding(); 1540 if (dstenc < 8) { 1541 if (srcenc >= 8) { 1542 prefix(REX_B); 1543 srcenc -= 8; 1544 } 1545 } else { 1546 if (srcenc < 8) { 1547 prefix(REX_R); 1548 } else { 1549 prefix(REX_RB); 1550 srcenc -= 8; 1551 } 1552 dstenc -= 8; 1553 } 1554 emit_byte(0x0F); 1555 emit_byte(0x28); 1556 emit_byte(0xC0 | dstenc << 3 | srcenc); 1557 } 1558 1559 void Assembler::movb(Register dst, Address src) { 1560 NOT_LP64(assert(dst->has_byte_register(), "must have byte register")); 1561 InstructionMark im(this); 1562 prefix(src, dst, true); 1563 emit_byte(0x8A); 1564 emit_operand(dst, src); 1565 } 1566 1567 1568 void Assembler::movb(Address dst, int imm8) { 1569 InstructionMark im(this); 1570 prefix(dst); 1571 emit_byte(0xC6); 1572 
emit_operand(rax, dst, 1);
  emit_byte(imm8);
}


// movb [dst], src (byte store): opcode 88 followed by the memory operand.
// src must have a byte register (asserted on all builds).
void Assembler::movb(Address dst, Register src) {
  assert(src->has_byte_register(), "must have byte register");
  InstructionMark im(this);
  prefix(dst, src, true);
  emit_byte(0x88);
  emit_operand(src, dst);
}

// movdl dst, src (general register -> XMM): 66, prefix, 0F 6E,
// register-direct ModRM.
void Assembler::movdl(XMMRegister dst, Register src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0x66);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x6E);
  emit_byte(0xC0 | encode);
}

// movdl dst, src (XMM -> general register): 66, prefix, 0F 7E,
// register-direct ModRM. The XMM register is passed first to
// prefix_and_encode(), the reverse of the parameter order.
void Assembler::movdl(Register dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0x66);
  // swap src/dst to get correct prefix
  int encode = prefix_and_encode(src->encoding(), dst->encoding());
  emit_byte(0x0F);
  emit_byte(0x7E);
  emit_byte(0xC0 | encode);
}

// movdl dst, [src] (memory -> XMM): 66, prefix, 0F 6E, memory operand.
void Assembler::movdl(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0x66);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x6E);
  emit_operand(dst, src);
}


// movdqa dst, [src] (load): 66, prefix, 0F 6F, memory operand.
void Assembler::movdqa(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0x66);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x6F);
  emit_operand(dst, src);
}

// movdqa dst, src (register form): 66, prefix, 0F 6F, register-direct ModRM.
// NOTE(review): this uses prefixq_and_encode() where the other XMM
// register-register emitters here use prefix_and_encode() - confirm that the
// "q" variant is intended.
void Assembler::movdqa(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0x66);
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x6F);
  emit_byte(0xC0 | encode);
}

// movdqa [dst], src (store): 66, prefix, 0F 7F, memory operand.
void Assembler::movdqa(Address dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0x66);
  prefix(dst, src);
emit_byte(0x0F);
  emit_byte(0x7F);
  emit_operand(src, dst);
}

// movdqu dst, [src] (load): F3, prefix, 0F 6F, memory operand.
void Assembler::movdqu(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0xF3);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x6F);
  emit_operand(dst, src);
}

// movdqu dst, src (register form): F3, prefix, 0F 6F, register-direct ModRM.
// NOTE(review): uses prefixq_and_encode() like movdqa(XMM, XMM) - confirm
// that the "q" variant is intended.
void Assembler::movdqu(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0xF3);
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x6F);
  emit_byte(0xC0 | encode);
}

// movdqu [dst], src (store): F3, prefix, 0F 7F, memory operand.
void Assembler::movdqu(Address dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0xF3);
  prefix(dst, src);
  emit_byte(0x0F);
  emit_byte(0x7F);
  emit_operand(src, dst);
}

// Uses zero extension on 64bit

// movl dst, imm32: (0xB8 | encode) followed by the 32-bit immediate; the
// register is folded into the opcode byte.
void Assembler::movl(Register dst, int32_t imm32) {
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xB8 | encode);
  emit_long(imm32);
}

// movl dst, src (register form): opcode 8B with a register-direct ModRM byte.
void Assembler::movl(Register dst, Register src) {
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x8B);
  emit_byte(0xC0 | encode);
}

// movl dst, [src] (load): opcode 8B followed by the memory operand.
void Assembler::movl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x8B);
  emit_operand(dst, src);
}

// movl [dst], imm32 (store of an immediate): opcode C7, memory operand, then
// the 32-bit immediate.
void Assembler::movl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0xC7);
  // rax only fills the ModRM reg field (opcode extension /0); the trailing 4
  // matches the four immediate bytes emitted after the operand.
  emit_operand(rax, dst, 4);
  emit_long(imm32);
}

// movl [dst], src (store): opcode 89 followed by the memory operand.
void Assembler::movl(Address dst, Register src) {
  InstructionMark im(this);
  prefix(dst, src);
  emit_byte(0x89);
  emit_operand(src, dst);
}

// Newer CPUs require movsd and movss to avoid a partial register stall
// when loading from memory. But for old Opteron use movlpd instead of movsd.
// The selection is done in MacroAssembler::movdbl() and movflt().
// Encoding: 66, prefix, 0F 12, memory operand.
void Assembler::movlpd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0x66);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x12);
  emit_operand(dst, src);
}

// movq dst, [src] (MMX load): 0F 6F followed by the memory operand.
// NOTE(review): neither an InstructionMark nor a prefix() call is made here,
// unlike the XMM emitters - confirm that this is intentional.
void Assembler::movq( MMXRegister dst, Address src ) {
  assert( VM_Version::supports_mmx(), "" );
  emit_byte(0x0F);
  emit_byte(0x6F);
  emit_operand(dst, src);
}

// movq [dst], src (MMX store): 0F 7F followed by the memory operand.
// NOTE(review): no InstructionMark or prefix() here either.
void Assembler::movq( Address dst, MMXRegister src ) {
  assert( VM_Version::supports_mmx(), "" );
  emit_byte(0x0F);
  emit_byte(0x7F);
  // workaround gcc (3.2.1-7a) bug
  // In that version of gcc with only an emit_operand(MMX, Address)
  // gcc will tail jump and try and reverse the parameters completely
  // obliterating dst in the process. By having a version available
  // that doesn't need to swap the args at the tail jump the bug is
  // avoided.
emit_operand(dst, src);
}

// movq dst, [src] (64-bit load into XMM): F3, prefix, 0F 7E, memory operand.
void Assembler::movq(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0xF3);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x7E);
  emit_operand(dst, src);
}

// movq [dst], src (64-bit store from XMM): 66, prefix, 0F D6, memory operand.
void Assembler::movq(Address dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0x66);
  prefix(dst, src);
  emit_byte(0x0F);
  emit_byte(0xD6);
  emit_operand(src, dst);
}

// movsbl dst, [src] (sign-extending byte load): 0F BE plus the memory operand.
void Assembler::movsbl(Register dst, Address src) { // movsxb
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0xBE);
  emit_operand(dst, src);
}

// movsbl dst, src (sign-extending byte move): 0F BE with a register-direct
// ModRM byte. On 32-bit builds src must have a byte register (asserted).
void Assembler::movsbl(Register dst, Register src) { // movsxb
  NOT_LP64(assert(src->has_byte_register(), "must have byte register"));
  // The third argument presumably marks src as a byte operand for prefix
  // selection - confirm against prefix_and_encode().
  int encode = prefix_and_encode(dst->encoding(), src->encoding(), true);
  emit_byte(0x0F);
  emit_byte(0xBE);
  emit_byte(0xC0 | encode);
}

// movsd dst, src (register form): F2, prefix, 0F 10, register-direct ModRM.
void Assembler::movsd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0xF2);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x10);
  emit_byte(0xC0 | encode);
}

// movsd dst, [src] (load): F2, prefix, 0F 10, memory operand.
void Assembler::movsd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0xF2);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x10);
  emit_operand(dst, src);
}

// movsd [dst], src (store): F2, prefix, 0F 11, memory operand.
void Assembler::movsd(Address dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0xF2);
  prefix(dst, src);
  emit_byte(0x0F);
  emit_byte(0x11);
  emit_operand(src, dst);
}

// movss dst, src (register form): F3, prefix, 0F 10, register-direct ModRM.
void Assembler::movss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_byte(0xF3);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x10);
  emit_byte(0xC0 | encode);
}

// movss dst, [src] (load): F3, prefix, 0F 10, memory operand.
void Assembler::movss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  emit_byte(0xF3);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x10);
  emit_operand(dst, src);
}

// movss [dst], src (store): F3, prefix, 0F 11, memory operand.
void Assembler::movss(Address dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  emit_byte(0xF3);
  prefix(dst, src);
  emit_byte(0x0F);
  emit_byte(0x11);
  emit_operand(src, dst);
}

// movswl dst, [src] (sign-extending word load): 0F BF plus the memory operand.
void Assembler::movswl(Register dst, Address src) { // movsxw
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0xBF);
  emit_operand(dst, src);
}

// movswl dst, src (sign-extending word move): 0F BF with a register-direct
// ModRM byte.
void Assembler::movswl(Register dst, Register src) { // movsxw
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xBF);
  emit_byte(0xC0 | encode);
}

// movw [dst], imm16 (16-bit store of an immediate): 66, prefix, C7, memory
// operand, then the 16-bit immediate.
void Assembler::movw(Address dst, int imm16) {
  InstructionMark im(this);

  emit_byte(0x66); // switch to 16-bit mode
  prefix(dst);
  emit_byte(0xC7);
  // rax only fills the ModRM reg field (opcode extension /0); the trailing 2
  // matches the two immediate bytes emitted after the operand.
  emit_operand(rax, dst, 2);
  emit_word(imm16);
}

// movw dst, [src] (16-bit load): 66, prefix, 8B, memory operand.
void Assembler::movw(Register dst, Address src) {
  InstructionMark im(this);
  emit_byte(0x66);
  prefix(src, dst);
  emit_byte(0x8B);
  emit_operand(dst, src);
}

// movw [dst], src (16-bit store): 66, prefix, 89, memory operand.
void Assembler::movw(Address dst, Register src) {
  InstructionMark im(this);
  emit_byte(0x66);
  prefix(dst, src);
  emit_byte(0x89);
  emit_operand(src, dst);
}

// movzbl dst, [src] (zero-extending byte load): 0F B6 plus the memory operand.
void Assembler::movzbl(Register dst, Address src) { // movzxb
  InstructionMark im(this);
  prefix(src, dst);
emit_byte(0x0F);
  emit_byte(0xB6);
  emit_operand(dst, src);
}

// movzbl dst, src (zero-extending byte move): 0F B6 with a register-direct
// ModRM byte. On 32-bit builds src must have a byte register (asserted).
void Assembler::movzbl(Register dst, Register src) { // movzxb
  NOT_LP64(assert(src->has_byte_register(), "must have byte register"));
  // The third argument presumably marks src as a byte operand for prefix
  // selection - confirm against prefix_and_encode().
  int encode = prefix_and_encode(dst->encoding(), src->encoding(), true);
  emit_byte(0x0F);
  emit_byte(0xB6);
  emit_byte(0xC0 | encode);
}

// movzwl dst, [src] (zero-extending word load): 0F B7 plus the memory operand.
void Assembler::movzwl(Register dst, Address src) { // movzxw
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0xB7);
  emit_operand(dst, src);
}

// movzwl dst, src (zero-extending word move): 0F B7 with a register-direct
// ModRM byte.
void Assembler::movzwl(Register dst, Register src) { // movzxw
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xB7);
  emit_byte(0xC0 | encode);
}

// mull [src]: F7 followed by the memory operand.
void Assembler::mull(Address src) {
  InstructionMark im(this);
  prefix(src);
  emit_byte(0xF7);
  // rsp only fills the ModRM reg field (opcode extension /4 = MUL in the x86
  // instruction reference).
  emit_operand(rsp, src);
}

// mull src (register form): F7 with ModRM (0xE0 | encode).
void Assembler::mull(Register src) {
  int encode = prefix_and_encode(src->encoding());
  emit_byte(0xF7);
  emit_byte(0xE0 | encode);
}

// mulsd dst, [src]: F2, prefix, 0F 59, memory operand.
void Assembler::mulsd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0xF2);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x59);
  emit_operand(dst, src);
}

// mulsd dst, src: F2, prefix, 0F 59, register-direct ModRM.
void Assembler::mulsd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0xF2);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x59);
  emit_byte(0xC0 | encode);
}

// mulss dst, [src]: F3, prefix, 0F 59, memory operand.
void Assembler::mulss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  emit_byte(0xF3);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x59);
  emit_operand(dst, src);
}

void
Assembler::mulss(XMMRegister dst, XMMRegister src) {
  // mulss dst, src: F3, prefix, 0F 59, register-direct ModRM.
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_byte(0xF3);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x59);
  emit_byte(0xC0 | encode);
}

// negl dst: F7 with ModRM (0xD8 | encode).
void Assembler::negl(Register dst) {
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xF7);
  emit_byte(0xD8 | encode);
}

// Emits i bytes of no-op padding.
//  - ASSERT builds: i single-byte 0x90 nops (i must be > 0), then return;
//    the rest of the function is compiled but never reached.
//  - UseAddressNop on Intel: address nops mixed with prefixed regular nops.
//  - UseAddressNop on AMD: consecutive address nops.
//  - otherwise: size-prefixed nops (0x66... 0x90).
// Several switch statements below rely on deliberate case fall-through to
// accumulate 0x66 prefixes.
void Assembler::nop(int i) {
#ifdef ASSERT
  assert(i > 0, " ");
  // The fancy nops aren't currently recognized by debuggers making it a
  // pain to disassemble code while debugging. If asserts are on clearly
  // speed is not an issue so simply use the single byte traditional nop
  // to do alignment.

  for (; i > 0 ; i--) emit_byte(0x90);
  return;

#endif // ASSERT

  if (UseAddressNop && VM_Version::is_intel()) {
    //
    // Using multi-bytes nops "0x0F 0x1F [address]" for Intel
    //  1: 0x90
    //  2: 0x66 0x90
    //  3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
    //  4: 0x0F 0x1F 0x40 0x00
    //  5: 0x0F 0x1F 0x44 0x00 0x00
    //  6: 0x66 0x0F 0x1F 0x44 0x00 0x00
    //  7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
    //  8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    //  9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00

    // The rest coding is Intel specific - don't use consecutive address nops

    // 12: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
    // 13: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
    // 14: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
    // 15: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90

    // Consume the request in 15-byte chunks: 3 prefixes + addr_nop_8()
    // + 3 prefixes + 0x90.
    while(i >= 15) {
      // For Intel don't generate consecutive address nops (mix with regular nops)
      i -= 15;
      emit_byte(0x66);   // size prefix
      emit_byte(0x66);   // size prefix
      emit_byte(0x66);   // size prefix
      addr_nop_8();
      emit_byte(0x66);   // size prefix
      emit_byte(0x66);   // size prefix
      emit_byte(0x66);   // size prefix
      emit_byte(0x90);   // nop
    }
    // Remaining 0..14 bytes.
    switch (i) {
      case 14:
        emit_byte(0x66); // size prefix
        // fall through
      case 13:
        emit_byte(0x66); // size prefix
        // fall through
      case 12:
        addr_nop_8();
        emit_byte(0x66); // size prefix
        emit_byte(0x66); // size prefix
        emit_byte(0x66); // size prefix
        emit_byte(0x90); // nop
        break;
      case 11:
        emit_byte(0x66); // size prefix
        // fall through
      case 10:
        emit_byte(0x66); // size prefix
        // fall through
      case 9:
        emit_byte(0x66); // size prefix
        // fall through
      case 8:
        addr_nop_8();
        break;
      case 7:
        addr_nop_7();
        break;
      case 6:
        emit_byte(0x66); // size prefix
        // fall through
      case 5:
        addr_nop_5();
        break;
      case 4:
        addr_nop_4();
        break;
      case 3:
        // Don't use "0x0F 0x1F 0x00" - need patching safe padding
        emit_byte(0x66); // size prefix
        // fall through
      case 2:
        emit_byte(0x66); // size prefix
        // fall through
      case 1:
        emit_byte(0x90); // nop
        break;
      default:
        assert(i == 0, " ");
    }
    return;
  }
  if (UseAddressNop && VM_Version::is_amd()) {
    //
    // Using multi-bytes nops "0x0F 0x1F [address]" for AMD.
    //  1: 0x90
    //  2: 0x66 0x90
    //  3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
    //  4: 0x0F 0x1F 0x40 0x00
    //  5: 0x0F 0x1F 0x44 0x00 0x00
    //  6: 0x66 0x0F 0x1F 0x44 0x00 0x00
    //  7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
    //  8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    //  9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00

    // The rest coding is AMD specific - use consecutive address nops

    // 12: 0x66 0x0F 0x1F 0x44 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
    // 13: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
    // 14: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
    // 15: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
    // 16: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    //     Size prefixes (0x66) are added for larger sizes

    // Consume the request in 11-byte chunks (3 prefixes + addr_nop_8())
    // until at most 21 bytes remain.
    while(i >= 22) {
      i -= 11;
      emit_byte(0x66); // size prefix
      emit_byte(0x66); // size prefix
      emit_byte(0x66); // size prefix
      addr_nop_8();
    }
    // Generate first nop for size between 21-12
    // (i is decremented by exactly the number of bytes emitted here, so the
    // second switch below pads whatever is left).
    switch (i) {
      case 21:
        i -= 1;
        emit_byte(0x66); // size prefix
        // fall through
      case 20:
      case 19:
        i -= 1;
        emit_byte(0x66); // size prefix
        // fall through
      case 18:
      case 17:
        i -= 1;
        emit_byte(0x66); // size prefix
        // fall through
      case 16:
      case 15:
        i -= 8;
        addr_nop_8();
        break;
      case 14:
      case 13:
        i -= 7;
        addr_nop_7();
        break;
      case 12:
        i -= 6;
        emit_byte(0x66); // size prefix
        addr_nop_5();
        break;
      default:
        assert(i < 12, " ");
    }

    // Generate second nop for size between 11-1
    switch (i) {
      case 11:
        emit_byte(0x66); // size prefix
        // fall through
      case 10:
        emit_byte(0x66); // size prefix
        // fall through
      case 9:
        emit_byte(0x66); // size prefix
        // fall through
      case 8:
        addr_nop_8();
        break;
      case 7:
        addr_nop_7();
        break;
      case 6:
        emit_byte(0x66); // size prefix
        // fall through
      case 5:
        addr_nop_5();
        break;
      case 4:
        addr_nop_4();
        break;
      case 3:
        // Don't use "0x0F 0x1F 0x00" - need patching safe padding
        emit_byte(0x66); // size prefix
        // fall through
      case 2:
        emit_byte(0x66); // size prefix
        // fall through
      case 1:
        emit_byte(0x90); // nop
        break;
      default:
        assert(i == 0, " ");
    }
    return;
  }

  // Using nops with size prefixes "0x66 0x90".
  // From AMD Optimization Guide:
  //  1: 0x90
  //  2: 0x66 0x90
  //  3: 0x66 0x66 0x90
  //  4: 0x66 0x66 0x66 0x90
  //  5: 0x66 0x66 0x90 0x66 0x90
  //  6: 0x66 0x66 0x90 0x66 0x66 0x90
  //  7: 0x66 0x66 0x66 0x90 0x66 0x66 0x90
  //  8: 0x66 0x66 0x66 0x90 0x66 0x66 0x66 0x90
  //  9: 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
  // 10: 0x66 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
  //
  // Consume the request in 4-byte nops until at most 12 bytes remain.
  while(i > 12) {
    i -= 4;
    emit_byte(0x66); // size prefix
    emit_byte(0x66);
    emit_byte(0x66);
    emit_byte(0x90); // nop
  }
  // 1 - 12 nops
  // Emits a 3-byte nop (4-byte when i > 9) and reduces i accordingly.
  if(i > 8) {
    if(i > 9) {
      i -= 1;
      emit_byte(0x66);
    }
    i -= 3;
    emit_byte(0x66);
    emit_byte(0x66);
    emit_byte(0x90);
  }
  // 1 - 8 nops
  // Emits a 3-byte nop (4-byte when i > 6) and reduces i accordingly.
  if(i > 4) {
    if(i > 6) {
      i -= 1;
      emit_byte(0x66);
    }
    i -= 3;
    emit_byte(0x66);
    emit_byte(0x66);
    emit_byte(0x90);
  }
  // Final nop of 0..4 bytes.
  switch (i) {
    case 4:
      emit_byte(0x66);
      // fall through
    case 3:
      emit_byte(0x66);
      // fall through
    case 2:
      emit_byte(0x66);
      // fall through
    case 1:
      emit_byte(0x90);
      break;
    default:
      assert(i == 0, " ");
  }
}

// notl dst: F7 with ModRM (0xD0 | encode).
void Assembler::notl(Register dst) {
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xF7);
  emit_byte(0xD0 | encode );
}

void Assembler::orl(Address dst, int32_t imm32) {
InstructionMark im(this); 2221 prefix(dst); 2222 emit_arith_operand(0x81, rcx, dst, imm32); 2223 } 2224 2225 void Assembler::orl(Register dst, int32_t imm32) { 2226 prefix(dst); 2227 emit_arith(0x81, 0xC8, dst, imm32); 2228 } 2229 2230 void Assembler::orl(Register dst, Address src) { 2231 InstructionMark im(this); 2232 prefix(src, dst); 2233 emit_byte(0x0B); 2234 emit_operand(dst, src); 2235 } 2236 2237 void Assembler::orl(Register dst, Register src) { 2238 (void) prefix_and_encode(dst->encoding(), src->encoding()); 2239 emit_arith(0x0B, 0xC0, dst, src); 2240 } 2241 2242 void Assembler::pcmpestri(XMMRegister dst, Address src, int imm8) { 2243 assert(VM_Version::supports_sse4_2(), ""); 2244 2245 InstructionMark im(this); 2246 emit_byte(0x66); 2247 prefix(src, dst); 2248 emit_byte(0x0F); 2249 emit_byte(0x3A); 2250 emit_byte(0x61); 2251 emit_operand(dst, src); 2252 emit_byte(imm8); 2253 } 2254 2255 void Assembler::pcmpestri(XMMRegister dst, XMMRegister src, int imm8) { 2256 assert(VM_Version::supports_sse4_2(), ""); 2257 2258 emit_byte(0x66); 2259 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 2260 emit_byte(0x0F); 2261 emit_byte(0x3A); 2262 emit_byte(0x61); 2263 emit_byte(0xC0 | encode); 2264 emit_byte(imm8); 2265 } 2266 2267 // generic 2268 void Assembler::pop(Register dst) { 2269 int encode = prefix_and_encode(dst->encoding()); 2270 emit_byte(0x58 | encode); 2271 } 2272 2273 void Assembler::popcntl(Register dst, Address src) { 2274 assert(VM_Version::supports_popcnt(), "must support"); 2275 InstructionMark im(this); 2276 emit_byte(0xF3); 2277 prefix(src, dst); 2278 emit_byte(0x0F); 2279 emit_byte(0xB8); 2280 emit_operand(dst, src); 2281 } 2282 2283 void Assembler::popcntl(Register dst, Register src) { 2284 assert(VM_Version::supports_popcnt(), "must support"); 2285 emit_byte(0xF3); 2286 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 2287 emit_byte(0x0F); 2288 emit_byte(0xB8); 2289 emit_byte(0xC0 | encode); 2290 } 2291 2292 
void Assembler::popf() { 2293 emit_byte(0x9D); 2294 } 2295 2296 #ifndef _LP64 // no 32bit push/pop on amd64 2297 void Assembler::popl(Address dst) { 2298 // NOTE: this will adjust stack by 8byte on 64bits 2299 InstructionMark im(this); 2300 prefix(dst); 2301 emit_byte(0x8F); 2302 emit_operand(rax, dst); 2303 } 2304 #endif 2305 2306 void Assembler::prefetch_prefix(Address src) { 2307 prefix(src); 2308 emit_byte(0x0F); 2309 } 2310 2311 void Assembler::prefetchnta(Address src) { 2312 NOT_LP64(assert(VM_Version::supports_sse2(), "must support")); 2313 InstructionMark im(this); 2314 prefetch_prefix(src); 2315 emit_byte(0x18); 2316 emit_operand(rax, src); // 0, src 2317 } 2318 2319 void Assembler::prefetchr(Address src) { 2320 NOT_LP64(assert(VM_Version::supports_3dnow_prefetch(), "must support")); 2321 InstructionMark im(this); 2322 prefetch_prefix(src); 2323 emit_byte(0x0D); 2324 emit_operand(rax, src); // 0, src 2325 } 2326 2327 void Assembler::prefetcht0(Address src) { 2328 NOT_LP64(assert(VM_Version::supports_sse(), "must support")); 2329 InstructionMark im(this); 2330 prefetch_prefix(src); 2331 emit_byte(0x18); 2332 emit_operand(rcx, src); // 1, src 2333 } 2334 2335 void Assembler::prefetcht1(Address src) { 2336 NOT_LP64(assert(VM_Version::supports_sse(), "must support")); 2337 InstructionMark im(this); 2338 prefetch_prefix(src); 2339 emit_byte(0x18); 2340 emit_operand(rdx, src); // 2, src 2341 } 2342 2343 void Assembler::prefetcht2(Address src) { 2344 NOT_LP64(assert(VM_Version::supports_sse(), "must support")); 2345 InstructionMark im(this); 2346 prefetch_prefix(src); 2347 emit_byte(0x18); 2348 emit_operand(rbx, src); // 3, src 2349 } 2350 2351 void Assembler::prefetchw(Address src) { 2352 NOT_LP64(assert(VM_Version::supports_3dnow_prefetch(), "must support")); 2353 InstructionMark im(this); 2354 prefetch_prefix(src); 2355 emit_byte(0x0D); 2356 emit_operand(rcx, src); // 1, src 2357 } 2358 2359 void Assembler::prefix(Prefix p) { 2360 a_byte(p); 2361 } 2362 2363 
void Assembler::por(XMMRegister dst, XMMRegister src) { 2364 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2365 2366 emit_byte(0x66); 2367 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 2368 emit_byte(0x0F); 2369 2370 emit_byte(0xEB); 2371 emit_byte(0xC0 | encode); 2372 } 2373 2374 void Assembler::pshufd(XMMRegister dst, XMMRegister src, int mode) { 2375 assert(isByte(mode), "invalid value"); 2376 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2377 2378 emit_byte(0x66); 2379 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 2380 emit_byte(0x0F); 2381 emit_byte(0x70); 2382 emit_byte(0xC0 | encode); 2383 emit_byte(mode & 0xFF); 2384 2385 } 2386 2387 void Assembler::pshufd(XMMRegister dst, Address src, int mode) { 2388 assert(isByte(mode), "invalid value"); 2389 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2390 2391 InstructionMark im(this); 2392 emit_byte(0x66); 2393 prefix(src, dst); 2394 emit_byte(0x0F); 2395 emit_byte(0x70); 2396 emit_operand(dst, src); 2397 emit_byte(mode & 0xFF); 2398 } 2399 2400 void Assembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) { 2401 assert(isByte(mode), "invalid value"); 2402 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2403 2404 emit_byte(0xF2); 2405 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 2406 emit_byte(0x0F); 2407 emit_byte(0x70); 2408 emit_byte(0xC0 | encode); 2409 emit_byte(mode & 0xFF); 2410 } 2411 2412 void Assembler::pshuflw(XMMRegister dst, Address src, int mode) { 2413 assert(isByte(mode), "invalid value"); 2414 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2415 2416 InstructionMark im(this); 2417 emit_byte(0xF2); 2418 prefix(src, dst); // QQ new 2419 emit_byte(0x0F); 2420 emit_byte(0x70); 2421 emit_operand(dst, src); 2422 emit_byte(mode & 0xFF); 2423 } 2424 2425 void Assembler::psrlq(XMMRegister dst, int shift) { 2426 // Shift 64 bit value logically right by specified number of bits. 2427 // HMM Table D-1 says sse2 or mmx. 
2428 // Do not confuse it with psrldq SSE2 instruction which 2429 // shifts 128 bit value in xmm register by number of bytes. 2430 NOT_LP64(assert(VM_Version::supports_sse(), "")); 2431 2432 int encode = prefixq_and_encode(xmm2->encoding(), dst->encoding()); 2433 emit_byte(0x66); 2434 emit_byte(0x0F); 2435 emit_byte(0x73); 2436 emit_byte(0xC0 | encode); 2437 emit_byte(shift); 2438 } 2439 2440 void Assembler::psrldq(XMMRegister dst, int shift) { 2441 // Shift 128 bit value in xmm register by number of bytes. 2442 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2443 2444 int encode = prefixq_and_encode(xmm3->encoding(), dst->encoding()); 2445 emit_byte(0x66); 2446 emit_byte(0x0F); 2447 emit_byte(0x73); 2448 emit_byte(0xC0 | encode); 2449 emit_byte(shift); 2450 } 2451 2452 void Assembler::ptest(XMMRegister dst, Address src) { 2453 assert(VM_Version::supports_sse4_1(), ""); 2454 2455 InstructionMark im(this); 2456 emit_byte(0x66); 2457 prefix(src, dst); 2458 emit_byte(0x0F); 2459 emit_byte(0x38); 2460 emit_byte(0x17); 2461 emit_operand(dst, src); 2462 } 2463 2464 void Assembler::ptest(XMMRegister dst, XMMRegister src) { 2465 assert(VM_Version::supports_sse4_1(), ""); 2466 2467 emit_byte(0x66); 2468 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 2469 emit_byte(0x0F); 2470 emit_byte(0x38); 2471 emit_byte(0x17); 2472 emit_byte(0xC0 | encode); 2473 } 2474 2475 void Assembler::punpcklbw(XMMRegister dst, XMMRegister src) { 2476 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2477 emit_byte(0x66); 2478 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 2479 emit_byte(0x0F); 2480 emit_byte(0x60); 2481 emit_byte(0xC0 | encode); 2482 } 2483 2484 void Assembler::push(int32_t imm32) { 2485 // in 64bits we push 64bits onto the stack but only 2486 // take a 32bit immediate 2487 emit_byte(0x68); 2488 emit_long(imm32); 2489 } 2490 2491 void Assembler::push(Register src) { 2492 int encode = prefix_and_encode(src->encoding()); 2493 2494 
emit_byte(0x50 | encode); 2495 } 2496 2497 void Assembler::pushf() { 2498 emit_byte(0x9C); 2499 } 2500 2501 #ifndef _LP64 // no 32bit push/pop on amd64 2502 void Assembler::pushl(Address src) { 2503 // Note this will push 64bit on 64bit 2504 InstructionMark im(this); 2505 prefix(src); 2506 emit_byte(0xFF); 2507 emit_operand(rsi, src); 2508 } 2509 #endif 2510 2511 void Assembler::pxor(XMMRegister dst, Address src) { 2512 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2513 InstructionMark im(this); 2514 emit_byte(0x66); 2515 prefix(src, dst); 2516 emit_byte(0x0F); 2517 emit_byte(0xEF); 2518 emit_operand(dst, src); 2519 } 2520 2521 void Assembler::pxor(XMMRegister dst, XMMRegister src) { 2522 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2523 InstructionMark im(this); 2524 emit_byte(0x66); 2525 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 2526 emit_byte(0x0F); 2527 emit_byte(0xEF); 2528 emit_byte(0xC0 | encode); 2529 } 2530 2531 void Assembler::rcll(Register dst, int imm8) { 2532 assert(isShiftCount(imm8), "illegal shift count"); 2533 int encode = prefix_and_encode(dst->encoding()); 2534 if (imm8 == 1) { 2535 emit_byte(0xD1); 2536 emit_byte(0xD0 | encode); 2537 } else { 2538 emit_byte(0xC1); 2539 emit_byte(0xD0 | encode); 2540 emit_byte(imm8); 2541 } 2542 } 2543 2544 // copies data from [esi] to [edi] using rcx pointer sized words 2545 // generic 2546 void Assembler::rep_mov() { 2547 emit_byte(0xF3); 2548 // MOVSQ 2549 LP64_ONLY(prefix(REX_W)); 2550 emit_byte(0xA5); 2551 } 2552 2553 // sets rcx pointer sized words with rax, value at [edi] 2554 // generic 2555 void Assembler::rep_set() { // rep_set 2556 emit_byte(0xF3); 2557 // STOSQ 2558 LP64_ONLY(prefix(REX_W)); 2559 emit_byte(0xAB); 2560 } 2561 2562 // scans rcx pointer sized words at [edi] for occurance of rax, 2563 // generic 2564 void Assembler::repne_scan() { // repne_scan 2565 emit_byte(0xF2); 2566 // SCASQ 2567 LP64_ONLY(prefix(REX_W)); 2568 emit_byte(0xAF); 2569 } 2570 2571 
#ifdef _LP64 2572 // scans rcx 4 byte words at [edi] for occurance of rax, 2573 // generic 2574 void Assembler::repne_scanl() { // repne_scan 2575 emit_byte(0xF2); 2576 // SCASL 2577 emit_byte(0xAF); 2578 } 2579 #endif 2580 2581 void Assembler::ret(int imm16) { 2582 if (imm16 == 0) { 2583 emit_byte(0xC3); 2584 } else { 2585 emit_byte(0xC2); 2586 emit_word(imm16); 2587 } 2588 } 2589 2590 void Assembler::sahf() { 2591 #ifdef _LP64 2592 // Not supported in 64bit mode 2593 ShouldNotReachHere(); 2594 #endif 2595 emit_byte(0x9E); 2596 } 2597 2598 void Assembler::sarl(Register dst, int imm8) { 2599 int encode = prefix_and_encode(dst->encoding()); 2600 assert(isShiftCount(imm8), "illegal shift count"); 2601 if (imm8 == 1) { 2602 emit_byte(0xD1); 2603 emit_byte(0xF8 | encode); 2604 } else { 2605 emit_byte(0xC1); 2606 emit_byte(0xF8 | encode); 2607 emit_byte(imm8); 2608 } 2609 } 2610 2611 void Assembler::sarl(Register dst) { 2612 int encode = prefix_and_encode(dst->encoding()); 2613 emit_byte(0xD3); 2614 emit_byte(0xF8 | encode); 2615 } 2616 2617 void Assembler::sbbl(Address dst, int32_t imm32) { 2618 InstructionMark im(this); 2619 prefix(dst); 2620 emit_arith_operand(0x81, rbx, dst, imm32); 2621 } 2622 2623 void Assembler::sbbl(Register dst, int32_t imm32) { 2624 prefix(dst); 2625 emit_arith(0x81, 0xD8, dst, imm32); 2626 } 2627 2628 2629 void Assembler::sbbl(Register dst, Address src) { 2630 InstructionMark im(this); 2631 prefix(src, dst); 2632 emit_byte(0x1B); 2633 emit_operand(dst, src); 2634 } 2635 2636 void Assembler::sbbl(Register dst, Register src) { 2637 (void) prefix_and_encode(dst->encoding(), src->encoding()); 2638 emit_arith(0x1B, 0xC0, dst, src); 2639 } 2640 2641 void Assembler::setb(Condition cc, Register dst) { 2642 assert(0 <= cc && cc < 16, "illegal cc"); 2643 int encode = prefix_and_encode(dst->encoding(), true); 2644 emit_byte(0x0F); 2645 emit_byte(0x90 | cc); 2646 emit_byte(0xC0 | encode); 2647 } 2648 2649 void Assembler::shll(Register dst, int imm8) { 
2650 assert(isShiftCount(imm8), "illegal shift count"); 2651 int encode = prefix_and_encode(dst->encoding()); 2652 if (imm8 == 1 ) { 2653 emit_byte(0xD1); 2654 emit_byte(0xE0 | encode); 2655 } else { 2656 emit_byte(0xC1); 2657 emit_byte(0xE0 | encode); 2658 emit_byte(imm8); 2659 } 2660 } 2661 2662 void Assembler::shll(Register dst) { 2663 int encode = prefix_and_encode(dst->encoding()); 2664 emit_byte(0xD3); 2665 emit_byte(0xE0 | encode); 2666 } 2667 2668 void Assembler::shrl(Register dst, int imm8) { 2669 assert(isShiftCount(imm8), "illegal shift count"); 2670 int encode = prefix_and_encode(dst->encoding()); 2671 emit_byte(0xC1); 2672 emit_byte(0xE8 | encode); 2673 emit_byte(imm8); 2674 } 2675 2676 void Assembler::shrl(Register dst) { 2677 int encode = prefix_and_encode(dst->encoding()); 2678 emit_byte(0xD3); 2679 emit_byte(0xE8 | encode); 2680 } 2681 2682 // copies a single word from [esi] to [edi] 2683 void Assembler::smovl() { 2684 emit_byte(0xA5); 2685 } 2686 2687 void Assembler::sqrtsd(XMMRegister dst, XMMRegister src) { 2688 // HMM Table D-1 says sse2 2689 // NOT_LP64(assert(VM_Version::supports_sse(), "")); 2690 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2691 emit_byte(0xF2); 2692 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 2693 emit_byte(0x0F); 2694 emit_byte(0x51); 2695 emit_byte(0xC0 | encode); 2696 } 2697 2698 void Assembler::sqrtsd(XMMRegister dst, Address src) { 2699 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2700 InstructionMark im(this); 2701 emit_byte(0xF2); 2702 prefix(src, dst); 2703 emit_byte(0x0F); 2704 emit_byte(0x51); 2705 emit_operand(dst, src); 2706 } 2707 2708 void Assembler::sqrtss(XMMRegister dst, XMMRegister src) { 2709 // HMM Table D-1 says sse2 2710 // NOT_LP64(assert(VM_Version::supports_sse(), "")); 2711 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2712 emit_byte(0xF3); 2713 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 2714 emit_byte(0x0F); 2715 emit_byte(0x51); 2716 
emit_byte(0xC0 | encode); 2717 } 2718 2719 void Assembler::sqrtss(XMMRegister dst, Address src) { 2720 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2721 InstructionMark im(this); 2722 emit_byte(0xF3); 2723 prefix(src, dst); 2724 emit_byte(0x0F); 2725 emit_byte(0x51); 2726 emit_operand(dst, src); 2727 } 2728 2729 void Assembler::stmxcsr( Address dst) { 2730 NOT_LP64(assert(VM_Version::supports_sse(), "")); 2731 InstructionMark im(this); 2732 prefix(dst); 2733 emit_byte(0x0F); 2734 emit_byte(0xAE); 2735 emit_operand(as_Register(3), dst); 2736 } 2737 2738 void Assembler::subl(Address dst, int32_t imm32) { 2739 InstructionMark im(this); 2740 prefix(dst); 2741 emit_arith_operand(0x81, rbp, dst, imm32); 2742 } 2743 2744 void Assembler::subl(Address dst, Register src) { 2745 InstructionMark im(this); 2746 prefix(dst, src); 2747 emit_byte(0x29); 2748 emit_operand(src, dst); 2749 } 2750 2751 void Assembler::subl(Register dst, int32_t imm32) { 2752 prefix(dst); 2753 emit_arith(0x81, 0xE8, dst, imm32); 2754 } 2755 2756 void Assembler::subl(Register dst, Address src) { 2757 InstructionMark im(this); 2758 prefix(src, dst); 2759 emit_byte(0x2B); 2760 emit_operand(dst, src); 2761 } 2762 2763 void Assembler::subl(Register dst, Register src) { 2764 (void) prefix_and_encode(dst->encoding(), src->encoding()); 2765 emit_arith(0x2B, 0xC0, dst, src); 2766 } 2767 2768 void Assembler::subsd(XMMRegister dst, XMMRegister src) { 2769 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2770 emit_byte(0xF2); 2771 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 2772 emit_byte(0x0F); 2773 emit_byte(0x5C); 2774 emit_byte(0xC0 | encode); 2775 } 2776 2777 void Assembler::subsd(XMMRegister dst, Address src) { 2778 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2779 InstructionMark im(this); 2780 emit_byte(0xF2); 2781 prefix(src, dst); 2782 emit_byte(0x0F); 2783 emit_byte(0x5C); 2784 emit_operand(dst, src); 2785 } 2786 2787 void Assembler::subss(XMMRegister dst, XMMRegister 
src) { 2788 NOT_LP64(assert(VM_Version::supports_sse(), "")); 2789 emit_byte(0xF3); 2790 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 2791 emit_byte(0x0F); 2792 emit_byte(0x5C); 2793 emit_byte(0xC0 | encode); 2794 } 2795 2796 void Assembler::subss(XMMRegister dst, Address src) { 2797 NOT_LP64(assert(VM_Version::supports_sse(), "")); 2798 InstructionMark im(this); 2799 emit_byte(0xF3); 2800 prefix(src, dst); 2801 emit_byte(0x0F); 2802 emit_byte(0x5C); 2803 emit_operand(dst, src); 2804 } 2805 2806 void Assembler::testb(Register dst, int imm8) { 2807 NOT_LP64(assert(dst->has_byte_register(), "must have byte register")); 2808 (void) prefix_and_encode(dst->encoding(), true); 2809 emit_arith_b(0xF6, 0xC0, dst, imm8); 2810 } 2811 2812 void Assembler::testl(Register dst, int32_t imm32) { 2813 // not using emit_arith because test 2814 // doesn't support sign-extension of 2815 // 8bit operands 2816 int encode = dst->encoding(); 2817 if (encode == 0) { 2818 emit_byte(0xA9); 2819 } else { 2820 encode = prefix_and_encode(encode); 2821 emit_byte(0xF7); 2822 emit_byte(0xC0 | encode); 2823 } 2824 emit_long(imm32); 2825 } 2826 2827 void Assembler::testl(Register dst, Register src) { 2828 (void) prefix_and_encode(dst->encoding(), src->encoding()); 2829 emit_arith(0x85, 0xC0, dst, src); 2830 } 2831 2832 void Assembler::testl(Register dst, Address src) { 2833 InstructionMark im(this); 2834 prefix(src, dst); 2835 emit_byte(0x85); 2836 emit_operand(dst, src); 2837 } 2838 2839 void Assembler::ucomisd(XMMRegister dst, Address src) { 2840 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2841 emit_byte(0x66); 2842 ucomiss(dst, src); 2843 } 2844 2845 void Assembler::ucomisd(XMMRegister dst, XMMRegister src) { 2846 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2847 emit_byte(0x66); 2848 ucomiss(dst, src); 2849 } 2850 2851 void Assembler::ucomiss(XMMRegister dst, Address src) { 2852 NOT_LP64(assert(VM_Version::supports_sse(), "")); 2853 2854 InstructionMark 
im(this); 2855 prefix(src, dst); 2856 emit_byte(0x0F); 2857 emit_byte(0x2E); 2858 emit_operand(dst, src); 2859 } 2860 2861 void Assembler::ucomiss(XMMRegister dst, XMMRegister src) { 2862 NOT_LP64(assert(VM_Version::supports_sse(), "")); 2863 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 2864 emit_byte(0x0F); 2865 emit_byte(0x2E); 2866 emit_byte(0xC0 | encode); 2867 } 2868 2869 2870 void Assembler::xaddl(Address dst, Register src) { 2871 InstructionMark im(this); 2872 prefix(dst, src); 2873 emit_byte(0x0F); 2874 emit_byte(0xC1); 2875 emit_operand(src, dst); 2876 } 2877 2878 void Assembler::xchgl(Register dst, Address src) { // xchg 2879 InstructionMark im(this); 2880 prefix(src, dst); 2881 emit_byte(0x87); 2882 emit_operand(dst, src); 2883 } 2884 2885 void Assembler::xchgl(Register dst, Register src) { 2886 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 2887 emit_byte(0x87); 2888 emit_byte(0xc0 | encode); 2889 } 2890 2891 void Assembler::xorl(Register dst, int32_t imm32) { 2892 prefix(dst); 2893 emit_arith(0x81, 0xF0, dst, imm32); 2894 } 2895 2896 void Assembler::xorl(Register dst, Address src) { 2897 InstructionMark im(this); 2898 prefix(src, dst); 2899 emit_byte(0x33); 2900 emit_operand(dst, src); 2901 } 2902 2903 void Assembler::xorl(Register dst, Register src) { 2904 (void) prefix_and_encode(dst->encoding(), src->encoding()); 2905 emit_arith(0x33, 0xC0, dst, src); 2906 } 2907 2908 void Assembler::xorpd(XMMRegister dst, XMMRegister src) { 2909 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2910 emit_byte(0x66); 2911 xorps(dst, src); 2912 } 2913 2914 void Assembler::xorpd(XMMRegister dst, Address src) { 2915 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2916 InstructionMark im(this); 2917 emit_byte(0x66); 2918 prefix(src, dst); 2919 emit_byte(0x0F); 2920 emit_byte(0x57); 2921 emit_operand(dst, src); 2922 } 2923 2924 2925 void Assembler::xorps(XMMRegister dst, XMMRegister src) { 2926 
NOT_LP64(assert(VM_Version::supports_sse(), "")); 2927 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 2928 emit_byte(0x0F); 2929 emit_byte(0x57); 2930 emit_byte(0xC0 | encode); 2931 } 2932 2933 void Assembler::xorps(XMMRegister dst, Address src) { 2934 NOT_LP64(assert(VM_Version::supports_sse(), "")); 2935 InstructionMark im(this); 2936 prefix(src, dst); 2937 emit_byte(0x0F); 2938 emit_byte(0x57); 2939 emit_operand(dst, src); 2940 } 2941 2942 #ifndef _LP64 2943 // 32bit only pieces of the assembler 2944 2945 void Assembler::cmp_literal32(Register src1, int32_t imm32, RelocationHolder const& rspec) { 2946 // NO PREFIX AS NEVER 64BIT 2947 InstructionMark im(this); 2948 emit_byte(0x81); 2949 emit_byte(0xF8 | src1->encoding()); 2950 emit_data(imm32, rspec, 0); 2951 } 2952 2953 void Assembler::cmp_literal32(Address src1, int32_t imm32, RelocationHolder const& rspec) { 2954 // NO PREFIX AS NEVER 64BIT (not even 32bit versions of 64bit regs 2955 InstructionMark im(this); 2956 emit_byte(0x81); 2957 emit_operand(rdi, src1); 2958 emit_data(imm32, rspec, 0); 2959 } 2960 2961 // The 64-bit (32bit platform) cmpxchg compares the value at adr with the contents of rdx:rax, 2962 // and stores rcx:rbx into adr if so; otherwise, the value at adr is loaded 2963 // into rdx:rax. The ZF is set if the compared values were equal, and cleared otherwise. 2964 void Assembler::cmpxchg8(Address adr) { 2965 InstructionMark im(this); 2966 emit_byte(0x0F); 2967 emit_byte(0xc7); 2968 emit_operand(rcx, adr); 2969 } 2970 2971 void Assembler::decl(Register dst) { 2972 // Don't use it directly. Use MacroAssembler::decrementl() instead. 
2973 emit_byte(0x48 | dst->encoding()); 2974 } 2975 2976 #endif // _LP64 2977 2978 // 64bit typically doesn't use the x87 but needs to for the trig funcs 2979 2980 void Assembler::fabs() { 2981 emit_byte(0xD9); 2982 emit_byte(0xE1); 2983 } 2984 2985 void Assembler::fadd(int i) { 2986 emit_farith(0xD8, 0xC0, i); 2987 } 2988 2989 void Assembler::fadd_d(Address src) { 2990 InstructionMark im(this); 2991 emit_byte(0xDC); 2992 emit_operand32(rax, src); 2993 } 2994 2995 void Assembler::fadd_s(Address src) { 2996 InstructionMark im(this); 2997 emit_byte(0xD8); 2998 emit_operand32(rax, src); 2999 } 3000 3001 void Assembler::fadda(int i) { 3002 emit_farith(0xDC, 0xC0, i); 3003 } 3004 3005 void Assembler::faddp(int i) { 3006 emit_farith(0xDE, 0xC0, i); 3007 } 3008 3009 void Assembler::fchs() { 3010 emit_byte(0xD9); 3011 emit_byte(0xE0); 3012 } 3013 3014 void Assembler::fcom(int i) { 3015 emit_farith(0xD8, 0xD0, i); 3016 } 3017 3018 void Assembler::fcomp(int i) { 3019 emit_farith(0xD8, 0xD8, i); 3020 } 3021 3022 void Assembler::fcomp_d(Address src) { 3023 InstructionMark im(this); 3024 emit_byte(0xDC); 3025 emit_operand32(rbx, src); 3026 } 3027 3028 void Assembler::fcomp_s(Address src) { 3029 InstructionMark im(this); 3030 emit_byte(0xD8); 3031 emit_operand32(rbx, src); 3032 } 3033 3034 void Assembler::fcompp() { 3035 emit_byte(0xDE); 3036 emit_byte(0xD9); 3037 } 3038 3039 void Assembler::fcos() { 3040 emit_byte(0xD9); 3041 emit_byte(0xFF); 3042 } 3043 3044 void Assembler::fdecstp() { 3045 emit_byte(0xD9); 3046 emit_byte(0xF6); 3047 } 3048 3049 void Assembler::fdiv(int i) { 3050 emit_farith(0xD8, 0xF0, i); 3051 } 3052 3053 void Assembler::fdiv_d(Address src) { 3054 InstructionMark im(this); 3055 emit_byte(0xDC); 3056 emit_operand32(rsi, src); 3057 } 3058 3059 void Assembler::fdiv_s(Address src) { 3060 InstructionMark im(this); 3061 emit_byte(0xD8); 3062 emit_operand32(rsi, src); 3063 } 3064 3065 void Assembler::fdiva(int i) { 3066 emit_farith(0xDC, 0xF8, i); 3067 } 3068 3069 
// Note: The Intel manual (Pentium Processor User's Manual, Vol.3, 1994) 3070 // is erroneous for some of the floating-point instructions below. 3071 3072 void Assembler::fdivp(int i) { 3073 emit_farith(0xDE, 0xF8, i); // ST(0) <- ST(0) / ST(1) and pop (Intel manual wrong) 3074 } 3075 3076 void Assembler::fdivr(int i) { 3077 emit_farith(0xD8, 0xF8, i); 3078 } 3079 3080 void Assembler::fdivr_d(Address src) { 3081 InstructionMark im(this); 3082 emit_byte(0xDC); 3083 emit_operand32(rdi, src); 3084 } 3085 3086 void Assembler::fdivr_s(Address src) { 3087 InstructionMark im(this); 3088 emit_byte(0xD8); 3089 emit_operand32(rdi, src); 3090 } 3091 3092 void Assembler::fdivra(int i) { 3093 emit_farith(0xDC, 0xF0, i); 3094 } 3095 3096 void Assembler::fdivrp(int i) { 3097 emit_farith(0xDE, 0xF0, i); // ST(0) <- ST(1) / ST(0) and pop (Intel manual wrong) 3098 } 3099 3100 void Assembler::ffree(int i) { 3101 emit_farith(0xDD, 0xC0, i); 3102 } 3103 3104 void Assembler::fild_d(Address adr) { 3105 InstructionMark im(this); 3106 emit_byte(0xDF); 3107 emit_operand32(rbp, adr); 3108 } 3109 3110 void Assembler::fild_s(Address adr) { 3111 InstructionMark im(this); 3112 emit_byte(0xDB); 3113 emit_operand32(rax, adr); 3114 } 3115 3116 void Assembler::fincstp() { 3117 emit_byte(0xD9); 3118 emit_byte(0xF7); 3119 } 3120 3121 void Assembler::finit() { 3122 emit_byte(0x9B); 3123 emit_byte(0xDB); 3124 emit_byte(0xE3); 3125 } 3126 3127 void Assembler::fist_s(Address adr) { 3128 InstructionMark im(this); 3129 emit_byte(0xDB); 3130 emit_operand32(rdx, adr); 3131 } 3132 3133 void Assembler::fistp_d(Address adr) { 3134 InstructionMark im(this); 3135 emit_byte(0xDF); 3136 emit_operand32(rdi, adr); 3137 } 3138 3139 void Assembler::fistp_s(Address adr) { 3140 InstructionMark im(this); 3141 emit_byte(0xDB); 3142 emit_operand32(rbx, adr); 3143 } 3144 3145 void Assembler::fld1() { 3146 emit_byte(0xD9); 3147 emit_byte(0xE8); 3148 } 3149 3150 void Assembler::fld_d(Address adr) { 3151 InstructionMark 
im(this); 3152 emit_byte(0xDD); 3153 emit_operand32(rax, adr); 3154 } 3155 3156 void Assembler::fld_s(Address adr) { 3157 InstructionMark im(this); 3158 emit_byte(0xD9); 3159 emit_operand32(rax, adr); 3160 } 3161 3162 3163 void Assembler::fld_s(int index) { 3164 emit_farith(0xD9, 0xC0, index); 3165 } 3166 3167 void Assembler::fld_x(Address adr) { 3168 InstructionMark im(this); 3169 emit_byte(0xDB); 3170 emit_operand32(rbp, adr); 3171 } 3172 3173 void Assembler::fldcw(Address src) { 3174 InstructionMark im(this); 3175 emit_byte(0xd9); 3176 emit_operand32(rbp, src); 3177 } 3178 3179 void Assembler::fldenv(Address src) { 3180 InstructionMark im(this); 3181 emit_byte(0xD9); 3182 emit_operand32(rsp, src); 3183 } 3184 3185 void Assembler::fldlg2() { 3186 emit_byte(0xD9); 3187 emit_byte(0xEC); 3188 } 3189 3190 void Assembler::fldln2() { 3191 emit_byte(0xD9); 3192 emit_byte(0xED); 3193 } 3194 3195 void Assembler::fldz() { 3196 emit_byte(0xD9); 3197 emit_byte(0xEE); 3198 } 3199 3200 void Assembler::flog() { 3201 fldln2(); 3202 fxch(); 3203 fyl2x(); 3204 } 3205 3206 void Assembler::flog10() { 3207 fldlg2(); 3208 fxch(); 3209 fyl2x(); 3210 } 3211 3212 void Assembler::fmul(int i) { 3213 emit_farith(0xD8, 0xC8, i); 3214 } 3215 3216 void Assembler::fmul_d(Address src) { 3217 InstructionMark im(this); 3218 emit_byte(0xDC); 3219 emit_operand32(rcx, src); 3220 } 3221 3222 void Assembler::fmul_s(Address src) { 3223 InstructionMark im(this); 3224 emit_byte(0xD8); 3225 emit_operand32(rcx, src); 3226 } 3227 3228 void Assembler::fmula(int i) { 3229 emit_farith(0xDC, 0xC8, i); 3230 } 3231 3232 void Assembler::fmulp(int i) { 3233 emit_farith(0xDE, 0xC8, i); 3234 } 3235 3236 void Assembler::fnsave(Address dst) { 3237 InstructionMark im(this); 3238 emit_byte(0xDD); 3239 emit_operand32(rsi, dst); 3240 } 3241 3242 void Assembler::fnstcw(Address src) { 3243 InstructionMark im(this); 3244 emit_byte(0x9B); 3245 emit_byte(0xD9); 3246 emit_operand32(rdi, src); 3247 } 3248 3249 void 
Assembler::fnstsw_ax() { 3250 emit_byte(0xdF); 3251 emit_byte(0xE0); 3252 } 3253 3254 void Assembler::fprem() { 3255 emit_byte(0xD9); 3256 emit_byte(0xF8); 3257 } 3258 3259 void Assembler::fprem1() { 3260 emit_byte(0xD9); 3261 emit_byte(0xF5); 3262 } 3263 3264 void Assembler::frstor(Address src) { 3265 InstructionMark im(this); 3266 emit_byte(0xDD); 3267 emit_operand32(rsp, src); 3268 } 3269 3270 void Assembler::fsin() { 3271 emit_byte(0xD9); 3272 emit_byte(0xFE); 3273 } 3274 3275 void Assembler::fsqrt() { 3276 emit_byte(0xD9); 3277 emit_byte(0xFA); 3278 } 3279 3280 void Assembler::fst_d(Address adr) { 3281 InstructionMark im(this); 3282 emit_byte(0xDD); 3283 emit_operand32(rdx, adr); 3284 } 3285 3286 void Assembler::fst_s(Address adr) { 3287 InstructionMark im(this); 3288 emit_byte(0xD9); 3289 emit_operand32(rdx, adr); 3290 } 3291 3292 void Assembler::fstp_d(Address adr) { 3293 InstructionMark im(this); 3294 emit_byte(0xDD); 3295 emit_operand32(rbx, adr); 3296 } 3297 3298 void Assembler::fstp_d(int index) { 3299 emit_farith(0xDD, 0xD8, index); 3300 } 3301 3302 void Assembler::fstp_s(Address adr) { 3303 InstructionMark im(this); 3304 emit_byte(0xD9); 3305 emit_operand32(rbx, adr); 3306 } 3307 3308 void Assembler::fstp_x(Address adr) { 3309 InstructionMark im(this); 3310 emit_byte(0xDB); 3311 emit_operand32(rdi, adr); 3312 } 3313 3314 void Assembler::fsub(int i) { 3315 emit_farith(0xD8, 0xE0, i); 3316 } 3317 3318 void Assembler::fsub_d(Address src) { 3319 InstructionMark im(this); 3320 emit_byte(0xDC); 3321 emit_operand32(rsp, src); 3322 } 3323 3324 void Assembler::fsub_s(Address src) { 3325 InstructionMark im(this); 3326 emit_byte(0xD8); 3327 emit_operand32(rsp, src); 3328 } 3329 3330 void Assembler::fsuba(int i) { 3331 emit_farith(0xDC, 0xE8, i); 3332 } 3333 3334 void Assembler::fsubp(int i) { 3335 emit_farith(0xDE, 0xE8, i); // ST(0) <- ST(0) - ST(1) and pop (Intel manual wrong) 3336 } 3337 3338 void Assembler::fsubr(int i) { 3339 emit_farith(0xD8, 0xE8, i); 3340 
} 3341 3342 void Assembler::fsubr_d(Address src) { 3343 InstructionMark im(this); 3344 emit_byte(0xDC); 3345 emit_operand32(rbp, src); 3346 } 3347 3348 void Assembler::fsubr_s(Address src) { 3349 InstructionMark im(this); 3350 emit_byte(0xD8); 3351 emit_operand32(rbp, src); 3352 } 3353 3354 void Assembler::fsubra(int i) { 3355 emit_farith(0xDC, 0xE0, i); 3356 } 3357 3358 void Assembler::fsubrp(int i) { 3359 emit_farith(0xDE, 0xE0, i); // ST(0) <- ST(1) - ST(0) and pop (Intel manual wrong) 3360 } 3361 3362 void Assembler::ftan() { 3363 emit_byte(0xD9); 3364 emit_byte(0xF2); 3365 emit_byte(0xDD); 3366 emit_byte(0xD8); 3367 } 3368 3369 void Assembler::ftst() { 3370 emit_byte(0xD9); 3371 emit_byte(0xE4); 3372 } 3373 3374 void Assembler::fucomi(int i) { 3375 // make sure the instruction is supported (introduced for P6, together with cmov) 3376 guarantee(VM_Version::supports_cmov(), "illegal instruction"); 3377 emit_farith(0xDB, 0xE8, i); 3378 } 3379 3380 void Assembler::fucomip(int i) { 3381 // make sure the instruction is supported (introduced for P6, together with cmov) 3382 guarantee(VM_Version::supports_cmov(), "illegal instruction"); 3383 emit_farith(0xDF, 0xE8, i); 3384 } 3385 3386 void Assembler::fwait() { 3387 emit_byte(0x9B); 3388 } 3389 3390 void Assembler::fxch(int i) { 3391 emit_farith(0xD9, 0xC8, i); 3392 } 3393 3394 void Assembler::fyl2x() { 3395 emit_byte(0xD9); 3396 emit_byte(0xF1); 3397 } 3398 3399 3400 #ifndef _LP64 3401 3402 void Assembler::incl(Register dst) { 3403 // Don't use it directly. Use MacroAssembler::incrementl() instead. 
3404 emit_byte(0x40 | dst->encoding()); 3405 } 3406 3407 void Assembler::lea(Register dst, Address src) { 3408 leal(dst, src); 3409 } 3410 3411 void Assembler::mov_literal32(Address dst, int32_t imm32, RelocationHolder const& rspec) { 3412 InstructionMark im(this); 3413 emit_byte(0xC7); 3414 emit_operand(rax, dst); 3415 emit_data((int)imm32, rspec, 0); 3416 } 3417 3418 void Assembler::mov_literal32(Register dst, int32_t imm32, RelocationHolder const& rspec) { 3419 InstructionMark im(this); 3420 int encode = prefix_and_encode(dst->encoding()); 3421 emit_byte(0xB8 | encode); 3422 emit_data((int)imm32, rspec, 0); 3423 } 3424 3425 void Assembler::popa() { // 32bit 3426 emit_byte(0x61); 3427 } 3428 3429 void Assembler::push_literal32(int32_t imm32, RelocationHolder const& rspec) { 3430 InstructionMark im(this); 3431 emit_byte(0x68); 3432 emit_data(imm32, rspec, 0); 3433 } 3434 3435 void Assembler::pusha() { // 32bit 3436 emit_byte(0x60); 3437 } 3438 3439 void Assembler::set_byte_if_not_zero(Register dst) { 3440 emit_byte(0x0F); 3441 emit_byte(0x95); 3442 emit_byte(0xE0 | dst->encoding()); 3443 } 3444 3445 void Assembler::shldl(Register dst, Register src) { 3446 emit_byte(0x0F); 3447 emit_byte(0xA5); 3448 emit_byte(0xC0 | src->encoding() << 3 | dst->encoding()); 3449 } 3450 3451 void Assembler::shrdl(Register dst, Register src) { 3452 emit_byte(0x0F); 3453 emit_byte(0xAD); 3454 emit_byte(0xC0 | src->encoding() << 3 | dst->encoding()); 3455 } 3456 3457 #else // LP64 3458 3459 void Assembler::set_byte_if_not_zero(Register dst) { 3460 int enc = prefix_and_encode(dst->encoding(), true); 3461 emit_byte(0x0F); 3462 emit_byte(0x95); 3463 emit_byte(0xE0 | enc); 3464 } 3465 3466 // 64bit only pieces of the assembler 3467 // This should only be used by 64bit instructions that can use rip-relative 3468 // it cannot be used by instructions that want an immediate value. 
3469 3470 bool Assembler::reachable(AddressLiteral adr) { 3471 int64_t disp; 3472 // None will force a 64bit literal to the code stream. Likely a placeholder 3473 // for something that will be patched later and we need to certain it will 3474 // always be reachable. 3475 if (adr.reloc() == relocInfo::none) { 3476 return false; 3477 } 3478 if (adr.reloc() == relocInfo::internal_word_type) { 3479 // This should be rip relative and easily reachable. 3480 return true; 3481 } 3482 if (adr.reloc() == relocInfo::virtual_call_type || 3483 adr.reloc() == relocInfo::opt_virtual_call_type || 3484 adr.reloc() == relocInfo::static_call_type || 3485 adr.reloc() == relocInfo::static_stub_type ) { 3486 // This should be rip relative within the code cache and easily 3487 // reachable until we get huge code caches. (At which point 3488 // ic code is going to have issues). 3489 return true; 3490 } 3491 if (adr.reloc() != relocInfo::external_word_type && 3492 adr.reloc() != relocInfo::poll_return_type && // these are really external_word but need special 3493 adr.reloc() != relocInfo::poll_type && // relocs to identify them 3494 adr.reloc() != relocInfo::runtime_call_type ) { 3495 return false; 3496 } 3497 3498 // Stress the correction code 3499 if (ForceUnreachable) { 3500 // Must be runtimecall reloc, see if it is in the codecache 3501 // Flipping stuff in the codecache to be unreachable causes issues 3502 // with things like inline caches where the additional instructions 3503 // are not handled. 3504 if (CodeCache::find_blob(adr._target) == NULL) { 3505 return false; 3506 } 3507 } 3508 // For external_word_type/runtime_call_type if it is reachable from where we 3509 // are now (possibly a temp buffer) and where we might end up 3510 // anywhere in the codeCache then we are always reachable. 3511 // This would have to change if we ever save/restore shared code 3512 // to be more pessimistic. 
3513 disp = (int64_t)adr._target - ((int64_t)CodeCache::low_bound() + sizeof(int)); 3514 if (!is_simm32(disp)) return false; 3515 disp = (int64_t)adr._target - ((int64_t)CodeCache::high_bound() + sizeof(int)); 3516 if (!is_simm32(disp)) return false; 3517 3518 disp = (int64_t)adr._target - ((int64_t)_code_pos + sizeof(int)); 3519 3520 // Because rip relative is a disp + address_of_next_instruction and we 3521 // don't know the value of address_of_next_instruction we apply a fudge factor 3522 // to make sure we will be ok no matter the size of the instruction we get placed into. 3523 // We don't have to fudge the checks above here because they are already worst case. 3524 3525 // 12 == override/rex byte, opcode byte, rm byte, sib byte, a 4-byte disp , 4-byte literal 3526 // + 4 because better safe than sorry. 3527 const int fudge = 12 + 4; 3528 if (disp < 0) { 3529 disp -= fudge; 3530 } else { 3531 disp += fudge; 3532 } 3533 return is_simm32(disp); 3534 } 3535 3536 // Check if the polling page is not reachable from the code cache using rip-relative 3537 // addressing. 3538 bool Assembler::is_polling_page_far() { 3539 intptr_t addr = (intptr_t)os::get_polling_page(); 3540 return !is_simm32(addr - (intptr_t)CodeCache::low_bound()) || 3541 !is_simm32(addr - (intptr_t)CodeCache::high_bound()); 3542 } 3543 3544 void Assembler::emit_data64(jlong data, 3545 relocInfo::relocType rtype, 3546 int format) { 3547 if (rtype == relocInfo::none) { 3548 emit_long64(data); 3549 } else { 3550 emit_data64(data, Relocation::spec_simple(rtype), format); 3551 } 3552 } 3553 3554 void Assembler::emit_data64(jlong data, 3555 RelocationHolder const& rspec, 3556 int format) { 3557 assert(imm_operand == 0, "default format must be immediate in this file"); 3558 assert(imm_operand == format, "must be immediate"); 3559 assert(inst_mark() != NULL, "must be inside InstructionMark"); 3560 // Do not use AbstractAssembler::relocate, which is not intended for 3561 // embedded words. 
Instead, relocate to the enclosing instruction. 3562 code_section()->relocate(inst_mark(), rspec, format); 3563 #ifdef ASSERT 3564 check_relocation(rspec, format); 3565 #endif 3566 emit_long64(data); 3567 } 3568 3569 int Assembler::prefix_and_encode(int reg_enc, bool byteinst) { 3570 if (reg_enc >= 8) { 3571 prefix(REX_B); 3572 reg_enc -= 8; 3573 } else if (byteinst && reg_enc >= 4) { 3574 prefix(REX); 3575 } 3576 return reg_enc; 3577 } 3578 3579 int Assembler::prefixq_and_encode(int reg_enc) { 3580 if (reg_enc < 8) { 3581 prefix(REX_W); 3582 } else { 3583 prefix(REX_WB); 3584 reg_enc -= 8; 3585 } 3586 return reg_enc; 3587 } 3588 3589 int Assembler::prefix_and_encode(int dst_enc, int src_enc, bool byteinst) { 3590 if (dst_enc < 8) { 3591 if (src_enc >= 8) { 3592 prefix(REX_B); 3593 src_enc -= 8; 3594 } else if (byteinst && src_enc >= 4) { 3595 prefix(REX); 3596 } 3597 } else { 3598 if (src_enc < 8) { 3599 prefix(REX_R); 3600 } else { 3601 prefix(REX_RB); 3602 src_enc -= 8; 3603 } 3604 dst_enc -= 8; 3605 } 3606 return dst_enc << 3 | src_enc; 3607 } 3608 3609 int Assembler::prefixq_and_encode(int dst_enc, int src_enc) { 3610 if (dst_enc < 8) { 3611 if (src_enc < 8) { 3612 prefix(REX_W); 3613 } else { 3614 prefix(REX_WB); 3615 src_enc -= 8; 3616 } 3617 } else { 3618 if (src_enc < 8) { 3619 prefix(REX_WR); 3620 } else { 3621 prefix(REX_WRB); 3622 src_enc -= 8; 3623 } 3624 dst_enc -= 8; 3625 } 3626 return dst_enc << 3 | src_enc; 3627 } 3628 3629 void Assembler::prefix(Register reg) { 3630 if (reg->encoding() >= 8) { 3631 prefix(REX_B); 3632 } 3633 } 3634 3635 void Assembler::prefix(Address adr) { 3636 if (adr.base_needs_rex()) { 3637 if (adr.index_needs_rex()) { 3638 prefix(REX_XB); 3639 } else { 3640 prefix(REX_B); 3641 } 3642 } else { 3643 if (adr.index_needs_rex()) { 3644 prefix(REX_X); 3645 } 3646 } 3647 } 3648 3649 void Assembler::prefixq(Address adr) { 3650 if (adr.base_needs_rex()) { 3651 if (adr.index_needs_rex()) { 3652 prefix(REX_WXB); 3653 } else { 3654 
prefix(REX_WB); 3655 } 3656 } else { 3657 if (adr.index_needs_rex()) { 3658 prefix(REX_WX); 3659 } else { 3660 prefix(REX_W); 3661 } 3662 } 3663 } 3664 3665 3666 void Assembler::prefix(Address adr, Register reg, bool byteinst) { 3667 if (reg->encoding() < 8) { 3668 if (adr.base_needs_rex()) { 3669 if (adr.index_needs_rex()) { 3670 prefix(REX_XB); 3671 } else { 3672 prefix(REX_B); 3673 } 3674 } else { 3675 if (adr.index_needs_rex()) { 3676 prefix(REX_X); 3677 } else if (reg->encoding() >= 4 ) { 3678 prefix(REX); 3679 } 3680 } 3681 } else { 3682 if (adr.base_needs_rex()) { 3683 if (adr.index_needs_rex()) { 3684 prefix(REX_RXB); 3685 } else { 3686 prefix(REX_RB); 3687 } 3688 } else { 3689 if (adr.index_needs_rex()) { 3690 prefix(REX_RX); 3691 } else { 3692 prefix(REX_R); 3693 } 3694 } 3695 } 3696 } 3697 3698 void Assembler::prefixq(Address adr, Register src) { 3699 if (src->encoding() < 8) { 3700 if (adr.base_needs_rex()) { 3701 if (adr.index_needs_rex()) { 3702 prefix(REX_WXB); 3703 } else { 3704 prefix(REX_WB); 3705 } 3706 } else { 3707 if (adr.index_needs_rex()) { 3708 prefix(REX_WX); 3709 } else { 3710 prefix(REX_W); 3711 } 3712 } 3713 } else { 3714 if (adr.base_needs_rex()) { 3715 if (adr.index_needs_rex()) { 3716 prefix(REX_WRXB); 3717 } else { 3718 prefix(REX_WRB); 3719 } 3720 } else { 3721 if (adr.index_needs_rex()) { 3722 prefix(REX_WRX); 3723 } else { 3724 prefix(REX_WR); 3725 } 3726 } 3727 } 3728 } 3729 3730 void Assembler::prefix(Address adr, XMMRegister reg) { 3731 if (reg->encoding() < 8) { 3732 if (adr.base_needs_rex()) { 3733 if (adr.index_needs_rex()) { 3734 prefix(REX_XB); 3735 } else { 3736 prefix(REX_B); 3737 } 3738 } else { 3739 if (adr.index_needs_rex()) { 3740 prefix(REX_X); 3741 } 3742 } 3743 } else { 3744 if (adr.base_needs_rex()) { 3745 if (adr.index_needs_rex()) { 3746 prefix(REX_RXB); 3747 } else { 3748 prefix(REX_RB); 3749 } 3750 } else { 3751 if (adr.index_needs_rex()) { 3752 prefix(REX_RX); 3753 } else { 3754 prefix(REX_R); 3755 } 3756 } 
3757 } 3758 } 3759 3760 void Assembler::adcq(Register dst, int32_t imm32) { 3761 (void) prefixq_and_encode(dst->encoding()); 3762 emit_arith(0x81, 0xD0, dst, imm32); 3763 } 3764 3765 void Assembler::adcq(Register dst, Address src) { 3766 InstructionMark im(this); 3767 prefixq(src, dst); 3768 emit_byte(0x13); 3769 emit_operand(dst, src); 3770 } 3771 3772 void Assembler::adcq(Register dst, Register src) { 3773 (int) prefixq_and_encode(dst->encoding(), src->encoding()); 3774 emit_arith(0x13, 0xC0, dst, src); 3775 } 3776 3777 void Assembler::addq(Address dst, int32_t imm32) { 3778 InstructionMark im(this); 3779 prefixq(dst); 3780 emit_arith_operand(0x81, rax, dst,imm32); 3781 } 3782 3783 void Assembler::addq(Address dst, Register src) { 3784 InstructionMark im(this); 3785 prefixq(dst, src); 3786 emit_byte(0x01); 3787 emit_operand(src, dst); 3788 } 3789 3790 void Assembler::addq(Register dst, int32_t imm32) { 3791 (void) prefixq_and_encode(dst->encoding()); 3792 emit_arith(0x81, 0xC0, dst, imm32); 3793 } 3794 3795 void Assembler::addq(Register dst, Address src) { 3796 InstructionMark im(this); 3797 prefixq(src, dst); 3798 emit_byte(0x03); 3799 emit_operand(dst, src); 3800 } 3801 3802 void Assembler::addq(Register dst, Register src) { 3803 (void) prefixq_and_encode(dst->encoding(), src->encoding()); 3804 emit_arith(0x03, 0xC0, dst, src); 3805 } 3806 3807 void Assembler::andq(Register dst, int32_t imm32) { 3808 (void) prefixq_and_encode(dst->encoding()); 3809 emit_arith(0x81, 0xE0, dst, imm32); 3810 } 3811 3812 void Assembler::andq(Register dst, Address src) { 3813 InstructionMark im(this); 3814 prefixq(src, dst); 3815 emit_byte(0x23); 3816 emit_operand(dst, src); 3817 } 3818 3819 void Assembler::andq(Register dst, Register src) { 3820 (int) prefixq_and_encode(dst->encoding(), src->encoding()); 3821 emit_arith(0x23, 0xC0, dst, src); 3822 } 3823 3824 void Assembler::bsfq(Register dst, Register src) { 3825 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 
3826 emit_byte(0x0F); 3827 emit_byte(0xBC); 3828 emit_byte(0xC0 | encode); 3829 } 3830 3831 void Assembler::bsrq(Register dst, Register src) { 3832 assert(!VM_Version::supports_lzcnt(), "encoding is treated as LZCNT"); 3833 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 3834 emit_byte(0x0F); 3835 emit_byte(0xBD); 3836 emit_byte(0xC0 | encode); 3837 } 3838 3839 void Assembler::bswapq(Register reg) { 3840 int encode = prefixq_and_encode(reg->encoding()); 3841 emit_byte(0x0F); 3842 emit_byte(0xC8 | encode); 3843 } 3844 3845 void Assembler::cdqq() { 3846 prefix(REX_W); 3847 emit_byte(0x99); 3848 } 3849 3850 void Assembler::clflush(Address adr) { 3851 prefix(adr); 3852 emit_byte(0x0F); 3853 emit_byte(0xAE); 3854 emit_operand(rdi, adr); 3855 } 3856 3857 void Assembler::cmovq(Condition cc, Register dst, Register src) { 3858 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 3859 emit_byte(0x0F); 3860 emit_byte(0x40 | cc); 3861 emit_byte(0xC0 | encode); 3862 } 3863 3864 void Assembler::cmovq(Condition cc, Register dst, Address src) { 3865 InstructionMark im(this); 3866 prefixq(src, dst); 3867 emit_byte(0x0F); 3868 emit_byte(0x40 | cc); 3869 emit_operand(dst, src); 3870 } 3871 3872 void Assembler::cmpq(Address dst, int32_t imm32) { 3873 InstructionMark im(this); 3874 prefixq(dst); 3875 emit_byte(0x81); 3876 emit_operand(rdi, dst, 4); 3877 emit_long(imm32); 3878 } 3879 3880 void Assembler::cmpq(Register dst, int32_t imm32) { 3881 (void) prefixq_and_encode(dst->encoding()); 3882 emit_arith(0x81, 0xF8, dst, imm32); 3883 } 3884 3885 void Assembler::cmpq(Address dst, Register src) { 3886 InstructionMark im(this); 3887 prefixq(dst, src); 3888 emit_byte(0x3B); 3889 emit_operand(src, dst); 3890 } 3891 3892 void Assembler::cmpq(Register dst, Register src) { 3893 (void) prefixq_and_encode(dst->encoding(), src->encoding()); 3894 emit_arith(0x3B, 0xC0, dst, src); 3895 } 3896 3897 void Assembler::cmpq(Register dst, Address src) { 3898 InstructionMark 
im(this); 3899 prefixq(src, dst); 3900 emit_byte(0x3B); 3901 emit_operand(dst, src); 3902 } 3903 3904 void Assembler::cmpxchgq(Register reg, Address adr) { 3905 InstructionMark im(this); 3906 prefixq(adr, reg); 3907 emit_byte(0x0F); 3908 emit_byte(0xB1); 3909 emit_operand(reg, adr); 3910 } 3911 3912 void Assembler::cvtsi2sdq(XMMRegister dst, Register src) { 3913 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3914 emit_byte(0xF2); 3915 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 3916 emit_byte(0x0F); 3917 emit_byte(0x2A); 3918 emit_byte(0xC0 | encode); 3919 } 3920 3921 void Assembler::cvtsi2ssq(XMMRegister dst, Register src) { 3922 NOT_LP64(assert(VM_Version::supports_sse(), "")); 3923 emit_byte(0xF3); 3924 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 3925 emit_byte(0x0F); 3926 emit_byte(0x2A); 3927 emit_byte(0xC0 | encode); 3928 } 3929 3930 void Assembler::cvttsd2siq(Register dst, XMMRegister src) { 3931 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3932 emit_byte(0xF2); 3933 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 3934 emit_byte(0x0F); 3935 emit_byte(0x2C); 3936 emit_byte(0xC0 | encode); 3937 } 3938 3939 void Assembler::cvttss2siq(Register dst, XMMRegister src) { 3940 NOT_LP64(assert(VM_Version::supports_sse(), "")); 3941 emit_byte(0xF3); 3942 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 3943 emit_byte(0x0F); 3944 emit_byte(0x2C); 3945 emit_byte(0xC0 | encode); 3946 } 3947 3948 void Assembler::decl(Register dst) { 3949 // Don't use it directly. Use MacroAssembler::decrementl() instead. 3950 // Use two-byte form (one-byte form is a REX prefix in 64-bit mode) 3951 int encode = prefix_and_encode(dst->encoding()); 3952 emit_byte(0xFF); 3953 emit_byte(0xC8 | encode); 3954 } 3955 3956 void Assembler::decq(Register dst) { 3957 // Don't use it directly. Use MacroAssembler::decrementq() instead. 
3958 // Use two-byte form (one-byte from is a REX prefix in 64-bit mode) 3959 int encode = prefixq_and_encode(dst->encoding()); 3960 emit_byte(0xFF); 3961 emit_byte(0xC8 | encode); 3962 } 3963 3964 void Assembler::decq(Address dst) { 3965 // Don't use it directly. Use MacroAssembler::decrementq() instead. 3966 InstructionMark im(this); 3967 prefixq(dst); 3968 emit_byte(0xFF); 3969 emit_operand(rcx, dst); 3970 } 3971 3972 void Assembler::fxrstor(Address src) { 3973 prefixq(src); 3974 emit_byte(0x0F); 3975 emit_byte(0xAE); 3976 emit_operand(as_Register(1), src); 3977 } 3978 3979 void Assembler::fxsave(Address dst) { 3980 prefixq(dst); 3981 emit_byte(0x0F); 3982 emit_byte(0xAE); 3983 emit_operand(as_Register(0), dst); 3984 } 3985 3986 void Assembler::idivq(Register src) { 3987 int encode = prefixq_and_encode(src->encoding()); 3988 emit_byte(0xF7); 3989 emit_byte(0xF8 | encode); 3990 } 3991 3992 void Assembler::imulq(Register dst, Register src) { 3993 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 3994 emit_byte(0x0F); 3995 emit_byte(0xAF); 3996 emit_byte(0xC0 | encode); 3997 } 3998 3999 void Assembler::imulq(Register dst, Register src, int value) { 4000 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 4001 if (is8bit(value)) { 4002 emit_byte(0x6B); 4003 emit_byte(0xC0 | encode); 4004 emit_byte(value & 0xFF); 4005 } else { 4006 emit_byte(0x69); 4007 emit_byte(0xC0 | encode); 4008 emit_long(value); 4009 } 4010 } 4011 4012 void Assembler::incl(Register dst) { 4013 // Don't use it directly. Use MacroAssembler::incrementl() instead. 4014 // Use two-byte form (one-byte from is a REX prefix in 64-bit mode) 4015 int encode = prefix_and_encode(dst->encoding()); 4016 emit_byte(0xFF); 4017 emit_byte(0xC0 | encode); 4018 } 4019 4020 void Assembler::incq(Register dst) { 4021 // Don't use it directly. Use MacroAssembler::incrementq() instead. 
4022 // Use two-byte form (one-byte from is a REX prefix in 64-bit mode) 4023 int encode = prefixq_and_encode(dst->encoding()); 4024 emit_byte(0xFF); 4025 emit_byte(0xC0 | encode); 4026 } 4027 4028 void Assembler::incq(Address dst) { 4029 // Don't use it directly. Use MacroAssembler::incrementq() instead. 4030 InstructionMark im(this); 4031 prefixq(dst); 4032 emit_byte(0xFF); 4033 emit_operand(rax, dst); 4034 } 4035 4036 void Assembler::lea(Register dst, Address src) { 4037 leaq(dst, src); 4038 } 4039 4040 void Assembler::leaq(Register dst, Address src) { 4041 InstructionMark im(this); 4042 prefixq(src, dst); 4043 emit_byte(0x8D); 4044 emit_operand(dst, src); 4045 } 4046 4047 void Assembler::mov64(Register dst, int64_t imm64) { 4048 InstructionMark im(this); 4049 int encode = prefixq_and_encode(dst->encoding()); 4050 emit_byte(0xB8 | encode); 4051 emit_long64(imm64); 4052 } 4053 4054 void Assembler::mov_literal64(Register dst, intptr_t imm64, RelocationHolder const& rspec) { 4055 InstructionMark im(this); 4056 int encode = prefixq_and_encode(dst->encoding()); 4057 emit_byte(0xB8 | encode); 4058 emit_data64(imm64, rspec); 4059 } 4060 4061 void Assembler::mov_narrow_oop(Register dst, int32_t imm32, RelocationHolder const& rspec) { 4062 InstructionMark im(this); 4063 int encode = prefix_and_encode(dst->encoding()); 4064 emit_byte(0xB8 | encode); 4065 emit_data((int)imm32, rspec, narrow_oop_operand); 4066 } 4067 4068 void Assembler::mov_narrow_oop(Address dst, int32_t imm32, RelocationHolder const& rspec) { 4069 InstructionMark im(this); 4070 prefix(dst); 4071 emit_byte(0xC7); 4072 emit_operand(rax, dst, 4); 4073 emit_data((int)imm32, rspec, narrow_oop_operand); 4074 } 4075 4076 void Assembler::cmp_narrow_oop(Register src1, int32_t imm32, RelocationHolder const& rspec) { 4077 InstructionMark im(this); 4078 int encode = prefix_and_encode(src1->encoding()); 4079 emit_byte(0x81); 4080 emit_byte(0xF8 | encode); 4081 emit_data((int)imm32, rspec, narrow_oop_operand); 4082 } 
4083 4084 void Assembler::cmp_narrow_oop(Address src1, int32_t imm32, RelocationHolder const& rspec) { 4085 InstructionMark im(this); 4086 prefix(src1); 4087 emit_byte(0x81); 4088 emit_operand(rax, src1, 4); 4089 emit_data((int)imm32, rspec, narrow_oop_operand); 4090 } 4091 4092 void Assembler::lzcntq(Register dst, Register src) { 4093 assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR"); 4094 emit_byte(0xF3); 4095 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 4096 emit_byte(0x0F); 4097 emit_byte(0xBD); 4098 emit_byte(0xC0 | encode); 4099 } 4100 4101 void Assembler::movdq(XMMRegister dst, Register src) { 4102 // table D-1 says MMX/SSE2 4103 NOT_LP64(assert(VM_Version::supports_sse2() || VM_Version::supports_mmx(), "")); 4104 emit_byte(0x66); 4105 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 4106 emit_byte(0x0F); 4107 emit_byte(0x6E); 4108 emit_byte(0xC0 | encode); 4109 } 4110 4111 void Assembler::movdq(Register dst, XMMRegister src) { 4112 // table D-1 says MMX/SSE2 4113 NOT_LP64(assert(VM_Version::supports_sse2() || VM_Version::supports_mmx(), "")); 4114 emit_byte(0x66); 4115 // swap src/dst to get correct prefix 4116 int encode = prefixq_and_encode(src->encoding(), dst->encoding()); 4117 emit_byte(0x0F); 4118 emit_byte(0x7E); 4119 emit_byte(0xC0 | encode); 4120 } 4121 4122 void Assembler::movq(Register dst, Register src) { 4123 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 4124 emit_byte(0x8B); 4125 emit_byte(0xC0 | encode); 4126 } 4127 4128 void Assembler::movq(Register dst, Address src) { 4129 InstructionMark im(this); 4130 prefixq(src, dst); 4131 emit_byte(0x8B); 4132 emit_operand(dst, src); 4133 } 4134 4135 void Assembler::movq(Address dst, Register src) { 4136 InstructionMark im(this); 4137 prefixq(dst, src); 4138 emit_byte(0x89); 4139 emit_operand(src, dst); 4140 } 4141 4142 void Assembler::movsbq(Register dst, Address src) { 4143 InstructionMark im(this); 4144 prefixq(src, dst); 
4145 emit_byte(0x0F); 4146 emit_byte(0xBE); 4147 emit_operand(dst, src); 4148 } 4149 4150 void Assembler::movsbq(Register dst, Register src) { 4151 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 4152 emit_byte(0x0F); 4153 emit_byte(0xBE); 4154 emit_byte(0xC0 | encode); 4155 } 4156 4157 void Assembler::movslq(Register dst, int32_t imm32) { 4158 // dbx shows movslq(rcx, 3) as movq $0x0000000049000000,(%rbx) 4159 // and movslq(r8, 3); as movl $0x0000000048000000,(%rbx) 4160 // as a result we shouldn't use until tested at runtime... 4161 ShouldNotReachHere(); 4162 InstructionMark im(this); 4163 int encode = prefixq_and_encode(dst->encoding()); 4164 emit_byte(0xC7 | encode); 4165 emit_long(imm32); 4166 } 4167 4168 void Assembler::movslq(Address dst, int32_t imm32) { 4169 assert(is_simm32(imm32), "lost bits"); 4170 InstructionMark im(this); 4171 prefixq(dst); 4172 emit_byte(0xC7); 4173 emit_operand(rax, dst, 4); 4174 emit_long(imm32); 4175 } 4176 4177 void Assembler::movslq(Register dst, Address src) { 4178 InstructionMark im(this); 4179 prefixq(src, dst); 4180 emit_byte(0x63); 4181 emit_operand(dst, src); 4182 } 4183 4184 void Assembler::movslq(Register dst, Register src) { 4185 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 4186 emit_byte(0x63); 4187 emit_byte(0xC0 | encode); 4188 } 4189 4190 void Assembler::movswq(Register dst, Address src) { 4191 InstructionMark im(this); 4192 prefixq(src, dst); 4193 emit_byte(0x0F); 4194 emit_byte(0xBF); 4195 emit_operand(dst, src); 4196 } 4197 4198 void Assembler::movswq(Register dst, Register src) { 4199 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 4200 emit_byte(0x0F); 4201 emit_byte(0xBF); 4202 emit_byte(0xC0 | encode); 4203 } 4204 4205 void Assembler::movzbq(Register dst, Address src) { 4206 InstructionMark im(this); 4207 prefixq(src, dst); 4208 emit_byte(0x0F); 4209 emit_byte(0xB6); 4210 emit_operand(dst, src); 4211 } 4212 4213 void Assembler::movzbq(Register dst, 
Register src) { 4214 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 4215 emit_byte(0x0F); 4216 emit_byte(0xB6); 4217 emit_byte(0xC0 | encode); 4218 } 4219 4220 void Assembler::movzwq(Register dst, Address src) { 4221 InstructionMark im(this); 4222 prefixq(src, dst); 4223 emit_byte(0x0F); 4224 emit_byte(0xB7); 4225 emit_operand(dst, src); 4226 } 4227 4228 void Assembler::movzwq(Register dst, Register src) { 4229 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 4230 emit_byte(0x0F); 4231 emit_byte(0xB7); 4232 emit_byte(0xC0 | encode); 4233 } 4234 4235 void Assembler::negq(Register dst) { 4236 int encode = prefixq_and_encode(dst->encoding()); 4237 emit_byte(0xF7); 4238 emit_byte(0xD8 | encode); 4239 } 4240 4241 void Assembler::notq(Register dst) { 4242 int encode = prefixq_and_encode(dst->encoding()); 4243 emit_byte(0xF7); 4244 emit_byte(0xD0 | encode); 4245 } 4246 4247 void Assembler::orq(Address dst, int32_t imm32) { 4248 InstructionMark im(this); 4249 prefixq(dst); 4250 emit_byte(0x81); 4251 emit_operand(rcx, dst, 4); 4252 emit_long(imm32); 4253 } 4254 4255 void Assembler::orq(Register dst, int32_t imm32) { 4256 (void) prefixq_and_encode(dst->encoding()); 4257 emit_arith(0x81, 0xC8, dst, imm32); 4258 } 4259 4260 void Assembler::orq(Register dst, Address src) { 4261 InstructionMark im(this); 4262 prefixq(src, dst); 4263 emit_byte(0x0B); 4264 emit_operand(dst, src); 4265 } 4266 4267 void Assembler::orq(Register dst, Register src) { 4268 (void) prefixq_and_encode(dst->encoding(), src->encoding()); 4269 emit_arith(0x0B, 0xC0, dst, src); 4270 } 4271 4272 void Assembler::popa() { // 64bit 4273 movq(r15, Address(rsp, 0)); 4274 movq(r14, Address(rsp, wordSize)); 4275 movq(r13, Address(rsp, 2 * wordSize)); 4276 movq(r12, Address(rsp, 3 * wordSize)); 4277 movq(r11, Address(rsp, 4 * wordSize)); 4278 movq(r10, Address(rsp, 5 * wordSize)); 4279 movq(r9, Address(rsp, 6 * wordSize)); 4280 movq(r8, Address(rsp, 7 * wordSize)); 4281 movq(rdi, 
Address(rsp, 8 * wordSize)); 4282 movq(rsi, Address(rsp, 9 * wordSize)); 4283 movq(rbp, Address(rsp, 10 * wordSize)); 4284 // skip rsp 4285 movq(rbx, Address(rsp, 12 * wordSize)); 4286 movq(rdx, Address(rsp, 13 * wordSize)); 4287 movq(rcx, Address(rsp, 14 * wordSize)); 4288 movq(rax, Address(rsp, 15 * wordSize)); 4289 4290 addq(rsp, 16 * wordSize); 4291 } 4292 4293 void Assembler::popcntq(Register dst, Address src) { 4294 assert(VM_Version::supports_popcnt(), "must support"); 4295 InstructionMark im(this); 4296 emit_byte(0xF3); 4297 prefixq(src, dst); 4298 emit_byte(0x0F); 4299 emit_byte(0xB8); 4300 emit_operand(dst, src); 4301 } 4302 4303 void Assembler::popcntq(Register dst, Register src) { 4304 assert(VM_Version::supports_popcnt(), "must support"); 4305 emit_byte(0xF3); 4306 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 4307 emit_byte(0x0F); 4308 emit_byte(0xB8); 4309 emit_byte(0xC0 | encode); 4310 } 4311 4312 void Assembler::popq(Address dst) { 4313 InstructionMark im(this); 4314 prefixq(dst); 4315 emit_byte(0x8F); 4316 emit_operand(rax, dst); 4317 } 4318 4319 void Assembler::pusha() { // 64bit 4320 // we have to store original rsp. ABI says that 128 bytes 4321 // below rsp are local scratch. 
4322 movq(Address(rsp, -5 * wordSize), rsp); 4323 4324 subq(rsp, 16 * wordSize); 4325 4326 movq(Address(rsp, 15 * wordSize), rax); 4327 movq(Address(rsp, 14 * wordSize), rcx); 4328 movq(Address(rsp, 13 * wordSize), rdx); 4329 movq(Address(rsp, 12 * wordSize), rbx); 4330 // skip rsp 4331 movq(Address(rsp, 10 * wordSize), rbp); 4332 movq(Address(rsp, 9 * wordSize), rsi); 4333 movq(Address(rsp, 8 * wordSize), rdi); 4334 movq(Address(rsp, 7 * wordSize), r8); 4335 movq(Address(rsp, 6 * wordSize), r9); 4336 movq(Address(rsp, 5 * wordSize), r10); 4337 movq(Address(rsp, 4 * wordSize), r11); 4338 movq(Address(rsp, 3 * wordSize), r12); 4339 movq(Address(rsp, 2 * wordSize), r13); 4340 movq(Address(rsp, wordSize), r14); 4341 movq(Address(rsp, 0), r15); 4342 } 4343 4344 void Assembler::pushq(Address src) { 4345 InstructionMark im(this); 4346 prefixq(src); 4347 emit_byte(0xFF); 4348 emit_operand(rsi, src); 4349 } 4350 4351 void Assembler::rclq(Register dst, int imm8) { 4352 assert(isShiftCount(imm8 >> 1), "illegal shift count"); 4353 int encode = prefixq_and_encode(dst->encoding()); 4354 if (imm8 == 1) { 4355 emit_byte(0xD1); 4356 emit_byte(0xD0 | encode); 4357 } else { 4358 emit_byte(0xC1); 4359 emit_byte(0xD0 | encode); 4360 emit_byte(imm8); 4361 } 4362 } 4363 void Assembler::sarq(Register dst, int imm8) { 4364 assert(isShiftCount(imm8 >> 1), "illegal shift count"); 4365 int encode = prefixq_and_encode(dst->encoding()); 4366 if (imm8 == 1) { 4367 emit_byte(0xD1); 4368 emit_byte(0xF8 | encode); 4369 } else { 4370 emit_byte(0xC1); 4371 emit_byte(0xF8 | encode); 4372 emit_byte(imm8); 4373 } 4374 } 4375 4376 void Assembler::sarq(Register dst) { 4377 int encode = prefixq_and_encode(dst->encoding()); 4378 emit_byte(0xD3); 4379 emit_byte(0xF8 | encode); 4380 } 4381 4382 void Assembler::sbbq(Address dst, int32_t imm32) { 4383 InstructionMark im(this); 4384 prefixq(dst); 4385 emit_arith_operand(0x81, rbx, dst, imm32); 4386 } 4387 4388 void Assembler::sbbq(Register dst, int32_t imm32) { 
4389 (void) prefixq_and_encode(dst->encoding()); 4390 emit_arith(0x81, 0xD8, dst, imm32); 4391 } 4392 4393 void Assembler::sbbq(Register dst, Address src) { 4394 InstructionMark im(this); 4395 prefixq(src, dst); 4396 emit_byte(0x1B); 4397 emit_operand(dst, src); 4398 } 4399 4400 void Assembler::sbbq(Register dst, Register src) { 4401 (void) prefixq_and_encode(dst->encoding(), src->encoding()); 4402 emit_arith(0x1B, 0xC0, dst, src); 4403 } 4404 4405 void Assembler::shlq(Register dst, int imm8) { 4406 assert(isShiftCount(imm8 >> 1), "illegal shift count"); 4407 int encode = prefixq_and_encode(dst->encoding()); 4408 if (imm8 == 1) { 4409 emit_byte(0xD1); 4410 emit_byte(0xE0 | encode); 4411 } else { 4412 emit_byte(0xC1); 4413 emit_byte(0xE0 | encode); 4414 emit_byte(imm8); 4415 } 4416 } 4417 4418 void Assembler::shlq(Register dst) { 4419 int encode = prefixq_and_encode(dst->encoding()); 4420 emit_byte(0xD3); 4421 emit_byte(0xE0 | encode); 4422 } 4423 4424 void Assembler::shrq(Register dst, int imm8) { 4425 assert(isShiftCount(imm8 >> 1), "illegal shift count"); 4426 int encode = prefixq_and_encode(dst->encoding()); 4427 emit_byte(0xC1); 4428 emit_byte(0xE8 | encode); 4429 emit_byte(imm8); 4430 } 4431 4432 void Assembler::shrq(Register dst) { 4433 int encode = prefixq_and_encode(dst->encoding()); 4434 emit_byte(0xD3); 4435 emit_byte(0xE8 | encode); 4436 } 4437 4438 void Assembler::subq(Address dst, int32_t imm32) { 4439 InstructionMark im(this); 4440 prefixq(dst); 4441 emit_arith_operand(0x81, rbp, dst, imm32); 4442 } 4443 4444 void Assembler::subq(Address dst, Register src) { 4445 InstructionMark im(this); 4446 prefixq(dst, src); 4447 emit_byte(0x29); 4448 emit_operand(src, dst); 4449 } 4450 4451 void Assembler::subq(Register dst, int32_t imm32) { 4452 (void) prefixq_and_encode(dst->encoding()); 4453 emit_arith(0x81, 0xE8, dst, imm32); 4454 } 4455 4456 void Assembler::subq(Register dst, Address src) { 4457 InstructionMark im(this); 4458 prefixq(src, dst); 4459 
emit_byte(0x2B); 4460 emit_operand(dst, src); 4461 } 4462 4463 void Assembler::subq(Register dst, Register src) { 4464 (void) prefixq_and_encode(dst->encoding(), src->encoding()); 4465 emit_arith(0x2B, 0xC0, dst, src); 4466 } 4467 4468 void Assembler::testq(Register dst, int32_t imm32) { 4469 // not using emit_arith because test 4470 // doesn't support sign-extension of 4471 // 8bit operands 4472 int encode = dst->encoding(); 4473 if (encode == 0) { 4474 prefix(REX_W); 4475 emit_byte(0xA9); 4476 } else { 4477 encode = prefixq_and_encode(encode); 4478 emit_byte(0xF7); 4479 emit_byte(0xC0 | encode); 4480 } 4481 emit_long(imm32); 4482 } 4483 4484 void Assembler::testq(Register dst, Register src) { 4485 (void) prefixq_and_encode(dst->encoding(), src->encoding()); 4486 emit_arith(0x85, 0xC0, dst, src); 4487 } 4488 4489 void Assembler::xaddq(Address dst, Register src) { 4490 InstructionMark im(this); 4491 prefixq(dst, src); 4492 emit_byte(0x0F); 4493 emit_byte(0xC1); 4494 emit_operand(src, dst); 4495 } 4496 4497 void Assembler::xchgq(Register dst, Address src) { 4498 InstructionMark im(this); 4499 prefixq(src, dst); 4500 emit_byte(0x87); 4501 emit_operand(dst, src); 4502 } 4503 4504 void Assembler::xchgq(Register dst, Register src) { 4505 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 4506 emit_byte(0x87); 4507 emit_byte(0xc0 | encode); 4508 } 4509 4510 void Assembler::xorq(Register dst, Register src) { 4511 (void) prefixq_and_encode(dst->encoding(), src->encoding()); 4512 emit_arith(0x33, 0xC0, dst, src); 4513 } 4514 4515 void Assembler::xorq(Register dst, Address src) { 4516 InstructionMark im(this); 4517 prefixq(src, dst); 4518 emit_byte(0x33); 4519 emit_operand(dst, src); 4520 } 4521 4522 #endif // !LP64 4523 4524 static Assembler::Condition reverse[] = { 4525 Assembler::noOverflow /* overflow = 0x0 */ , 4526 Assembler::overflow /* noOverflow = 0x1 */ , 4527 Assembler::aboveEqual /* carrySet = 0x2, below = 0x2 */ , 4528 Assembler::below /* 
aboveEqual = 0x3, carryClear = 0x3 */ , 4529 Assembler::notZero /* zero = 0x4, equal = 0x4 */ , 4530 Assembler::zero /* notZero = 0x5, notEqual = 0x5 */ , 4531 Assembler::above /* belowEqual = 0x6 */ , 4532 Assembler::belowEqual /* above = 0x7 */ , 4533 Assembler::positive /* negative = 0x8 */ , 4534 Assembler::negative /* positive = 0x9 */ , 4535 Assembler::noParity /* parity = 0xa */ , 4536 Assembler::parity /* noParity = 0xb */ , 4537 Assembler::greaterEqual /* less = 0xc */ , 4538 Assembler::less /* greaterEqual = 0xd */ , 4539 Assembler::greater /* lessEqual = 0xe */ , 4540 Assembler::lessEqual /* greater = 0xf, */ 4541 4542 }; 4543 4544 4545 // Implementation of MacroAssembler 4546 4547 // First all the versions that have distinct versions depending on 32/64 bit 4548 // Unless the difference is trivial (1 line or so). 4549 4550 #ifndef _LP64 4551 4552 // 32bit versions 4553 4554 Address MacroAssembler::as_Address(AddressLiteral adr) { 4555 return Address(adr.target(), adr.rspec()); 4556 } 4557 4558 Address MacroAssembler::as_Address(ArrayAddress adr) { 4559 return Address::make_array(adr); 4560 } 4561 4562 int MacroAssembler::biased_locking_enter(Register lock_reg, 4563 Register obj_reg, 4564 Register swap_reg, 4565 Register tmp_reg, 4566 bool swap_reg_contains_mark, 4567 Label& done, 4568 Label* slow_case, 4569 BiasedLockingCounters* counters) { 4570 assert(UseBiasedLocking, "why call this otherwise?"); 4571 assert(swap_reg == rax, "swap_reg must be rax, for cmpxchg"); 4572 assert_different_registers(lock_reg, obj_reg, swap_reg); 4573 4574 if (PrintBiasedLockingStatistics && counters == NULL) 4575 counters = BiasedLocking::counters(); 4576 4577 bool need_tmp_reg = false; 4578 if (tmp_reg == noreg) { 4579 need_tmp_reg = true; 4580 tmp_reg = lock_reg; 4581 } else { 4582 assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg); 4583 } 4584 assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes 
assumptions about bit layout"); 4585 Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes()); 4586 Address klass_addr (obj_reg, oopDesc::klass_offset_in_bytes()); 4587 Address saved_mark_addr(lock_reg, 0); 4588 4589 // Biased locking 4590 // See whether the lock is currently biased toward our thread and 4591 // whether the epoch is still valid 4592 // Note that the runtime guarantees sufficient alignment of JavaThread 4593 // pointers to allow age to be placed into low bits 4594 // First check to see whether biasing is even enabled for this object 4595 Label cas_label; 4596 int null_check_offset = -1; 4597 if (!swap_reg_contains_mark) { 4598 null_check_offset = offset(); 4599 movl(swap_reg, mark_addr); 4600 } 4601 if (need_tmp_reg) { 4602 push(tmp_reg); 4603 } 4604 movl(tmp_reg, swap_reg); 4605 andl(tmp_reg, markOopDesc::biased_lock_mask_in_place); 4606 cmpl(tmp_reg, markOopDesc::biased_lock_pattern); 4607 if (need_tmp_reg) { 4608 pop(tmp_reg); 4609 } 4610 jcc(Assembler::notEqual, cas_label); 4611 // The bias pattern is present in the object's header. Need to check 4612 // whether the bias owner and the epoch are both still current. 4613 // Note that because there is no current thread register on x86 we 4614 // need to store off the mark word we read out of the object to 4615 // avoid reloading it and needing to recheck invariants below. This 4616 // store is unfortunate but it makes the overall code shorter and 4617 // simpler. 
4618 movl(saved_mark_addr, swap_reg); 4619 if (need_tmp_reg) { 4620 push(tmp_reg); 4621 } 4622 get_thread(tmp_reg); 4623 xorl(swap_reg, tmp_reg); 4624 if (swap_reg_contains_mark) { 4625 null_check_offset = offset(); 4626 } 4627 movl(tmp_reg, klass_addr); 4628 xorl(swap_reg, Address(tmp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes())); 4629 andl(swap_reg, ~((int) markOopDesc::age_mask_in_place)); 4630 if (need_tmp_reg) { 4631 pop(tmp_reg); 4632 } 4633 if (counters != NULL) { 4634 cond_inc32(Assembler::zero, 4635 ExternalAddress((address)counters->biased_lock_entry_count_addr())); 4636 } 4637 jcc(Assembler::equal, done); 4638 4639 Label try_revoke_bias; 4640 Label try_rebias; 4641 4642 // At this point we know that the header has the bias pattern and 4643 // that we are not the bias owner in the current epoch. We need to 4644 // figure out more details about the state of the header in order to 4645 // know what operations can be legally performed on the object's 4646 // header. 4647 4648 // If the low three bits in the xor result aren't clear, that means 4649 // the prototype header is no longer biased and we have to revoke 4650 // the bias on this object. 4651 testl(swap_reg, markOopDesc::biased_lock_mask_in_place); 4652 jcc(Assembler::notZero, try_revoke_bias); 4653 4654 // Biasing is still enabled for this data type. See whether the 4655 // epoch of the current bias is still valid, meaning that the epoch 4656 // bits of the mark word are equal to the epoch bits of the 4657 // prototype header. (Note that the prototype header's epoch bits 4658 // only change at a safepoint.) If not, attempt to rebias the object 4659 // toward the current thread. Note that we must be absolutely sure 4660 // that the current epoch is invalid in order to do this because 4661 // otherwise the manipulations it performs on the mark word are 4662 // illegal. 
4663 testl(swap_reg, markOopDesc::epoch_mask_in_place); 4664 jcc(Assembler::notZero, try_rebias); 4665 4666 // The epoch of the current bias is still valid but we know nothing 4667 // about the owner; it might be set or it might be clear. Try to 4668 // acquire the bias of the object using an atomic operation. If this 4669 // fails we will go in to the runtime to revoke the object's bias. 4670 // Note that we first construct the presumed unbiased header so we 4671 // don't accidentally blow away another thread's valid bias. 4672 movl(swap_reg, saved_mark_addr); 4673 andl(swap_reg, 4674 markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place); 4675 if (need_tmp_reg) { 4676 push(tmp_reg); 4677 } 4678 get_thread(tmp_reg); 4679 orl(tmp_reg, swap_reg); 4680 if (os::is_MP()) { 4681 lock(); 4682 } 4683 cmpxchgptr(tmp_reg, Address(obj_reg, 0)); 4684 if (need_tmp_reg) { 4685 pop(tmp_reg); 4686 } 4687 // If the biasing toward our thread failed, this means that 4688 // another thread succeeded in biasing it toward itself and we 4689 // need to revoke that bias. The revocation will occur in the 4690 // interpreter runtime in the slow case. 4691 if (counters != NULL) { 4692 cond_inc32(Assembler::zero, 4693 ExternalAddress((address)counters->anonymously_biased_lock_entry_count_addr())); 4694 } 4695 if (slow_case != NULL) { 4696 jcc(Assembler::notZero, *slow_case); 4697 } 4698 jmp(done); 4699 4700 bind(try_rebias); 4701 // At this point we know the epoch has expired, meaning that the 4702 // current "bias owner", if any, is actually invalid. Under these 4703 // circumstances _only_, we are allowed to use the current header's 4704 // value as the comparison value when doing the cas to acquire the 4705 // bias in the current epoch. In other words, we allow transfer of 4706 // the bias from one thread to another directly in this situation. 
4707 // 4708 // FIXME: due to a lack of registers we currently blow away the age 4709 // bits in this situation. Should attempt to preserve them. 4710 if (need_tmp_reg) { 4711 push(tmp_reg); 4712 } 4713 get_thread(tmp_reg); 4714 movl(swap_reg, klass_addr); 4715 orl(tmp_reg, Address(swap_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes())); 4716 movl(swap_reg, saved_mark_addr); 4717 if (os::is_MP()) { 4718 lock(); 4719 } 4720 cmpxchgptr(tmp_reg, Address(obj_reg, 0)); 4721 if (need_tmp_reg) { 4722 pop(tmp_reg); 4723 } 4724 // If the biasing toward our thread failed, then another thread 4725 // succeeded in biasing it toward itself and we need to revoke that 4726 // bias. The revocation will occur in the runtime in the slow case. 4727 if (counters != NULL) { 4728 cond_inc32(Assembler::zero, 4729 ExternalAddress((address)counters->rebiased_lock_entry_count_addr())); 4730 } 4731 if (slow_case != NULL) { 4732 jcc(Assembler::notZero, *slow_case); 4733 } 4734 jmp(done); 4735 4736 bind(try_revoke_bias); 4737 // The prototype mark in the klass doesn't have the bias bit set any 4738 // more, indicating that objects of this data type are not supposed 4739 // to be biased any more. We are going to try to reset the mark of 4740 // this object to the prototype value and fall through to the 4741 // CAS-based locking scheme. Note that if our CAS fails, it means 4742 // that another thread raced us for the privilege of revoking the 4743 // bias of this particular object, so it's okay to continue in the 4744 // normal locking code. 4745 // 4746 // FIXME: due to a lack of registers we currently blow away the age 4747 // bits in this situation. Should attempt to preserve them. 
4748 movl(swap_reg, saved_mark_addr); 4749 if (need_tmp_reg) { 4750 push(tmp_reg); 4751 } 4752 movl(tmp_reg, klass_addr); 4753 movl(tmp_reg, Address(tmp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes())); 4754 if (os::is_MP()) { 4755 lock(); 4756 } 4757 cmpxchgptr(tmp_reg, Address(obj_reg, 0)); 4758 if (need_tmp_reg) { 4759 pop(tmp_reg); 4760 } 4761 // Fall through to the normal CAS-based lock, because no matter what 4762 // the result of the above CAS, some thread must have succeeded in 4763 // removing the bias bit from the object's header. 4764 if (counters != NULL) { 4765 cond_inc32(Assembler::zero, 4766 ExternalAddress((address)counters->revoked_lock_entry_count_addr())); 4767 } 4768 4769 bind(cas_label); 4770 4771 return null_check_offset; 4772 } 4773 void MacroAssembler::call_VM_leaf_base(address entry_point, 4774 int number_of_arguments) { 4775 call(RuntimeAddress(entry_point)); 4776 increment(rsp, number_of_arguments * wordSize); 4777 } 4778 4779 void MacroAssembler::cmpoop(Address src1, jobject obj) { 4780 cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate()); 4781 } 4782 4783 void MacroAssembler::cmpoop(Register src1, jobject obj) { 4784 cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate()); 4785 } 4786 4787 void MacroAssembler::extend_sign(Register hi, Register lo) { 4788 // According to Intel Doc. AP-526, "Integer Divide", p.18. 
4789 if (VM_Version::is_P6() && hi == rdx && lo == rax) { 4790 cdql(); 4791 } else { 4792 movl(hi, lo); 4793 sarl(hi, 31); 4794 } 4795 } 4796 4797 void MacroAssembler::fat_nop() { 4798 // A 5 byte nop that is safe for patching (see patch_verified_entry) 4799 emit_byte(0x26); // es: 4800 emit_byte(0x2e); // cs: 4801 emit_byte(0x64); // fs: 4802 emit_byte(0x65); // gs: 4803 emit_byte(0x90); 4804 } 4805 4806 void MacroAssembler::jC2(Register tmp, Label& L) { 4807 // set parity bit if FPU flag C2 is set (via rax) 4808 save_rax(tmp); 4809 fwait(); fnstsw_ax(); 4810 sahf(); 4811 restore_rax(tmp); 4812 // branch 4813 jcc(Assembler::parity, L); 4814 } 4815 4816 void MacroAssembler::jnC2(Register tmp, Label& L) { 4817 // set parity bit if FPU flag C2 is set (via rax) 4818 save_rax(tmp); 4819 fwait(); fnstsw_ax(); 4820 sahf(); 4821 restore_rax(tmp); 4822 // branch 4823 jcc(Assembler::noParity, L); 4824 } 4825 4826 // 32bit can do a case table jump in one instruction but we no longer allow the base 4827 // to be installed in the Address class 4828 void MacroAssembler::jump(ArrayAddress entry) { 4829 jmp(as_Address(entry)); 4830 } 4831 4832 // Note: y_lo will be destroyed 4833 void MacroAssembler::lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo) { 4834 // Long compare for Java (semantics as described in JVM spec.) 
4835 Label high, low, done; 4836 4837 cmpl(x_hi, y_hi); 4838 jcc(Assembler::less, low); 4839 jcc(Assembler::greater, high); 4840 // x_hi is the return register 4841 xorl(x_hi, x_hi); 4842 cmpl(x_lo, y_lo); 4843 jcc(Assembler::below, low); 4844 jcc(Assembler::equal, done); 4845 4846 bind(high); 4847 xorl(x_hi, x_hi); 4848 increment(x_hi); 4849 jmp(done); 4850 4851 bind(low); 4852 xorl(x_hi, x_hi); 4853 decrementl(x_hi); 4854 4855 bind(done); 4856 } 4857 4858 void MacroAssembler::lea(Register dst, AddressLiteral src) { 4859 mov_literal32(dst, (int32_t)src.target(), src.rspec()); 4860 } 4861 4862 void MacroAssembler::lea(Address dst, AddressLiteral adr) { 4863 // leal(dst, as_Address(adr)); 4864 // see note in movl as to why we must use a move 4865 mov_literal32(dst, (int32_t) adr.target(), adr.rspec()); 4866 } 4867 4868 void MacroAssembler::leave() { 4869 mov(rsp, rbp); 4870 pop(rbp); 4871 } 4872 4873 void MacroAssembler::lmul(int x_rsp_offset, int y_rsp_offset) { 4874 // Multiplication of two Java long values stored on the stack 4875 // as illustrated below. Result is in rdx:rax. 4876 // 4877 // rsp ---> [ ?? ] \ \ 4878 // .... | y_rsp_offset | 4879 // [ y_lo ] / (in bytes) | x_rsp_offset 4880 // [ y_hi ] | (in bytes) 4881 // .... | 4882 // [ x_lo ] / 4883 // [ x_hi ] 4884 // .... 
4885 // 4886 // Basic idea: lo(result) = lo(x_lo * y_lo) 4887 // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi) 4888 Address x_hi(rsp, x_rsp_offset + wordSize); Address x_lo(rsp, x_rsp_offset); 4889 Address y_hi(rsp, y_rsp_offset + wordSize); Address y_lo(rsp, y_rsp_offset); 4890 Label quick; 4891 // load x_hi, y_hi and check if quick 4892 // multiplication is possible 4893 movl(rbx, x_hi); 4894 movl(rcx, y_hi); 4895 movl(rax, rbx); 4896 orl(rbx, rcx); // rbx, = 0 <=> x_hi = 0 and y_hi = 0 4897 jcc(Assembler::zero, quick); // if rbx, = 0 do quick multiply 4898 // do full multiplication 4899 // 1st step 4900 mull(y_lo); // x_hi * y_lo 4901 movl(rbx, rax); // save lo(x_hi * y_lo) in rbx, 4902 // 2nd step 4903 movl(rax, x_lo); 4904 mull(rcx); // x_lo * y_hi 4905 addl(rbx, rax); // add lo(x_lo * y_hi) to rbx, 4906 // 3rd step 4907 bind(quick); // note: rbx, = 0 if quick multiply! 4908 movl(rax, x_lo); 4909 mull(y_lo); // x_lo * y_lo 4910 addl(rdx, rbx); // correct hi(x_lo * y_lo) 4911 } 4912 4913 void MacroAssembler::lneg(Register hi, Register lo) { 4914 negl(lo); 4915 adcl(hi, 0); 4916 negl(hi); 4917 } 4918 4919 void MacroAssembler::lshl(Register hi, Register lo) { 4920 // Java shift left long support (semantics as described in JVM spec., p.305) 4921 // (basic idea for shift counts s >= n: x << s == (x << n) << (s - n)) 4922 // shift value is in rcx ! 4923 assert(hi != rcx, "must not use rcx"); 4924 assert(lo != rcx, "must not use rcx"); 4925 const Register s = rcx; // shift count 4926 const int n = BitsPerWord; 4927 Label L; 4928 andl(s, 0x3f); // s := s & 0x3f (s < 0x40) 4929 cmpl(s, n); // if (s < n) 4930 jcc(Assembler::less, L); // else (s >= n) 4931 movl(hi, lo); // x := x << n 4932 xorl(lo, lo); 4933 // Note: subl(s, n) is not needed since the Intel shift instructions work rcx mod n! 
4934 bind(L); // s (mod n) < n 4935 shldl(hi, lo); // x := x << s 4936 shll(lo); 4937 } 4938 4939 4940 void MacroAssembler::lshr(Register hi, Register lo, bool sign_extension) { 4941 // Java shift right long support (semantics as described in JVM spec., p.306 & p.310) 4942 // (basic idea for shift counts s >= n: x >> s == (x >> n) >> (s - n)) 4943 assert(hi != rcx, "must not use rcx"); 4944 assert(lo != rcx, "must not use rcx"); 4945 const Register s = rcx; // shift count 4946 const int n = BitsPerWord; 4947 Label L; 4948 andl(s, 0x3f); // s := s & 0x3f (s < 0x40) 4949 cmpl(s, n); // if (s < n) 4950 jcc(Assembler::less, L); // else (s >= n) 4951 movl(lo, hi); // x := x >> n 4952 if (sign_extension) sarl(hi, 31); 4953 else xorl(hi, hi); 4954 // Note: subl(s, n) is not needed since the Intel shift instructions work rcx mod n! 4955 bind(L); // s (mod n) < n 4956 shrdl(lo, hi); // x := x >> s 4957 if (sign_extension) sarl(hi); 4958 else shrl(hi); 4959 } 4960 4961 void MacroAssembler::movoop(Register dst, jobject obj) { 4962 mov_literal32(dst, (int32_t)obj, oop_Relocation::spec_for_immediate()); 4963 } 4964 4965 void MacroAssembler::movoop(Address dst, jobject obj) { 4966 mov_literal32(dst, (int32_t)obj, oop_Relocation::spec_for_immediate()); 4967 } 4968 4969 void MacroAssembler::movptr(Register dst, AddressLiteral src) { 4970 if (src.is_lval()) { 4971 mov_literal32(dst, (intptr_t)src.target(), src.rspec()); 4972 } else { 4973 movl(dst, as_Address(src)); 4974 } 4975 } 4976 4977 void MacroAssembler::movptr(ArrayAddress dst, Register src) { 4978 movl(as_Address(dst), src); 4979 } 4980 4981 void MacroAssembler::movptr(Register dst, ArrayAddress src) { 4982 movl(dst, as_Address(src)); 4983 } 4984 4985 // src should NEVER be a real pointer. 
Use AddressLiteral for true pointers 4986 void MacroAssembler::movptr(Address dst, intptr_t src) { 4987 movl(dst, src); 4988 } 4989 4990 4991 void MacroAssembler::pop_callee_saved_registers() { 4992 pop(rcx); 4993 pop(rdx); 4994 pop(rdi); 4995 pop(rsi); 4996 } 4997 4998 void MacroAssembler::pop_fTOS() { 4999 fld_d(Address(rsp, 0)); 5000 addl(rsp, 2 * wordSize); 5001 } 5002 5003 void MacroAssembler::push_callee_saved_registers() { 5004 push(rsi); 5005 push(rdi); 5006 push(rdx); 5007 push(rcx); 5008 } 5009 5010 void MacroAssembler::push_fTOS() { 5011 subl(rsp, 2 * wordSize); 5012 fstp_d(Address(rsp, 0)); 5013 } 5014 5015 5016 void MacroAssembler::pushoop(jobject obj) { 5017 push_literal32((int32_t)obj, oop_Relocation::spec_for_immediate()); 5018 } 5019 5020 5021 void MacroAssembler::pushptr(AddressLiteral src) { 5022 if (src.is_lval()) { 5023 push_literal32((int32_t)src.target(), src.rspec()); 5024 } else { 5025 pushl(as_Address(src)); 5026 } 5027 } 5028 5029 void MacroAssembler::set_word_if_not_zero(Register dst) { 5030 xorl(dst, dst); 5031 set_byte_if_not_zero(dst); 5032 } 5033 5034 static void pass_arg0(MacroAssembler* masm, Register arg) { 5035 masm->push(arg); 5036 } 5037 5038 static void pass_arg1(MacroAssembler* masm, Register arg) { 5039 masm->push(arg); 5040 } 5041 5042 static void pass_arg2(MacroAssembler* masm, Register arg) { 5043 masm->push(arg); 5044 } 5045 5046 static void pass_arg3(MacroAssembler* masm, Register arg) { 5047 masm->push(arg); 5048 } 5049 5050 #ifndef PRODUCT 5051 extern "C" void findpc(intptr_t x); 5052 #endif 5053 5054 void MacroAssembler::debug32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip, char* msg) { 5055 // In order to get locks to work, we need to fake a in_VM state 5056 JavaThread* thread = JavaThread::current(); 5057 JavaThreadState saved_state = thread->thread_state(); 5058 thread->set_thread_state(_thread_in_vm); 5059 if (ShowMessageBoxOnError) { 5060 JavaThread* thread = 
JavaThread::current(); 5061 JavaThreadState saved_state = thread->thread_state(); 5062 thread->set_thread_state(_thread_in_vm); 5063 if (CountBytecodes || TraceBytecodes || StopInterpreterAt) { 5064 ttyLocker ttyl; 5065 BytecodeCounter::print(); 5066 } 5067 // To see where a verify_oop failed, get $ebx+40/X for this frame. 5068 // This is the value of eip which points to where verify_oop will return. 5069 if (os::message_box(msg, "Execution stopped, print registers?")) { 5070 ttyLocker ttyl; 5071 tty->print_cr("eip = 0x%08x", eip); 5072 #ifndef PRODUCT 5073 if ((WizardMode || Verbose) && PrintMiscellaneous) { 5074 tty->cr(); 5075 findpc(eip); 5076 tty->cr(); 5077 } 5078 #endif 5079 tty->print_cr("rax = 0x%08x", rax); 5080 tty->print_cr("rbx = 0x%08x", rbx); 5081 tty->print_cr("rcx = 0x%08x", rcx); 5082 tty->print_cr("rdx = 0x%08x", rdx); 5083 tty->print_cr("rdi = 0x%08x", rdi); 5084 tty->print_cr("rsi = 0x%08x", rsi); 5085 tty->print_cr("rbp = 0x%08x", rbp); 5086 tty->print_cr("rsp = 0x%08x", rsp); 5087 BREAKPOINT; 5088 assert(false, "start up GDB"); 5089 } 5090 } else { 5091 ttyLocker ttyl; 5092 ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg); 5093 assert(false, "DEBUG MESSAGE"); 5094 } 5095 ThreadStateTransition::transition(thread, _thread_in_vm, saved_state); 5096 } 5097 5098 void MacroAssembler::stop(const char* msg) { 5099 ExternalAddress message((address)msg); 5100 // push address of message 5101 pushptr(message.addr()); 5102 { Label L; call(L, relocInfo::none); bind(L); } // push eip 5103 pusha(); // push registers 5104 call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug32))); 5105 hlt(); 5106 } 5107 5108 void MacroAssembler::warn(const char* msg) { 5109 push_CPU_state(); 5110 5111 ExternalAddress message((address) msg); 5112 // push address of message 5113 pushptr(message.addr()); 5114 5115 call(RuntimeAddress(CAST_FROM_FN_PTR(address, warning))); 5116 addl(rsp, wordSize); // discard argument 5117 
pop_CPU_state(); 5118 } 5119 5120 #else // _LP64 5121 5122 // 64 bit versions 5123 5124 Address MacroAssembler::as_Address(AddressLiteral adr) { 5125 // amd64 always does this as a pc-rel 5126 // we can be absolute or disp based on the instruction type 5127 // jmp/call are displacements others are absolute 5128 assert(!adr.is_lval(), "must be rval"); 5129 assert(reachable(adr), "must be"); 5130 return Address((int32_t)(intptr_t)(adr.target() - pc()), adr.target(), adr.reloc()); 5131 5132 } 5133 5134 Address MacroAssembler::as_Address(ArrayAddress adr) { 5135 AddressLiteral base = adr.base(); 5136 lea(rscratch1, base); 5137 Address index = adr.index(); 5138 assert(index._disp == 0, "must not have disp"); // maybe it can? 5139 Address array(rscratch1, index._index, index._scale, index._disp); 5140 return array; 5141 } 5142 5143 int MacroAssembler::biased_locking_enter(Register lock_reg, 5144 Register obj_reg, 5145 Register swap_reg, 5146 Register tmp_reg, 5147 bool swap_reg_contains_mark, 5148 Label& done, 5149 Label* slow_case, 5150 BiasedLockingCounters* counters) { 5151 assert(UseBiasedLocking, "why call this otherwise?"); 5152 assert(swap_reg == rax, "swap_reg must be rax for cmpxchgq"); 5153 assert(tmp_reg != noreg, "tmp_reg must be supplied"); 5154 assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg); 5155 assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout"); 5156 Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes()); 5157 Address saved_mark_addr(lock_reg, 0); 5158 5159 if (PrintBiasedLockingStatistics && counters == NULL) 5160 counters = BiasedLocking::counters(); 5161 5162 // Biased locking 5163 // See whether the lock is currently biased toward our thread and 5164 // whether the epoch is still valid 5165 // Note that the runtime guarantees sufficient alignment of JavaThread 5166 // pointers to allow age to be placed into low bits 5167 // First check to 
see whether biasing is even enabled for this object 5168 Label cas_label; 5169 int null_check_offset = -1; 5170 if (!swap_reg_contains_mark) { 5171 null_check_offset = offset(); 5172 movq(swap_reg, mark_addr); 5173 } 5174 movq(tmp_reg, swap_reg); 5175 andq(tmp_reg, markOopDesc::biased_lock_mask_in_place); 5176 cmpq(tmp_reg, markOopDesc::biased_lock_pattern); 5177 jcc(Assembler::notEqual, cas_label); 5178 // The bias pattern is present in the object's header. Need to check 5179 // whether the bias owner and the epoch are both still current. 5180 load_prototype_header(tmp_reg, obj_reg); 5181 orq(tmp_reg, r15_thread); 5182 xorq(tmp_reg, swap_reg); 5183 andq(tmp_reg, ~((int) markOopDesc::age_mask_in_place)); 5184 if (counters != NULL) { 5185 cond_inc32(Assembler::zero, 5186 ExternalAddress((address) counters->anonymously_biased_lock_entry_count_addr())); 5187 } 5188 jcc(Assembler::equal, done); 5189 5190 Label try_revoke_bias; 5191 Label try_rebias; 5192 5193 // At this point we know that the header has the bias pattern and 5194 // that we are not the bias owner in the current epoch. We need to 5195 // figure out more details about the state of the header in order to 5196 // know what operations can be legally performed on the object's 5197 // header. 5198 5199 // If the low three bits in the xor result aren't clear, that means 5200 // the prototype header is no longer biased and we have to revoke 5201 // the bias on this object. 5202 testq(tmp_reg, markOopDesc::biased_lock_mask_in_place); 5203 jcc(Assembler::notZero, try_revoke_bias); 5204 5205 // Biasing is still enabled for this data type. See whether the 5206 // epoch of the current bias is still valid, meaning that the epoch 5207 // bits of the mark word are equal to the epoch bits of the 5208 // prototype header. (Note that the prototype header's epoch bits 5209 // only change at a safepoint.) If not, attempt to rebias the object 5210 // toward the current thread. 
Note that we must be absolutely sure 5211 // that the current epoch is invalid in order to do this because 5212 // otherwise the manipulations it performs on the mark word are 5213 // illegal. 5214 testq(tmp_reg, markOopDesc::epoch_mask_in_place); 5215 jcc(Assembler::notZero, try_rebias); 5216 5217 // The epoch of the current bias is still valid but we know nothing 5218 // about the owner; it might be set or it might be clear. Try to 5219 // acquire the bias of the object using an atomic operation. If this 5220 // fails we will go in to the runtime to revoke the object's bias. 5221 // Note that we first construct the presumed unbiased header so we 5222 // don't accidentally blow away another thread's valid bias. 5223 andq(swap_reg, 5224 markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place); 5225 movq(tmp_reg, swap_reg); 5226 orq(tmp_reg, r15_thread); 5227 if (os::is_MP()) { 5228 lock(); 5229 } 5230 cmpxchgq(tmp_reg, Address(obj_reg, 0)); 5231 // If the biasing toward our thread failed, this means that 5232 // another thread succeeded in biasing it toward itself and we 5233 // need to revoke that bias. The revocation will occur in the 5234 // interpreter runtime in the slow case. 5235 if (counters != NULL) { 5236 cond_inc32(Assembler::zero, 5237 ExternalAddress((address) counters->anonymously_biased_lock_entry_count_addr())); 5238 } 5239 if (slow_case != NULL) { 5240 jcc(Assembler::notZero, *slow_case); 5241 } 5242 jmp(done); 5243 5244 bind(try_rebias); 5245 // At this point we know the epoch has expired, meaning that the 5246 // current "bias owner", if any, is actually invalid. Under these 5247 // circumstances _only_, we are allowed to use the current header's 5248 // value as the comparison value when doing the cas to acquire the 5249 // bias in the current epoch. In other words, we allow transfer of 5250 // the bias from one thread to another directly in this situation. 
5251 // 5252 // FIXME: due to a lack of registers we currently blow away the age 5253 // bits in this situation. Should attempt to preserve them. 5254 load_prototype_header(tmp_reg, obj_reg); 5255 orq(tmp_reg, r15_thread); 5256 if (os::is_MP()) { 5257 lock(); 5258 } 5259 cmpxchgq(tmp_reg, Address(obj_reg, 0)); 5260 // If the biasing toward our thread failed, then another thread 5261 // succeeded in biasing it toward itself and we need to revoke that 5262 // bias. The revocation will occur in the runtime in the slow case. 5263 if (counters != NULL) { 5264 cond_inc32(Assembler::zero, 5265 ExternalAddress((address) counters->rebiased_lock_entry_count_addr())); 5266 } 5267 if (slow_case != NULL) { 5268 jcc(Assembler::notZero, *slow_case); 5269 } 5270 jmp(done); 5271 5272 bind(try_revoke_bias); 5273 // The prototype mark in the klass doesn't have the bias bit set any 5274 // more, indicating that objects of this data type are not supposed 5275 // to be biased any more. We are going to try to reset the mark of 5276 // this object to the prototype value and fall through to the 5277 // CAS-based locking scheme. Note that if our CAS fails, it means 5278 // that another thread raced us for the privilege of revoking the 5279 // bias of this particular object, so it's okay to continue in the 5280 // normal locking code. 5281 // 5282 // FIXME: due to a lack of registers we currently blow away the age 5283 // bits in this situation. Should attempt to preserve them. 5284 load_prototype_header(tmp_reg, obj_reg); 5285 if (os::is_MP()) { 5286 lock(); 5287 } 5288 cmpxchgq(tmp_reg, Address(obj_reg, 0)); 5289 // Fall through to the normal CAS-based lock, because no matter what 5290 // the result of the above CAS, some thread must have succeeded in 5291 // removing the bias bit from the object's header. 
5292 if (counters != NULL) { 5293 cond_inc32(Assembler::zero, 5294 ExternalAddress((address) counters->revoked_lock_entry_count_addr())); 5295 } 5296 5297 bind(cas_label); 5298 5299 return null_check_offset; 5300 } 5301 5302 void MacroAssembler::call_VM_leaf_base(address entry_point, int num_args) { 5303 Label L, E; 5304 5305 #ifdef _WIN64 5306 // Windows always allocates space for it's register args 5307 assert(num_args <= 4, "only register arguments supported"); 5308 subq(rsp, frame::arg_reg_save_area_bytes); 5309 #endif 5310 5311 // Align stack if necessary 5312 testl(rsp, 15); 5313 jcc(Assembler::zero, L); 5314 5315 subq(rsp, 8); 5316 { 5317 call(RuntimeAddress(entry_point)); 5318 } 5319 addq(rsp, 8); 5320 jmp(E); 5321 5322 bind(L); 5323 { 5324 call(RuntimeAddress(entry_point)); 5325 } 5326 5327 bind(E); 5328 5329 #ifdef _WIN64 5330 // restore stack pointer 5331 addq(rsp, frame::arg_reg_save_area_bytes); 5332 #endif 5333 5334 } 5335 5336 void MacroAssembler::cmp64(Register src1, AddressLiteral src2) { 5337 assert(!src2.is_lval(), "should use cmpptr"); 5338 5339 if (reachable(src2)) { 5340 cmpq(src1, as_Address(src2)); 5341 } else { 5342 lea(rscratch1, src2); 5343 Assembler::cmpq(src1, Address(rscratch1, 0)); 5344 } 5345 } 5346 5347 int MacroAssembler::corrected_idivq(Register reg) { 5348 // Full implementation of Java ldiv and lrem; checks for special 5349 // case as described in JVM spec., p.243 & p.271. The function 5350 // returns the (pc) offset of the idivl instruction - may be needed 5351 // for implicit exceptions. 
5352 // 5353 // normal case special case 5354 // 5355 // input : rax: dividend min_long 5356 // reg: divisor (may not be eax/edx) -1 5357 // 5358 // output: rax: quotient (= rax idiv reg) min_long 5359 // rdx: remainder (= rax irem reg) 0 5360 assert(reg != rax && reg != rdx, "reg cannot be rax or rdx register"); 5361 static const int64_t min_long = 0x8000000000000000; 5362 Label normal_case, special_case; 5363 5364 // check for special case 5365 cmp64(rax, ExternalAddress((address) &min_long)); 5366 jcc(Assembler::notEqual, normal_case); 5367 xorl(rdx, rdx); // prepare rdx for possible special case (where 5368 // remainder = 0) 5369 cmpq(reg, -1); 5370 jcc(Assembler::equal, special_case); 5371 5372 // handle normal case 5373 bind(normal_case); 5374 cdqq(); 5375 int idivq_offset = offset(); 5376 idivq(reg); 5377 5378 // normal and special case exit 5379 bind(special_case); 5380 5381 return idivq_offset; 5382 } 5383 5384 void MacroAssembler::decrementq(Register reg, int value) { 5385 if (value == min_jint) { subq(reg, value); return; } 5386 if (value < 0) { incrementq(reg, -value); return; } 5387 if (value == 0) { ; return; } 5388 if (value == 1 && UseIncDec) { decq(reg) ; return; } 5389 /* else */ { subq(reg, value) ; return; } 5390 } 5391 5392 void MacroAssembler::decrementq(Address dst, int value) { 5393 if (value == min_jint) { subq(dst, value); return; } 5394 if (value < 0) { incrementq(dst, -value); return; } 5395 if (value == 0) { ; return; } 5396 if (value == 1 && UseIncDec) { decq(dst) ; return; } 5397 /* else */ { subq(dst, value) ; return; } 5398 } 5399 5400 void MacroAssembler::fat_nop() { 5401 // A 5 byte nop that is safe for patching (see patch_verified_entry) 5402 // Recommened sequence from 'Software Optimization Guide for the AMD 5403 // Hammer Processor' 5404 emit_byte(0x66); 5405 emit_byte(0x66); 5406 emit_byte(0x90); 5407 emit_byte(0x66); 5408 emit_byte(0x90); 5409 } 5410 5411 void MacroAssembler::incrementq(Register reg, int value) { 5412 if 
(value == min_jint) { addq(reg, value); return; } 5413 if (value < 0) { decrementq(reg, -value); return; } 5414 if (value == 0) { ; return; } 5415 if (value == 1 && UseIncDec) { incq(reg) ; return; } 5416 /* else */ { addq(reg, value) ; return; } 5417 } 5418 5419 void MacroAssembler::incrementq(Address dst, int value) { 5420 if (value == min_jint) { addq(dst, value); return; } 5421 if (value < 0) { decrementq(dst, -value); return; } 5422 if (value == 0) { ; return; } 5423 if (value == 1 && UseIncDec) { incq(dst) ; return; } 5424 /* else */ { addq(dst, value) ; return; } 5425 } 5426 5427 // 32bit can do a case table jump in one instruction but we no longer allow the base 5428 // to be installed in the Address class 5429 void MacroAssembler::jump(ArrayAddress entry) { 5430 lea(rscratch1, entry.base()); 5431 Address dispatch = entry.index(); 5432 assert(dispatch._base == noreg, "must be"); 5433 dispatch._base = rscratch1; 5434 jmp(dispatch); 5435 } 5436 5437 void MacroAssembler::lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo) { 5438 ShouldNotReachHere(); // 64bit doesn't use two regs 5439 cmpq(x_lo, y_lo); 5440 } 5441 5442 void MacroAssembler::lea(Register dst, AddressLiteral src) { 5443 mov_literal64(dst, (intptr_t)src.target(), src.rspec()); 5444 } 5445 5446 void MacroAssembler::lea(Address dst, AddressLiteral adr) { 5447 mov_literal64(rscratch1, (intptr_t)adr.target(), adr.rspec()); 5448 movptr(dst, rscratch1); 5449 } 5450 5451 void MacroAssembler::leave() { 5452 // %%% is this really better? Why not on 32bit too? 
5453 emit_byte(0xC9); // LEAVE 5454 } 5455 5456 void MacroAssembler::lneg(Register hi, Register lo) { 5457 ShouldNotReachHere(); // 64bit doesn't use two regs 5458 negq(lo); 5459 } 5460 5461 void MacroAssembler::movoop(Register dst, jobject obj) { 5462 mov_literal64(dst, (intptr_t)obj, oop_Relocation::spec_for_immediate()); 5463 } 5464 5465 void MacroAssembler::movoop(Address dst, jobject obj) { 5466 mov_literal64(rscratch1, (intptr_t)obj, oop_Relocation::spec_for_immediate()); 5467 movq(dst, rscratch1); 5468 } 5469 5470 void MacroAssembler::movptr(Register dst, AddressLiteral src) { 5471 if (src.is_lval()) { 5472 mov_literal64(dst, (intptr_t)src.target(), src.rspec()); 5473 } else { 5474 if (reachable(src)) { 5475 movq(dst, as_Address(src)); 5476 } else { 5477 lea(rscratch1, src); 5478 movq(dst, Address(rscratch1,0)); 5479 } 5480 } 5481 } 5482 5483 void MacroAssembler::movptr(ArrayAddress dst, Register src) { 5484 movq(as_Address(dst), src); 5485 } 5486 5487 void MacroAssembler::movptr(Register dst, ArrayAddress src) { 5488 movq(dst, as_Address(src)); 5489 } 5490 5491 // src should NEVER be a real pointer. 
Use AddressLiteral for true pointers 5492 void MacroAssembler::movptr(Address dst, intptr_t src) { 5493 mov64(rscratch1, src); 5494 movq(dst, rscratch1); 5495 } 5496 5497 // These are mostly for initializing NULL 5498 void MacroAssembler::movptr(Address dst, int32_t src) { 5499 movslq(dst, src); 5500 } 5501 5502 void MacroAssembler::movptr(Register dst, int32_t src) { 5503 mov64(dst, (intptr_t)src); 5504 } 5505 5506 void MacroAssembler::pushoop(jobject obj) { 5507 movoop(rscratch1, obj); 5508 push(rscratch1); 5509 } 5510 5511 void MacroAssembler::pushptr(AddressLiteral src) { 5512 lea(rscratch1, src); 5513 if (src.is_lval()) { 5514 push(rscratch1); 5515 } else { 5516 pushq(Address(rscratch1, 0)); 5517 } 5518 } 5519 5520 void MacroAssembler::reset_last_Java_frame(bool clear_fp, 5521 bool clear_pc) { 5522 // we must set sp to zero to clear frame 5523 movptr(Address(r15_thread, JavaThread::last_Java_sp_offset()), NULL_WORD); 5524 // must clear fp, so that compiled frames are not confused; it is 5525 // possible that we need it only for debugging 5526 if (clear_fp) { 5527 movptr(Address(r15_thread, JavaThread::last_Java_fp_offset()), NULL_WORD); 5528 } 5529 5530 if (clear_pc) { 5531 movptr(Address(r15_thread, JavaThread::last_Java_pc_offset()), NULL_WORD); 5532 } 5533 } 5534 5535 void MacroAssembler::set_last_Java_frame(Register last_java_sp, 5536 Register last_java_fp, 5537 address last_java_pc) { 5538 // determine last_java_sp register 5539 if (!last_java_sp->is_valid()) { 5540 last_java_sp = rsp; 5541 } 5542 5543 // last_java_fp is optional 5544 if (last_java_fp->is_valid()) { 5545 movptr(Address(r15_thread, JavaThread::last_Java_fp_offset()), 5546 last_java_fp); 5547 } 5548 5549 // last_java_pc is optional 5550 if (last_java_pc != NULL) { 5551 Address java_pc(r15_thread, 5552 JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()); 5553 lea(rscratch1, InternalAddress(last_java_pc)); 5554 movptr(java_pc, rscratch1); 5555 } 5556 5557 
movptr(Address(r15_thread, JavaThread::last_Java_sp_offset()), last_java_sp); 5558 } 5559 5560 static void pass_arg0(MacroAssembler* masm, Register arg) { 5561 if (c_rarg0 != arg ) { 5562 masm->mov(c_rarg0, arg); 5563 } 5564 } 5565 5566 static void pass_arg1(MacroAssembler* masm, Register arg) { 5567 if (c_rarg1 != arg ) { 5568 masm->mov(c_rarg1, arg); 5569 } 5570 } 5571 5572 static void pass_arg2(MacroAssembler* masm, Register arg) { 5573 if (c_rarg2 != arg ) { 5574 masm->mov(c_rarg2, arg); 5575 } 5576 } 5577 5578 static void pass_arg3(MacroAssembler* masm, Register arg) { 5579 if (c_rarg3 != arg ) { 5580 masm->mov(c_rarg3, arg); 5581 } 5582 } 5583 5584 void MacroAssembler::stop(const char* msg) { 5585 address rip = pc(); 5586 pusha(); // get regs on stack 5587 lea(c_rarg0, ExternalAddress((address) msg)); 5588 lea(c_rarg1, InternalAddress(rip)); 5589 movq(c_rarg2, rsp); // pass pointer to regs array 5590 andq(rsp, -16); // align stack as required by ABI 5591 call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug64))); 5592 hlt(); 5593 } 5594 5595 void MacroAssembler::warn(const char* msg) { 5596 push(rsp); 5597 andq(rsp, -16); // align stack as required by push_CPU_state and call 5598 5599 push_CPU_state(); // keeps alignment at 16 bytes 5600 lea(c_rarg0, ExternalAddress((address) msg)); 5601 call_VM_leaf(CAST_FROM_FN_PTR(address, warning), c_rarg0); 5602 pop_CPU_state(); 5603 pop(rsp); 5604 } 5605 5606 #ifndef PRODUCT 5607 extern "C" void findpc(intptr_t x); 5608 #endif 5609 5610 void MacroAssembler::debug64(char* msg, int64_t pc, int64_t regs[]) { 5611 // In order to get locks to work, we need to fake a in_VM state 5612 if (ShowMessageBoxOnError ) { 5613 JavaThread* thread = JavaThread::current(); 5614 JavaThreadState saved_state = thread->thread_state(); 5615 thread->set_thread_state(_thread_in_vm); 5616 #ifndef PRODUCT 5617 if (CountBytecodes || TraceBytecodes || StopInterpreterAt) { 5618 ttyLocker ttyl; 5619 BytecodeCounter::print(); 5620 } 5621 
#endif 5622 // To see where a verify_oop failed, get $ebx+40/X for this frame. 5623 // XXX correct this offset for amd64 5624 // This is the value of eip which points to where verify_oop will return. 5625 if (os::message_box(msg, "Execution stopped, print registers?")) { 5626 ttyLocker ttyl; 5627 tty->print_cr("rip = 0x%016lx", pc); 5628 #ifndef PRODUCT 5629 tty->cr(); 5630 findpc(pc); 5631 tty->cr(); 5632 #endif 5633 tty->print_cr("rax = 0x%016lx", regs[15]); 5634 tty->print_cr("rbx = 0x%016lx", regs[12]); 5635 tty->print_cr("rcx = 0x%016lx", regs[14]); 5636 tty->print_cr("rdx = 0x%016lx", regs[13]); 5637 tty->print_cr("rdi = 0x%016lx", regs[8]); 5638 tty->print_cr("rsi = 0x%016lx", regs[9]); 5639 tty->print_cr("rbp = 0x%016lx", regs[10]); 5640 tty->print_cr("rsp = 0x%016lx", regs[11]); 5641 tty->print_cr("r8 = 0x%016lx", regs[7]); 5642 tty->print_cr("r9 = 0x%016lx", regs[6]); 5643 tty->print_cr("r10 = 0x%016lx", regs[5]); 5644 tty->print_cr("r11 = 0x%016lx", regs[4]); 5645 tty->print_cr("r12 = 0x%016lx", regs[3]); 5646 tty->print_cr("r13 = 0x%016lx", regs[2]); 5647 tty->print_cr("r14 = 0x%016lx", regs[1]); 5648 tty->print_cr("r15 = 0x%016lx", regs[0]); 5649 BREAKPOINT; 5650 } 5651 ThreadStateTransition::transition(thread, _thread_in_vm, saved_state); 5652 } else { 5653 ttyLocker ttyl; 5654 ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", 5655 msg); 5656 } 5657 } 5658 5659 #endif // _LP64 5660 5661 // Now versions that are common to 32/64 bit 5662 5663 void MacroAssembler::addptr(Register dst, int32_t imm32) { 5664 LP64_ONLY(addq(dst, imm32)) NOT_LP64(addl(dst, imm32)); 5665 } 5666 5667 void MacroAssembler::addptr(Register dst, Register src) { 5668 LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src)); 5669 } 5670 5671 void MacroAssembler::addptr(Address dst, Register src) { 5672 LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src)); 5673 } 5674 5675 void MacroAssembler::align(int modulus) { 5676 if (offset() % modulus != 0) { 5677 nop(modulus - 
(offset() % modulus)); 5678 } 5679 } 5680 5681 void MacroAssembler::andpd(XMMRegister dst, AddressLiteral src) { 5682 if (reachable(src)) { 5683 andpd(dst, as_Address(src)); 5684 } else { 5685 lea(rscratch1, src); 5686 andpd(dst, Address(rscratch1, 0)); 5687 } 5688 } 5689 5690 void MacroAssembler::andptr(Register dst, int32_t imm32) { 5691 LP64_ONLY(andq(dst, imm32)) NOT_LP64(andl(dst, imm32)); 5692 } 5693 5694 void MacroAssembler::atomic_incl(AddressLiteral counter_addr) { 5695 pushf(); 5696 if (os::is_MP()) 5697 lock(); 5698 incrementl(counter_addr); 5699 popf(); 5700 } 5701 5702 // Writes to stack successive pages until offset reached to check for 5703 // stack overflow + shadow pages. This clobbers tmp. 5704 void MacroAssembler::bang_stack_size(Register size, Register tmp) { 5705 movptr(tmp, rsp); 5706 // Bang stack for total size given plus shadow page size. 5707 // Bang one page at a time because large size can bang beyond yellow and 5708 // red zones. 5709 Label loop; 5710 bind(loop); 5711 movl(Address(tmp, (-os::vm_page_size())), size ); 5712 subptr(tmp, os::vm_page_size()); 5713 subl(size, os::vm_page_size()); 5714 jcc(Assembler::greater, loop); 5715 5716 // Bang down shadow pages too. 5717 // The -1 because we already subtracted 1 page. 5718 for (int i = 0; i< StackShadowPages-1; i++) { 5719 // this could be any sized move but this is can be a debugging crumb 5720 // so the bigger the better. 5721 movptr(Address(tmp, (-i*os::vm_page_size())), size ); 5722 } 5723 } 5724 5725 void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) { 5726 assert(UseBiasedLocking, "why call this otherwise?"); 5727 5728 // Check for biased locking unlock case, which is a no-op 5729 // Note: we do not have to check the thread ID for two reasons. 5730 // First, the interpreter checks for IllegalMonitorStateException at 5731 // a higher level. 
Second, if the bias was revoked while we held the 5732 // lock, the object could not be rebiased toward another thread, so 5733 // the bias bit would be clear. 5734 movptr(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); 5735 andptr(temp_reg, markOopDesc::biased_lock_mask_in_place); 5736 cmpptr(temp_reg, markOopDesc::biased_lock_pattern); 5737 jcc(Assembler::equal, done); 5738 } 5739 5740 void MacroAssembler::c2bool(Register x) { 5741 // implements x == 0 ? 0 : 1 5742 // note: must only look at least-significant byte of x 5743 // since C-style booleans are stored in one byte 5744 // only! (was bug) 5745 andl(x, 0xFF); 5746 setb(Assembler::notZero, x); 5747 } 5748 5749 // Wouldn't need if AddressLiteral version had new name 5750 void MacroAssembler::call(Label& L, relocInfo::relocType rtype) { 5751 Assembler::call(L, rtype); 5752 } 5753 5754 void MacroAssembler::call(Register entry) { 5755 Assembler::call(entry); 5756 } 5757 5758 void MacroAssembler::call(AddressLiteral entry) { 5759 if (reachable(entry)) { 5760 Assembler::call_literal(entry.target(), entry.rspec()); 5761 } else { 5762 lea(rscratch1, entry); 5763 Assembler::call(rscratch1); 5764 } 5765 } 5766 5767 // Implementation of call_VM versions 5768 5769 void MacroAssembler::call_VM(Register oop_result, 5770 address entry_point, 5771 bool check_exceptions) { 5772 Label C, E; 5773 call(C, relocInfo::none); 5774 jmp(E); 5775 5776 bind(C); 5777 call_VM_helper(oop_result, entry_point, 0, check_exceptions); 5778 ret(0); 5779 5780 bind(E); 5781 } 5782 5783 void MacroAssembler::call_VM(Register oop_result, 5784 address entry_point, 5785 Register arg_1, 5786 bool check_exceptions) { 5787 Label C, E; 5788 call(C, relocInfo::none); 5789 jmp(E); 5790 5791 bind(C); 5792 pass_arg1(this, arg_1); 5793 call_VM_helper(oop_result, entry_point, 1, check_exceptions); 5794 ret(0); 5795 5796 bind(E); 5797 } 5798 5799 void MacroAssembler::call_VM(Register oop_result, 5800 address entry_point, 5801 Register arg_1, 5802 
Register arg_2, 5803 bool check_exceptions) { 5804 Label C, E; 5805 call(C, relocInfo::none); 5806 jmp(E); 5807 5808 bind(C); 5809 5810 LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg")); 5811 5812 pass_arg2(this, arg_2); 5813 pass_arg1(this, arg_1); 5814 call_VM_helper(oop_result, entry_point, 2, check_exceptions); 5815 ret(0); 5816 5817 bind(E); 5818 } 5819 5820 void MacroAssembler::call_VM(Register oop_result, 5821 address entry_point, 5822 Register arg_1, 5823 Register arg_2, 5824 Register arg_3, 5825 bool check_exceptions) { 5826 Label C, E; 5827 call(C, relocInfo::none); 5828 jmp(E); 5829 5830 bind(C); 5831 5832 LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg")); 5833 LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg")); 5834 pass_arg3(this, arg_3); 5835 5836 LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg")); 5837 pass_arg2(this, arg_2); 5838 5839 pass_arg1(this, arg_1); 5840 call_VM_helper(oop_result, entry_point, 3, check_exceptions); 5841 ret(0); 5842 5843 bind(E); 5844 } 5845 5846 void MacroAssembler::call_VM(Register oop_result, 5847 Register last_java_sp, 5848 address entry_point, 5849 int number_of_arguments, 5850 bool check_exceptions) { 5851 Register thread = LP64_ONLY(r15_thread) NOT_LP64(noreg); 5852 call_VM_base(oop_result, thread, last_java_sp, entry_point, number_of_arguments, check_exceptions); 5853 } 5854 5855 void MacroAssembler::call_VM(Register oop_result, 5856 Register last_java_sp, 5857 address entry_point, 5858 Register arg_1, 5859 bool check_exceptions) { 5860 pass_arg1(this, arg_1); 5861 call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions); 5862 } 5863 5864 void MacroAssembler::call_VM(Register oop_result, 5865 Register last_java_sp, 5866 address entry_point, 5867 Register arg_1, 5868 Register arg_2, 5869 bool check_exceptions) { 5870 5871 LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg")); 5872 pass_arg2(this, arg_2); 5873 pass_arg1(this, arg_1); 5874 call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions); 
5875 } 5876 5877 void MacroAssembler::call_VM(Register oop_result, 5878 Register last_java_sp, 5879 address entry_point, 5880 Register arg_1, 5881 Register arg_2, 5882 Register arg_3, 5883 bool check_exceptions) { 5884 LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg")); 5885 LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg")); 5886 pass_arg3(this, arg_3); 5887 LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg")); 5888 pass_arg2(this, arg_2); 5889 pass_arg1(this, arg_1); 5890 call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions); 5891 } 5892 5893 void MacroAssembler::call_VM_base(Register oop_result, 5894 Register java_thread, 5895 Register last_java_sp, 5896 address entry_point, 5897 int number_of_arguments, 5898 bool check_exceptions) { 5899 // determine java_thread register 5900 if (!java_thread->is_valid()) { 5901 #ifdef _LP64 5902 java_thread = r15_thread; 5903 #else 5904 java_thread = rdi; 5905 get_thread(java_thread); 5906 #endif // LP64 5907 } 5908 // determine last_java_sp register 5909 if (!last_java_sp->is_valid()) { 5910 last_java_sp = rsp; 5911 } 5912 // debugging support 5913 assert(number_of_arguments >= 0 , "cannot have negative number of arguments"); 5914 LP64_ONLY(assert(java_thread == r15_thread, "unexpected register")); 5915 #ifdef ASSERT 5916 LP64_ONLY(if (UseCompressedOops) verify_heapbase("call_VM_base");) 5917 #endif // ASSERT 5918 5919 assert(java_thread != oop_result , "cannot use the same register for java_thread & oop_result"); 5920 assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp"); 5921 5922 // push java thread (becomes first argument of C function) 5923 5924 NOT_LP64(push(java_thread); number_of_arguments++); 5925 LP64_ONLY(mov(c_rarg0, r15_thread)); 5926 5927 // set last Java frame before call 5928 assert(last_java_sp != rbp, "can't use ebp/rbp"); 5929 5930 // Only interpreter should have to set fp 5931 set_last_Java_frame(java_thread, last_java_sp, rbp, NULL); 5932 5933 // do 
the call, remove parameters 5934 MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments); 5935 5936 // restore the thread (cannot use the pushed argument since arguments 5937 // may be overwritten by C code generated by an optimizing compiler); 5938 // however can use the register value directly if it is callee saved. 5939 if (LP64_ONLY(true ||) java_thread == rdi || java_thread == rsi) { 5940 // rdi & rsi (also r15) are callee saved -> nothing to do 5941 #ifdef ASSERT 5942 guarantee(java_thread != rax, "change this code"); 5943 push(rax); 5944 { Label L; 5945 get_thread(rax); 5946 cmpptr(java_thread, rax); 5947 jcc(Assembler::equal, L); 5948 stop("MacroAssembler::call_VM_base: rdi not callee saved?"); 5949 bind(L); 5950 } 5951 pop(rax); 5952 #endif 5953 } else { 5954 get_thread(java_thread); 5955 } 5956 // reset last Java frame 5957 // Only interpreter should have to clear fp 5958 reset_last_Java_frame(java_thread, true, false); 5959 5960 #ifndef CC_INTERP 5961 // C++ interp handles this in the interpreter 5962 check_and_handle_popframe(java_thread); 5963 check_and_handle_earlyret(java_thread); 5964 #endif /* CC_INTERP */ 5965 5966 if (check_exceptions) { 5967 // check for pending exceptions (java_thread is set upon return) 5968 cmpptr(Address(java_thread, Thread::pending_exception_offset()), (int32_t) NULL_WORD); 5969 #ifndef _LP64 5970 jump_cc(Assembler::notEqual, 5971 RuntimeAddress(StubRoutines::forward_exception_entry())); 5972 #else 5973 // This used to conditionally jump to forward_exception however it is 5974 // possible if we relocate that the branch will not reach. 
So we must jump 5975 // around so we can always reach 5976 5977 Label ok; 5978 jcc(Assembler::equal, ok); 5979 jump(RuntimeAddress(StubRoutines::forward_exception_entry())); 5980 bind(ok); 5981 #endif // LP64 5982 } 5983 5984 // get oop result if there is one and reset the value in the thread 5985 if (oop_result->is_valid()) { 5986 movptr(oop_result, Address(java_thread, JavaThread::vm_result_offset())); 5987 movptr(Address(java_thread, JavaThread::vm_result_offset()), NULL_WORD); 5988 verify_oop(oop_result, "broken oop in call_VM_base"); 5989 } 5990 } 5991 5992 void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) { 5993 5994 // Calculate the value for last_Java_sp 5995 // somewhat subtle. call_VM does an intermediate call 5996 // which places a return address on the stack just under the 5997 // stack pointer as the user finsihed with it. This allows 5998 // use to retrieve last_Java_pc from last_Java_sp[-1]. 5999 // On 32bit we then have to push additional args on the stack to accomplish 6000 // the actual requested call. On 64bit call_VM only can use register args 6001 // so the only extra space is the return address that call_VM created. 6002 // This hopefully explains the calculations here. 
6003 6004 #ifdef _LP64 6005 // We've pushed one address, correct last_Java_sp 6006 lea(rax, Address(rsp, wordSize)); 6007 #else 6008 lea(rax, Address(rsp, (1 + number_of_arguments) * wordSize)); 6009 #endif // LP64 6010 6011 call_VM_base(oop_result, noreg, rax, entry_point, number_of_arguments, check_exceptions); 6012 6013 } 6014 6015 void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) { 6016 call_VM_leaf_base(entry_point, number_of_arguments); 6017 } 6018 6019 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) { 6020 pass_arg0(this, arg_0); 6021 call_VM_leaf(entry_point, 1); 6022 } 6023 6024 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) { 6025 6026 LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg")); 6027 pass_arg1(this, arg_1); 6028 pass_arg0(this, arg_0); 6029 call_VM_leaf(entry_point, 2); 6030 } 6031 6032 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) { 6033 LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg")); 6034 LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg")); 6035 pass_arg2(this, arg_2); 6036 LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg")); 6037 pass_arg1(this, arg_1); 6038 pass_arg0(this, arg_0); 6039 call_VM_leaf(entry_point, 3); 6040 } 6041 6042 void MacroAssembler::check_and_handle_earlyret(Register java_thread) { 6043 } 6044 6045 void MacroAssembler::check_and_handle_popframe(Register java_thread) { 6046 } 6047 6048 void MacroAssembler::cmp32(AddressLiteral src1, int32_t imm) { 6049 if (reachable(src1)) { 6050 cmpl(as_Address(src1), imm); 6051 } else { 6052 lea(rscratch1, src1); 6053 cmpl(Address(rscratch1, 0), imm); 6054 } 6055 } 6056 6057 void MacroAssembler::cmp32(Register src1, AddressLiteral src2) { 6058 assert(!src2.is_lval(), "use cmpptr"); 6059 if (reachable(src2)) { 6060 cmpl(src1, as_Address(src2)); 6061 } else { 6062 lea(rscratch1, src2); 6063 cmpl(src1, Address(rscratch1, 0)); 6064 } 6065 } 
6066 6067 void MacroAssembler::cmp32(Register src1, int32_t imm) { 6068 Assembler::cmpl(src1, imm); 6069 } 6070 6071 void MacroAssembler::cmp32(Register src1, Address src2) { 6072 Assembler::cmpl(src1, src2); 6073 } 6074 6075 void MacroAssembler::cmpsd2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less) { 6076 ucomisd(opr1, opr2); 6077 6078 Label L; 6079 if (unordered_is_less) { 6080 movl(dst, -1); 6081 jcc(Assembler::parity, L); 6082 jcc(Assembler::below , L); 6083 movl(dst, 0); 6084 jcc(Assembler::equal , L); 6085 increment(dst); 6086 } else { // unordered is greater 6087 movl(dst, 1); 6088 jcc(Assembler::parity, L); 6089 jcc(Assembler::above , L); 6090 movl(dst, 0); 6091 jcc(Assembler::equal , L); 6092 decrementl(dst); 6093 } 6094 bind(L); 6095 } 6096 6097 void MacroAssembler::cmpss2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less) { 6098 ucomiss(opr1, opr2); 6099 6100 Label L; 6101 if (unordered_is_less) { 6102 movl(dst, -1); 6103 jcc(Assembler::parity, L); 6104 jcc(Assembler::below , L); 6105 movl(dst, 0); 6106 jcc(Assembler::equal , L); 6107 increment(dst); 6108 } else { // unordered is greater 6109 movl(dst, 1); 6110 jcc(Assembler::parity, L); 6111 jcc(Assembler::above , L); 6112 movl(dst, 0); 6113 jcc(Assembler::equal , L); 6114 decrementl(dst); 6115 } 6116 bind(L); 6117 } 6118 6119 6120 void MacroAssembler::cmp8(AddressLiteral src1, int imm) { 6121 if (reachable(src1)) { 6122 cmpb(as_Address(src1), imm); 6123 } else { 6124 lea(rscratch1, src1); 6125 cmpb(Address(rscratch1, 0), imm); 6126 } 6127 } 6128 6129 void MacroAssembler::cmpptr(Register src1, AddressLiteral src2) { 6130 #ifdef _LP64 6131 if (src2.is_lval()) { 6132 movptr(rscratch1, src2); 6133 Assembler::cmpq(src1, rscratch1); 6134 } else if (reachable(src2)) { 6135 cmpq(src1, as_Address(src2)); 6136 } else { 6137 lea(rscratch1, src2); 6138 Assembler::cmpq(src1, Address(rscratch1, 0)); 6139 } 6140 #else 6141 if (src2.is_lval()) { 6142 
cmp_literal32(src1, (int32_t) src2.target(), src2.rspec()); 6143 } else { 6144 cmpl(src1, as_Address(src2)); 6145 } 6146 #endif // _LP64 6147 } 6148 6149 void MacroAssembler::cmpptr(Address src1, AddressLiteral src2) { 6150 assert(src2.is_lval(), "not a mem-mem compare"); 6151 #ifdef _LP64 6152 // moves src2's literal address 6153 movptr(rscratch1, src2); 6154 Assembler::cmpq(src1, rscratch1); 6155 #else 6156 cmp_literal32(src1, (int32_t) src2.target(), src2.rspec()); 6157 #endif // _LP64 6158 } 6159 6160 void MacroAssembler::locked_cmpxchgptr(Register reg, AddressLiteral adr) { 6161 if (reachable(adr)) { 6162 if (os::is_MP()) 6163 lock(); 6164 cmpxchgptr(reg, as_Address(adr)); 6165 } else { 6166 lea(rscratch1, adr); 6167 if (os::is_MP()) 6168 lock(); 6169 cmpxchgptr(reg, Address(rscratch1, 0)); 6170 } 6171 } 6172 6173 void MacroAssembler::cmpxchgptr(Register reg, Address adr) { 6174 LP64_ONLY(cmpxchgq(reg, adr)) NOT_LP64(cmpxchgl(reg, adr)); 6175 } 6176 6177 void MacroAssembler::comisd(XMMRegister dst, AddressLiteral src) { 6178 if (reachable(src)) { 6179 comisd(dst, as_Address(src)); 6180 } else { 6181 lea(rscratch1, src); 6182 comisd(dst, Address(rscratch1, 0)); 6183 } 6184 } 6185 6186 void MacroAssembler::comiss(XMMRegister dst, AddressLiteral src) { 6187 if (reachable(src)) { 6188 comiss(dst, as_Address(src)); 6189 } else { 6190 lea(rscratch1, src); 6191 comiss(dst, Address(rscratch1, 0)); 6192 } 6193 } 6194 6195 6196 void MacroAssembler::cond_inc32(Condition cond, AddressLiteral counter_addr) { 6197 Condition negated_cond = negate_condition(cond); 6198 Label L; 6199 jcc(negated_cond, L); 6200 atomic_incl(counter_addr); 6201 bind(L); 6202 } 6203 6204 int MacroAssembler::corrected_idivl(Register reg) { 6205 // Full implementation of Java idiv and irem; checks for 6206 // special case as described in JVM spec., p.243 & p.271. 6207 // The function returns the (pc) offset of the idivl 6208 // instruction - may be needed for implicit exceptions. 
6209 // 6210 // normal case special case 6211 // 6212 // input : rax,: dividend min_int 6213 // reg: divisor (may not be rax,/rdx) -1 6214 // 6215 // output: rax,: quotient (= rax, idiv reg) min_int 6216 // rdx: remainder (= rax, irem reg) 0 6217 assert(reg != rax && reg != rdx, "reg cannot be rax, or rdx register"); 6218 const int min_int = 0x80000000; 6219 Label normal_case, special_case; 6220 6221 // check for special case 6222 cmpl(rax, min_int); 6223 jcc(Assembler::notEqual, normal_case); 6224 xorl(rdx, rdx); // prepare rdx for possible special case (where remainder = 0) 6225 cmpl(reg, -1); 6226 jcc(Assembler::equal, special_case); 6227 6228 // handle normal case 6229 bind(normal_case); 6230 cdql(); 6231 int idivl_offset = offset(); 6232 idivl(reg); 6233 6234 // normal and special case exit 6235 bind(special_case); 6236 6237 return idivl_offset; 6238 } 6239 6240 6241 6242 void MacroAssembler::decrementl(Register reg, int value) { 6243 if (value == min_jint) {subl(reg, value) ; return; } 6244 if (value < 0) { incrementl(reg, -value); return; } 6245 if (value == 0) { ; return; } 6246 if (value == 1 && UseIncDec) { decl(reg) ; return; } 6247 /* else */ { subl(reg, value) ; return; } 6248 } 6249 6250 void MacroAssembler::decrementl(Address dst, int value) { 6251 if (value == min_jint) {subl(dst, value) ; return; } 6252 if (value < 0) { incrementl(dst, -value); return; } 6253 if (value == 0) { ; return; } 6254 if (value == 1 && UseIncDec) { decl(dst) ; return; } 6255 /* else */ { subl(dst, value) ; return; } 6256 } 6257 6258 void MacroAssembler::division_with_shift (Register reg, int shift_value) { 6259 assert (shift_value > 0, "illegal shift value"); 6260 Label _is_positive; 6261 testl (reg, reg); 6262 jcc (Assembler::positive, _is_positive); 6263 int offset = (1 << shift_value) - 1 ; 6264 6265 if (offset == 1) { 6266 incrementl(reg); 6267 } else { 6268 addl(reg, offset); 6269 } 6270 6271 bind (_is_positive); 6272 sarl(reg, shift_value); 6273 } 6274 6275 // 
!defined(COMPILER2) is because of stupid core builds 6276 #if !defined(_LP64) || defined(COMPILER1) || !defined(COMPILER2) 6277 void MacroAssembler::empty_FPU_stack() { 6278 if (VM_Version::supports_mmx()) { 6279 emms(); 6280 } else { 6281 for (int i = 8; i-- > 0; ) ffree(i); 6282 } 6283 } 6284 #endif // !LP64 || C1 || !C2 6285 6286 6287 // Defines obj, preserves var_size_in_bytes 6288 void MacroAssembler::eden_allocate(Register obj, 6289 Register var_size_in_bytes, 6290 int con_size_in_bytes, 6291 Register t1, 6292 Label& slow_case) { 6293 assert(obj == rax, "obj must be in rax, for cmpxchg"); 6294 assert_different_registers(obj, var_size_in_bytes, t1); 6295 if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) { 6296 jmp(slow_case); 6297 } else { 6298 Register end = t1; 6299 Label retry; 6300 bind(retry); 6301 ExternalAddress heap_top((address) Universe::heap()->top_addr()); 6302 movptr(obj, heap_top); 6303 if (var_size_in_bytes == noreg) { 6304 lea(end, Address(obj, con_size_in_bytes)); 6305 } else { 6306 lea(end, Address(obj, var_size_in_bytes, Address::times_1)); 6307 } 6308 // if end < obj then we wrapped around => object too long => slow case 6309 cmpptr(end, obj); 6310 jcc(Assembler::below, slow_case); 6311 cmpptr(end, ExternalAddress((address) Universe::heap()->end_addr())); 6312 jcc(Assembler::above, slow_case); 6313 // Compare obj with the top addr, and if still equal, store the new top addr in 6314 // end at the address of the top addr pointer. Sets ZF if was equal, and clears 6315 // it otherwise. Use lock prefix for atomicity on MPs. 
6316 locked_cmpxchgptr(end, heap_top); 6317 jcc(Assembler::notEqual, retry); 6318 } 6319 } 6320 6321 void MacroAssembler::enter() { 6322 push(rbp); 6323 mov(rbp, rsp); 6324 } 6325 6326 void MacroAssembler::fcmp(Register tmp) { 6327 fcmp(tmp, 1, true, true); 6328 } 6329 6330 void MacroAssembler::fcmp(Register tmp, int index, bool pop_left, bool pop_right) { 6331 assert(!pop_right || pop_left, "usage error"); 6332 if (VM_Version::supports_cmov()) { 6333 assert(tmp == noreg, "unneeded temp"); 6334 if (pop_left) { 6335 fucomip(index); 6336 } else { 6337 fucomi(index); 6338 } 6339 if (pop_right) { 6340 fpop(); 6341 } 6342 } else { 6343 assert(tmp != noreg, "need temp"); 6344 if (pop_left) { 6345 if (pop_right) { 6346 fcompp(); 6347 } else { 6348 fcomp(index); 6349 } 6350 } else { 6351 fcom(index); 6352 } 6353 // convert FPU condition into eflags condition via rax, 6354 save_rax(tmp); 6355 fwait(); fnstsw_ax(); 6356 sahf(); 6357 restore_rax(tmp); 6358 } 6359 // condition codes set as follows: 6360 // 6361 // CF (corresponds to C0) if x < y 6362 // PF (corresponds to C2) if unordered 6363 // ZF (corresponds to C3) if x = y 6364 } 6365 6366 void MacroAssembler::fcmp2int(Register dst, bool unordered_is_less) { 6367 fcmp2int(dst, unordered_is_less, 1, true, true); 6368 } 6369 6370 void MacroAssembler::fcmp2int(Register dst, bool unordered_is_less, int index, bool pop_left, bool pop_right) { 6371 fcmp(VM_Version::supports_cmov() ? 
noreg : dst, index, pop_left, pop_right); 6372 Label L; 6373 if (unordered_is_less) { 6374 movl(dst, -1); 6375 jcc(Assembler::parity, L); 6376 jcc(Assembler::below , L); 6377 movl(dst, 0); 6378 jcc(Assembler::equal , L); 6379 increment(dst); 6380 } else { // unordered is greater 6381 movl(dst, 1); 6382 jcc(Assembler::parity, L); 6383 jcc(Assembler::above , L); 6384 movl(dst, 0); 6385 jcc(Assembler::equal , L); 6386 decrementl(dst); 6387 } 6388 bind(L); 6389 } 6390 6391 void MacroAssembler::fld_d(AddressLiteral src) { 6392 fld_d(as_Address(src)); 6393 } 6394 6395 void MacroAssembler::fld_s(AddressLiteral src) { 6396 fld_s(as_Address(src)); 6397 } 6398 6399 void MacroAssembler::fld_x(AddressLiteral src) { 6400 Assembler::fld_x(as_Address(src)); 6401 } 6402 6403 void MacroAssembler::fldcw(AddressLiteral src) { 6404 Assembler::fldcw(as_Address(src)); 6405 } 6406 6407 void MacroAssembler::fpop() { 6408 ffree(); 6409 fincstp(); 6410 } 6411 6412 void MacroAssembler::fremr(Register tmp) { 6413 save_rax(tmp); 6414 { Label L; 6415 bind(L); 6416 fprem(); 6417 fwait(); fnstsw_ax(); 6418 #ifdef _LP64 6419 testl(rax, 0x400); 6420 jcc(Assembler::notEqual, L); 6421 #else 6422 sahf(); 6423 jcc(Assembler::parity, L); 6424 #endif // _LP64 6425 } 6426 restore_rax(tmp); 6427 // Result is in ST0. 
// Note: fxch & fpop to get rid of ST1
  // (otherwise FPU stack could overflow eventually)
  fxch(1);
  fpop();
}


// Increment the memory operand at the literal address dst.
// If dst is not reachable(), its address is first materialized in rscratch1,
// so rscratch1 is overwritten on that path.
void MacroAssembler::incrementl(AddressLiteral dst) {
  if (reachable(dst)) {
    incrementl(as_Address(dst));
  } else {
    lea(rscratch1, dst);
    incrementl(Address(rscratch1, 0));
  }
}

// Increment the memory operand described by the ArrayAddress dst.
void MacroAssembler::incrementl(ArrayAddress dst) {
  incrementl(as_Address(dst));
}

// Add the constant 'value' to register reg.
// Nothing is emitted for 0; incl is used for 1 when UseIncDec is set; negative
// values are forwarded to decrementl with the sign flipped. min_jint is handled
// first because -min_jint is not representable as an int, so it must not take
// the decrementl(-value) route.
void MacroAssembler::incrementl(Register reg, int value) {
  if (value == min_jint) {addl(reg, value) ; return; }
  if (value <  0) { decrementl(reg, -value); return; }
  if (value == 0) {                        ; return; }
  if (value == 1 && UseIncDec) { incl(reg) ; return; }
  /* else */      { addl(reg, value)       ; return; }
}

// Same selection logic as the register form above, applied to a memory operand.
void MacroAssembler::incrementl(Address dst, int value) {
  if (value == min_jint) {addl(dst, value) ; return; }
  if (value <  0) { decrementl(dst, -value); return; }
  if (value == 0) {                        ; return; }
  if (value == 1 && UseIncDec) { incl(dst) ; return; }
  /* else */      { addl(dst, value)       ; return; }
}

// Unconditional jump to a literal address: a direct jmp_literal when the target
// is reachable, otherwise an indirect jump through rscratch1.
void MacroAssembler::jump(AddressLiteral dst) {
  if (reachable(dst)) {
    jmp_literal(dst.target(), dst.rspec());
  } else {
    lea(rscratch1, dst);
    jmp(rscratch1);
  }
}

// Conditional jump to a literal address.
// Reachable target: the branch is encoded by hand. The 2-byte short form is
// chosen only when no relocation is attached and the displacement fits in
// 8 bits; otherwise the 6-byte form with a 32-bit displacement is emitted.
// Unreachable target: a short branch on the reversed condition skips over an
// indirect jump through rscratch1.
void MacroAssembler::jump_cc(Condition cc, AddressLiteral dst) {
  if (reachable(dst)) {
    InstructionMark im(this);
    relocate(dst.reloc());
    const int short_size = 2;
    const int long_size = 6;
    // offs is measured from the start of the instruction; the encoded
    // displacement below subtracts the size of the chosen encoding.
    int offs = (intptr_t)dst.target() - ((intptr_t)_code_pos);
    if (dst.reloc() == relocInfo::none && is8bit(offs - short_size)) {
      // 0111 tttn #8-bit disp
      emit_byte(0x70 | cc);
      emit_byte((offs - short_size) & 0xFF);
    } else {
      // 0000 1111 1000 tttn #32-bit disp
      emit_byte(0x0F);
      emit_byte(0x80 | cc);
      emit_long(offs - long_size);
    }
  } else {
#ifdef ASSERT
    warning("reversing conditional branch");
#endif /* ASSERT */
    Label skip;
    jccb(reverse[cc], skip);
    lea(rscratch1, dst);
    Assembler::jmp(rscratch1);
    bind(skip);
  }
}

// ldmxcsr from a literal address; goes through rscratch1 when src is not reachable.
void MacroAssembler::ldmxcsr(AddressLiteral src) {
  if (reachable(src)) {
    Assembler::ldmxcsr(as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::ldmxcsr(Address(rscratch1, 0));
  }
}

// Load the byte at src into dst, sign-extended.
// Uses movsbl on 64-bit builds and on P6-class CPUs; otherwise an unsigned
// load followed by a shift-left/arithmetic-shift-right pair.
// Returns the code offset recorded immediately before the instruction that
// reads memory.
int MacroAssembler::load_signed_byte(Register dst, Address src) {
  int off;
  if (LP64_ONLY(true ||) VM_Version::is_P6()) {
    off = offset();
    movsbl(dst, src); // movsxb
  } else {
    off = load_unsigned_byte(dst, src);
    shll(dst, 24);
    sarl(dst, 24);
  }
  return off;
}

// Note: load_signed_short used to be called load_signed_word.
// Although the 'w' in x86 opcodes refers to the term "word" in the assembler
// manual, which means 16 bits, that usage is found nowhere in HotSpot code.
// The term "word" in HotSpot means a 32- or 64-bit machine word.
//
// Load the 16-bit value at src into dst, sign-extended to 32 bits.
// Returns the code offset recorded immediately before the instruction that
// reads memory.
int MacroAssembler::load_signed_short(Register dst, Address src) {
  int off;
  if (LP64_ONLY(true ||) VM_Version::is_P6()) {
    // This is dubious to me since it seems safe to do a signed 16 => 64 bit
    // version but this is what 64bit has always done. This seems to imply
    // that users are only using 32bits worth.
    off = offset();
    movswl(dst, src); // movsxw
  } else {
    off = load_unsigned_short(dst, src);
    shll(dst, 16);
    sarl(dst, 16);
  }
  return off;
}

// Load the byte at src into dst, zero-extended.
// Returns the code offset recorded immediately before the instruction that
// reads memory (i.e. after the xorl on the fallback path).
int MacroAssembler::load_unsigned_byte(Register dst, Address src) {
  // According to Intel Doc. AP-526, "Zero-Extension of Short", p.16,
  // and "3.9 Partial Register Penalties", p. 22).
  int off;
  // movzbl is also required when src uses dst: the fallback clears dst
  // before the load and would destroy the address.
  if (LP64_ONLY(true || ) VM_Version::is_P6() || src.uses(dst)) {
    off = offset();
    movzbl(dst, src); // movzxb
  } else {
    xorl(dst, dst);
    off = offset();
    movb(dst, src);
  }
  return off;
}

// Note: load_unsigned_short used to be called load_unsigned_word.
//
// Load the 16-bit value at src into dst, zero-extended.
// Returns the code offset recorded immediately before the instruction that
// reads memory (i.e. after the xorl on the fallback path).
int MacroAssembler::load_unsigned_short(Register dst, Address src) {
  // According to Intel Doc. AP-526, "Zero-Extension of Short", p.16,
  // and "3.9 Partial Register Penalties", p. 22).
  int off;
  // As in load_unsigned_byte: the xorl fallback is unusable when src uses dst.
  if (LP64_ONLY(true ||) VM_Version::is_P6() || src.uses(dst)) {
    off = offset();
    movzwl(dst, src); // movzxw
  } else {
    xorl(dst, dst);
    off = offset();
    movw(dst, src);
  }
  return off;
}

// Load a value of size_in_bytes (1, 2, 4 or 8) from src into dst.
// is_signed selects sign- or zero-extension for the 1- and 2-byte cases.
// On 32-bit builds an 8-byte load is split into two 32-bit loads: the word at
// src goes to dst and the word at src + BytesPerInt goes to dst2, which must
// then be a real register. Any other size hits ShouldNotReachHere().
void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2) {
  switch (size_in_bytes) {
#ifndef _LP64
  case  8:
    assert(dst2 != noreg, "second dest register required");
    movl(dst,  src);
    movl(dst2, src.plus_disp(BytesPerInt));
    break;
#else
  case  8:  movq(dst, src); break;
#endif
  case  4:  movl(dst, src); break;
  case  2:  is_signed ? load_signed_short(dst, src) : load_unsigned_short(dst, src); break;
  case  1:  is_signed ? load_signed_byte( dst, src) : load_unsigned_byte( dst, src); break;
  default:  ShouldNotReachHere();
  }
}

// Store the low size_in_bytes (1, 2, 4 or 8) bytes of src to dst.
// On 32-bit builds an 8-byte store is split into two 32-bit stores: src goes to
// dst and src2 goes to dst + BytesPerInt, so src2 must then be a real register.
// Any other size hits ShouldNotReachHere().
void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2) {
  switch (size_in_bytes) {
#ifndef _LP64
  case  8:
    assert(src2 != noreg, "second source register required");
    movl(dst,                        src);
    movl(dst.plus_disp(BytesPerInt), src2);
    break;
#else
  case  8:  movq(dst, src); break;
#endif
  case  4:  movl(dst, src); break;
  case  2:  movw(dst, src); break;
  case  1:  movb(dst, src); break;
  default:  ShouldNotReachHere();
  }
}

// 32-bit store of src to a literal address; goes through rscratch1 when dst is
// not reachable.
void MacroAssembler::mov32(AddressLiteral dst, Register src) {
  if (reachable(dst)) {
    movl(as_Address(dst), src);
  } else {
    lea(rscratch1, dst);
    movl(Address(rscratch1, 0), src);
  }
}

// 32-bit load into dst from a literal address; goes through rscratch1 when src
// is not reachable.
void MacroAssembler::mov32(Register dst, AddressLiteral src) {
  if (reachable(src)) {
    movl(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    movl(dst, Address(rscratch1, 0));
  }
}

// C++ bool manipulation
//
// The three movbool overloads pick the move width (movb / movw / movl) from
// sizeof(bool) of the compiler building the VM; any other size is unsupported.

// Load a C++ bool from memory into dst.
void MacroAssembler::movbool(Register dst, Address src) {
  if(sizeof(bool) == 1)
    movb(dst, src);
  else if(sizeof(bool) == 2)
    movw(dst, src);
  else if(sizeof(bool) == 4)
    movl(dst, src);
  else
    // unsupported
    ShouldNotReachHere();
}

// Store the compile-time bool constant boolconst to memory.
void MacroAssembler::movbool(Address dst, bool boolconst) {
  if(sizeof(bool) == 1)
    movb(dst, (int) boolconst);
  else if(sizeof(bool) == 2)
    movw(dst, (int) boolconst);
  else if(sizeof(bool) == 4)
    movl(dst, (int) boolconst);
  else
    // unsupported
    ShouldNotReachHere();
}

// Store a C++ bool held in register src to memory.
void MacroAssembler::movbool(Address dst, Register src) {
  if(sizeof(bool) == 1)
    movb(dst, src);
  else if(sizeof(bool) == 2)
    movw(dst, src);
  else if(sizeof(bool) == 4)
    movl(dst, src);
  else
    // unsupported
    ShouldNotReachHere();
}

// Store the immediate byte src to the location described by the ArrayAddress dst.
void MacroAssembler::movbyte(ArrayAddress dst, int src) {
  movb(as_Address(dst), src);
}

// Load a double from a literal address into an XMM register.
// UseXmmLoadAndClearUpper selects movsd over movlpd; the address goes through
// rscratch1 when src is not reachable.
void MacroAssembler::movdbl(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    if (UseXmmLoadAndClearUpper) {
      movsd (dst, as_Address(src));
    } else {
      movlpd(dst, as_Address(src));
    }
  } else {
    lea(rscratch1, src);
    if (UseXmmLoadAndClearUpper) {
      movsd (dst, Address(rscratch1, 0));
    } else {
      movlpd(dst, Address(rscratch1, 0));
    }
  }
}

// Load a float from a literal address into an XMM register (movss); the address
// goes through rscratch1 when src is not reachable.
void MacroAssembler::movflt(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    movss(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    movss(dst, Address(rscratch1, 0));
  }
}

// Pointer-sized register-to-register move: movq on 64-bit, movl on 32-bit.
void MacroAssembler::movptr(Register dst, Register src) {
  LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
}

// Pointer-sized load from memory: movq on 64-bit, movl on 32-bit.
void MacroAssembler::movptr(Register dst, Address src) {
  LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
}

// Load a pointer-sized immediate: mov64 on 64-bit, movl on 32-bit.
// src should NEVER be a real pointer. Use AddressLiteral for true pointers
void MacroAssembler::movptr(Register dst, intptr_t src) {
  LP64_ONLY(mov64(dst, src)) NOT_LP64(movl(dst, src));
}

// Pointer-sized store to memory: movq on 64-bit, movl on 32-bit.
void MacroAssembler::movptr(Address dst, Register src) {
  LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
}

// movss from a literal address; goes through rscratch1 when src is not reachable.
void MacroAssembler::movss(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    movss(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    movss(dst, Address(rscratch1, 0));
  }
}

// Emit an explicit null check of reg, but only when
// needs_explicit_null_check(offset) says the later access at M[reg + offset]
// cannot be relied on to fault by itself.
void MacroAssembler::null_check(Register reg, int offset) {
  if (needs_explicit_null_check(offset)) {
    // provoke OS NULL exception if reg = NULL by
    // accessing M[reg] w/o changing any (non-CC) registers
    // NOTE: cmpl is plenty here to provoke a segv
    cmpptr(rax, Address(reg, 0));
    // Note: should probably use testl(rax, Address(reg, 0));
    //       may be shorter code (however, this version of
    //       testl needs to be implemented first)
  } else {
    // nothing to do, (later) access of M[reg + offset]
    // will provoke OS NULL exception if reg = NULL
  }
}

void MacroAssembler::os_breakpoint() {
  // instead of directly emitting a breakpoint, call os::breakpoint for better debuggability
  // (e.g., MSVC can't call ps() otherwise)
  call(RuntimeAddress(CAST_FROM_FN_PTR(address, os::breakpoint)));
}

// Restore the state saved by push_CPU_state(): FPU state first, then the
// integer state, i.e. the reverse of the push order.
void MacroAssembler::pop_CPU_state() {
  pop_FPU_state();
  pop_IU_state();
}

// Reload the FPU state from the top of the stack (frstor on 32-bit, fxrstor on
// 64-bit) and release the FPUStateSizeInWords words it occupied.
void MacroAssembler::pop_FPU_state() {
  NOT_LP64(frstor(Address(rsp, 0));)
  LP64_ONLY(fxrstor(Address(rsp, 0));)
  addptr(rsp, FPUStateSizeInWords * wordSize);
}

// Undo push_IU_state(): pop the general registers, drop the 8-byte alignment
// slot on 64-bit, then pop the flags.
void MacroAssembler::pop_IU_state() {
  popa();
  LP64_ONLY(addq(rsp, 8));
  popf();
}

// Save Integer and Float state
// Warning: Stack must be 16 byte aligned (64bit)
void MacroAssembler::push_CPU_state() {
  push_IU_state();
  push_FPU_state();
}

// Reserve FPUStateSizeInWords words on the stack and save the FPU state there
// (fnsave + fwait on 32-bit, fxsave on 64-bit).
void MacroAssembler::push_FPU_state() {
  subptr(rsp, FPUStateSizeInWords * wordSize);
#ifndef _LP64
  fnsave(Address(rsp, 0));
  fwait();
#else
  fxsave(Address(rsp, 0));
#endif // LP64
}

// Save the flags and the general registers on the stack.
void MacroAssembler::push_IU_state() {
  // Push flags first because pusha kills them
  pushf();
  // Make sure rsp stays 16-byte aligned
  LP64_ONLY(subq(rsp, 8));
  pusha();
}

// Clear the last-Java-frame fields of the thread.
// java_thread: register holding the thread; if it is not a valid register, rdi
//              is used instead and loaded via get_thread() (overwriting rdi).
// clear_fp / clear_pc: also zero last_Java_fp / last_Java_pc.
// last_Java_sp is always zeroed.
void MacroAssembler::reset_last_Java_frame(Register java_thread, bool clear_fp, bool clear_pc) {
  // determine java_thread register
  if (!java_thread->is_valid()) {
    java_thread = rdi;
    get_thread(java_thread);
  }
  // we must set sp to zero to clear frame
  movptr(Address(java_thread, JavaThread::last_Java_sp_offset()), NULL_WORD);
  if (clear_fp) {
    movptr(Address(java_thread, JavaThread::last_Java_fp_offset()), NULL_WORD);
  }

  if (clear_pc)
    movptr(Address(java_thread, JavaThread::last_Java_pc_offset()), NULL_WORD);

}

// Counterpart of save_rax(): restore rax from the stack when tmp is noreg,
// from tmp when tmp is some other register, and do nothing when tmp is rax.
void MacroAssembler::restore_rax(Register tmp) {
  if (tmp == noreg) pop(rax);
  else if (tmp != rax) mov(rax, tmp);
}

// Round reg up to a multiple of modulus using add (modulus - 1) / and (-modulus).
// NOTE(review): the mask trick is only correct when modulus is a power of two;
// nothing here checks that - confirm at call sites.
void MacroAssembler::round_to(Register reg, int modulus) {
  addptr(reg, modulus - 1);
  andptr(reg, -modulus);
}

// Preserve rax: push it when tmp is noreg, copy it into tmp when tmp is some
// other register, and do nothing when tmp is rax itself.
void MacroAssembler::save_rax(Register tmp) {
  if (tmp == noreg) push(rax);
  else if (tmp != rax) mov(tmp, rax);
}

// Write serialization page so VM thread can do a pseudo remote membar.
// We use the current thread pointer to calculate a thread specific
// offset to write to within the page. This minimizes bus traffic
// due to cache line collision.
void MacroAssembler::serialize_memory(Register thread, Register tmp) {
  // tmp = (thread >> shift) & (page_size - sizeof(int)): an offset into the
  // serialization page derived from the thread pointer. tmp is overwritten.
  // NOTE(review): the mask presumably keeps the offset int-aligned and inside
  // the page, which relies on vm_page_size() being a power of two - confirm.
  movl(tmp, thread);
  shrl(tmp, os::get_serialize_page_shift_count());
  andl(tmp, (os::vm_page_size() - sizeof(int)));

  Address index(noreg, tmp, Address::times_1);
  ExternalAddress page(os::get_memory_serialize_page());

  // Size of store must match masking code above
  movl(as_Address(ArrayAddress(page, index)), tmp);
}

// Calls to C land
//
// When entering C land, the rbp, & rsp of the last Java frame have to be recorded
// in the (thread-local) JavaThread object. When leaving C land, the last Java fp
// has to be reset to 0. This is required to allow proper stack traversal.
//
// java_thread:  register holding the thread; if not a valid register, rdi is
//               used and loaded via get_thread() (overwriting rdi).
// last_java_sp: register to record as last_Java_sp; rsp when not valid.
// last_java_fp: recorded only when it is a valid register.
// last_java_pc: recorded only when non-NULL.
// last_Java_sp is the last of the fields to be written.
void MacroAssembler::set_last_Java_frame(Register java_thread,
                                         Register last_java_sp,
                                         Register last_java_fp,
                                         address  last_java_pc) {
  // determine java_thread register
  if (!java_thread->is_valid()) {
    java_thread = rdi;
    get_thread(java_thread);
  }
  // determine last_java_sp register
  if (!last_java_sp->is_valid()) {
    last_java_sp = rsp;
  }

  // last_java_fp is optional

  if (last_java_fp->is_valid()) {
    movptr(Address(java_thread, JavaThread::last_Java_fp_offset()), last_java_fp);
  }

  // last_java_pc is optional

  if (last_java_pc != NULL) {
    lea(Address(java_thread,
                 JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()),
        InternalAddress(last_java_pc));

  }
  movptr(Address(java_thread, JavaThread::last_Java_sp_offset()), last_java_sp);
}

// Pointer-width shift left by imm8: shlq on 64-bit, shll on 32-bit.
void MacroAssembler::shlptr(Register dst, int imm8) {
  LP64_ONLY(shlq(dst, imm8)) NOT_LP64(shll(dst, imm8));
}

// Pointer-width logical shift right by imm8: shrq on 64-bit, shrl on 32-bit.
void MacroAssembler::shrptr(Register dst, int imm8) {
  LP64_ONLY(shrq(dst, imm8)) NOT_LP64(shrl(dst, imm8));
}

// Sign-extend the low byte of reg to 32 bits in place.
// movsbl is used on 64-bit builds, and on 32-bit P6-class CPUs when reg has a
// byte-register form; otherwise a shift-left/arithmetic-shift-right pair.
void MacroAssembler::sign_extend_byte(Register reg) {
  if (LP64_ONLY(true ||) (VM_Version::is_P6() && reg->has_byte_register())) {
    movsbl(reg, reg); // movsxb
  } else {
    shll(reg, 24);
    sarl(reg, 24);
  }
}

// Sign-extend the low 16 bits of reg to 32 bits in place (movswl, or a shift
// pair on 32-bit pre-P6 CPUs).
void MacroAssembler::sign_extend_short(Register reg) {
  if (LP64_ONLY(true ||) VM_Version::is_P6()) {
    movswl(reg, reg); // movsxw
  } else {
    shll(reg, 16);
    sarl(reg, 16);
  }
}

// testl against a literal address. Unlike the other AddressLiteral helpers in
// this file there is no rscratch1 fallback: the address must be reachable.
void MacroAssembler::testl(Register dst, AddressLiteral src) {
  assert(reachable(src), "Address should be reachable");
  testl(dst, as_Address(src));
}

//////////////////////////////////////////////////////////////////////////////////
#ifndef SERIALGC

// G1 pre-write barrier: while the thread's SATB mark queue is active, record
// the value currently stored at [obj] before it is overwritten.
//
// obj:        address of the field about to be written.
// thread:     thread register (explicit parameter on 32-bit, r15_thread on 64-bit).
// tmp, tmp2:  scratch registers; tmp2 receives the previous field value.
// tosca_live: when true, rax is saved and restored around the runtime call.
void MacroAssembler::g1_write_barrier_pre(Register obj,
#ifndef _LP64
                                          Register thread,
#endif
                                          Register tmp,
                                          Register tmp2,
                                          bool tosca_live) {
  LP64_ONLY(Register thread = r15_thread;)
  Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
                                       PtrQueue::byte_offset_of_active()));

  Address index(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
                                       PtrQueue::byte_offset_of_index()));
  Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
                                       PtrQueue::byte_offset_of_buf()));


  Label done;
  Label runtime;

  // if (!marking_in_progress) goto done;
  // The compare width follows the declared width of the 'active' field.
  if (in_bytes(PtrQueue::byte_width_of_active()) == 4) {
    cmpl(in_progress, 0);
  } else {
    assert(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption");
    cmpb(in_progress, 0);
  }
  jcc(Assembler::equal, done);

  // if (x.f == NULL) goto done;
#ifdef _LP64
  load_heap_oop(tmp2, Address(obj, 0));
#else
  movptr(tmp2, Address(obj, 0));
#endif
  cmpptr(tmp2, (int32_t) NULL_WORD);
  jcc(Assembler::equal, done);

  // Can we store original value in the thread's buffer?
  // An index of zero sends us to the runtime path; otherwise the index is
  // decremented by one word and the value is stored at buffer + index.

#ifdef _LP64
  movslq(tmp, index);
  cmpq(tmp, 0);
#else
  cmpl(index, 0);
#endif
  jcc(Assembler::equal, runtime);
#ifdef _LP64
  subq(tmp, wordSize);
  movl(index, tmp);
  addq(tmp, buffer);
#else
  subl(index, wordSize);
  movl(tmp, buffer);
  addl(tmp, index);
#endif
  movptr(Address(tmp, 0), tmp2);
  jmp(done);
  bind(runtime);
  // save the live input values
  if(tosca_live) push(rax);
  push(obj);
#ifdef _LP64
  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), tmp2, r15_thread);
#else
  push(thread);
  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), tmp2, thread);
  pop(thread);
#endif
  pop(obj);
  if(tosca_live) pop(rax);
  bind(done);

}

// G1 post-write barrier: for a non-NULL store that crosses heap regions, dirty
// the card covering store_addr and log the card address in the thread's dirty
// card queue, calling SharedRuntime::g1_wb_post when the queue index is zero.
//
// store_addr: address that was written.
// new_val:    value that was stored.
// thread:     thread register (explicit parameter on 32-bit, r15_thread on 64-bit).
// tmp, tmp2:  scratch registers; the 64-bit path additionally uses rscratch1.
void MacroAssembler::g1_write_barrier_post(Register store_addr,
                                           Register new_val,
#ifndef _LP64
                                           Register thread,
#endif
                                           Register tmp,
                                           Register tmp2) {

  LP64_ONLY(Register thread = r15_thread;)
  Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
                                       PtrQueue::byte_offset_of_index()));
  Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
                                       PtrQueue::byte_offset_of_buf()));
  BarrierSet* bs = Universe::heap()->barrier_set();
  CardTableModRefBS* ct = (CardTableModRefBS*)bs;
  Label done;
  Label runtime;

  // Does store cross heap regions?
  // (store_addr ^ new_val) >> LogOfHRGrainBytes is zero exactly when both lie
  // in the same region; the shift sets the flags tested by the jcc.

  movptr(tmp, store_addr);
  xorptr(tmp, new_val);
  shrptr(tmp, HeapRegion::LogOfHRGrainBytes);
  jcc(Assembler::equal, done);

  // crosses regions, storing NULL?

  cmpptr(new_val, (int32_t) NULL_WORD);
  jcc(Assembler::equal, done);

  // storing region crossing non-NULL, is card already dirty?

  ExternalAddress cardtable((address) ct->byte_map_base);
  assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
#ifdef _LP64
  const Register card_addr = tmp;

  movq(card_addr, store_addr);
  shrq(card_addr, CardTableModRefBS::card_shift);

  lea(tmp2, cardtable);

  // get the address of the card
  addq(card_addr, tmp2);
#else
  // On 32-bit, card_index and card_addr are both aliases for tmp: the index
  // is turned into the card address in place by the lea below.
  const Register card_index = tmp;

  movl(card_index, store_addr);
  shrl(card_index, CardTableModRefBS::card_shift);

  Address index(noreg, card_index, Address::times_1);
  const Register card_addr = tmp;
  lea(card_addr, as_Address(ArrayAddress(cardtable, index)));
#endif
  cmpb(Address(card_addr, 0), 0);
  jcc(Assembler::equal, done);

  // storing a region crossing, non-NULL oop, card is clean.
  // dirty card and log.

  movb(Address(card_addr, 0), 0);

  // Queue index of zero: hand the card to the runtime instead of logging inline.
  cmpl(queue_index, 0);
  jcc(Assembler::equal, runtime);
  subl(queue_index, wordSize);
  movptr(tmp2, buffer);
#ifdef _LP64
  movslq(rscratch1, queue_index);
  addq(tmp2, rscratch1);
  movq(Address(tmp2, 0), card_addr);
#else
  addl(tmp2, queue_index);
  movl(Address(tmp2, 0), card_index);
#endif
  jmp(done);

  bind(runtime);
  // save the live input values
  push(store_addr);
  push(new_val);
#ifdef _LP64
  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, r15_thread);
#else
  push(thread);
  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, thread);
  pop(thread);
#endif
  pop(new_val);
  pop(store_addr);

  bind(done);

}

#endif // SERIALGC
//////////////////////////////////////////////////////////////////////////////////


void MacroAssembler::store_check(Register obj) {
  // Does a store check for the oop in register obj. The content of
  // register obj is destroyed afterwards.
  store_check_part_1(obj);
  store_check_part_2(obj);
}

// Variant taking the destination address as well; dst is currently ignored and
// the call is forwarded to store_check(obj).
void MacroAssembler::store_check(Register obj, Address dst) {
  store_check(obj);
}


// split the store check operation so that other instructions can be scheduled in between
//
// Part 1: turn the address in obj into a card index (obj >>= card_shift).
void MacroAssembler::store_check_part_1(Register obj) {
  BarrierSet* bs = Universe::heap()->barrier_set();
  assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
  shrptr(obj, CardTableModRefBS::card_shift);
}

// Part 2: write 0 to the card table byte selected by the card index in obj
// (as produced by store_check_part_1).
void MacroAssembler::store_check_part_2(Register obj) {
  BarrierSet* bs = Universe::heap()->barrier_set();
  assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
  CardTableModRefBS* ct = (CardTableModRefBS*)bs;
  assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");

  // The calculation for byte_map_base is as follows:
  // byte_map_base = _byte_map - (uintptr_t(low_bound) >> card_shift);
  // So this essentially converts an address to a displacement and
  // it will never need to be relocated. On 64bit however the value may be too
  // large for a 32bit displacement

  intptr_t disp = (intptr_t) ct->byte_map_base;
  if (is_simm32(disp)) {
    Address cardtable(noreg, obj, Address::times_1, disp);
    movb(cardtable, 0);
  } else {
    // By doing it as an ExternalAddress disp could be converted to a rip-relative
    // displacement and done in a single instruction given favorable mapping and
    // a smarter version of as_Address. Worst case it is two instructions which
    // is no worse off than loading disp into a register and doing as a simple
    // Address() as above.
    // We can't do as ExternalAddress as the only style since if disp == 0 we'll
    // assert since NULL isn't acceptable in a reloci (see 6644928). In any case
    // in some cases we'll get a single instruction version.

    ExternalAddress cardtable((address)disp);
    Address index(noreg, obj, Address::times_1);
    movb(as_Address(ArrayAddress(cardtable, index)), 0);
  }
}

// Pointer-width subtract of an immediate: subq on 64-bit, subl on 32-bit.
void MacroAssembler::subptr(Register dst, int32_t imm32) {
  LP64_ONLY(subq(dst, imm32)) NOT_LP64(subl(dst, imm32));
}

// Pointer-width register subtract: subq on 64-bit, subl on 32-bit.
void MacroAssembler::subptr(Register dst, Register src) {
  LP64_ONLY(subq(dst, src)) NOT_LP64(subl(dst, src));
}

// C++ bool manipulation
//
// Set the flags according to the C++ bool held in dst. The test width follows
// sizeof(bool); a two-byte bool is not implemented (needs a testw).
void MacroAssembler::testbool(Register dst) {
  if(sizeof(bool) == 1)
    testb(dst, 0xff);
  else if(sizeof(bool) == 2) {
    // testw implementation needed for two byte bools
    ShouldNotReachHere();
  } else if(sizeof(bool) == 4)
    testl(dst, dst);
  else
    // unsupported
    ShouldNotReachHere();
}

// Pointer-width test: testq on 64-bit, testl on 32-bit.
void MacroAssembler::testptr(Register dst, Register src) {
  LP64_ONLY(testq(dst, src)) NOT_LP64(testl(dst, src));
}

// Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes.
void MacroAssembler::tlab_allocate(Register obj,
                                   Register var_size_in_bytes,
                                   int con_size_in_bytes,
                                   Register t1,
                                   Register t2,
                                   Label& slow_case) {
  // Bump-pointer allocation from the current thread's TLAB:
  //   obj = tlab_top; end = obj + size; if (end > tlab_end) goto slow_case;
  //   tlab_top = end.
  // The size is var_size_in_bytes when that is a register, otherwise the
  // constant con_size_in_bytes. On 32-bit, t1 is used to hold the thread.
  assert_different_registers(obj, t1, t2);
  assert_different_registers(obj, var_size_in_bytes, t1);
  Register end = t2;
  Register thread = NOT_LP64(t1) LP64_ONLY(r15_thread);

  verify_tlab();

  NOT_LP64(get_thread(thread));

  movptr(obj, Address(thread, JavaThread::tlab_top_offset()));
  if (var_size_in_bytes == noreg) {
    lea(end, Address(obj, con_size_in_bytes));
  } else {
    lea(end, Address(obj, var_size_in_bytes, Address::times_1));
  }
  cmpptr(end, Address(thread, JavaThread::tlab_end_offset()));
  jcc(Assembler::above, slow_case);

  // update the tlab top pointer
  movptr(Address(thread, JavaThread::tlab_top_offset()), end);

  // recover var_size_in_bytes if necessary
  // (when it shares a register with 'end' it now holds obj + size)
  if (var_size_in_bytes == end) {
    subptr(var_size_in_bytes, obj);
  }
  verify_tlab();
}

// Preserves rbx, and rdx.
//
// Emits the TLAB refill sequence. Uses rax (top), rcx (t1) and rsi (t2) as
// scratch, plus rdi for the thread on 32-bit (r15_thread on 64-bit).
// Control leaves through one of the labels:
//   retry     - a new TLAB was installed;
//   try_eden  - the current TLAB is retained (too much free space to discard);
//   slow_case - inline eden allocation is not available, or eden_allocate
//               branched there.
// Returns the register holding the thread, for use by the caller.
Register MacroAssembler::tlab_refill(Label& retry,
                                     Label& try_eden,
                                     Label& slow_case) {
  Register top = rax;
  Register t1  = rcx;
  Register t2  = rsi;
  Register thread_reg = NOT_LP64(rdi) LP64_ONLY(r15_thread);
  assert_different_registers(top, thread_reg, t1, t2, /* preserve: */ rbx, rdx);
  Label do_refill, discard_tlab;

  if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) {
    // No allocation in the shared eden.
    jmp(slow_case);
  }

  NOT_LP64(get_thread(thread_reg));

  movptr(top, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())));
  movptr(t1,  Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())));

  // calculate amount of free space
  subptr(t1, top);
  shrptr(t1, LogHeapWordSize);

  // Retain tlab and allocate object in shared space if
  // the amount free in the tlab is too large to discard.
  cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset())));
  jcc(Assembler::lessEqual, discard_tlab);

  // Retain
  // %%% yuck as movptr...
  movptr(t2, (int32_t) ThreadLocalAllocBuffer::refill_waste_limit_increment());
  addptr(Address(thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset())), t2);
  if (TLABStats) {
    // increment number of slow_allocations
    addl(Address(thread_reg, in_bytes(JavaThread::tlab_slow_allocations_offset())), 1);
  }
  jmp(try_eden);

  bind(discard_tlab);
  if (TLABStats) {
    // increment number of refills
    addl(Address(thread_reg, in_bytes(JavaThread::tlab_number_of_refills_offset())), 1);
    // accumulate wastage -- t1 is amount free in tlab
    addl(Address(thread_reg, in_bytes(JavaThread::tlab_fast_refill_waste_offset())), t1);
  }

  // if tlab is currently allocated (top or end != null) then
  // fill [top, end + alignment_reserve) with array object
  testptr(top, top);
  jcc(Assembler::zero, do_refill);

  // set up the mark word
  movptr(Address(top, oopDesc::mark_offset_in_bytes()), (intptr_t)markOopDesc::prototype()->copy_set_hash(0x2));
  // set the length to the remaining space
  // (t1 is in heap words; convert to a count of jint elements)
  subptr(t1, typeArrayOopDesc::header_size(T_INT));
  addptr(t1, (int32_t)ThreadLocalAllocBuffer::alignment_reserve());
  shlptr(t1, log2_intptr(HeapWordSize/sizeof(jint)));
  movl(Address(top, arrayOopDesc::length_offset_in_bytes()), t1);
  // set klass to intArrayKlass
  // dubious reloc why not an oop reloc?
  movptr(t1, ExternalAddress((address)Universe::intArrayKlassObj_addr()));
  // store klass last.  concurrent gcs assumes klass length is valid if
  // klass field is not null.
  store_klass(top, t1);

  // account for the bytes between tlab_start and top of the old TLAB
  movptr(t1, top);
  subptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_start_offset())));
  incr_allocated_bytes(thread_reg, t1, 0);

  // refill the tlab with an eden allocation
  bind(do_refill);
  movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_size_offset())));
  shlptr(t1, LogHeapWordSize);
  // allocate new tlab, address returned in top
  eden_allocate(top, t1, 0, t2, slow_case);

  // Check that t1 was preserved in eden_allocate.
#ifdef ASSERT
  if (UseTLAB) {
    Label ok;
    Register tsize = rsi;
    assert_different_registers(tsize, thread_reg, t1);
    push(tsize);
    movptr(tsize, Address(thread_reg, in_bytes(JavaThread::tlab_size_offset())));
    shlptr(tsize, LogHeapWordSize);
    cmpptr(t1, tsize);
    jcc(Assembler::equal, ok);
    stop("assert(t1 != tlab size)");
    should_not_reach_here();

    bind(ok);
    pop(tsize);
  }
#endif
  // Install the new TLAB: start = top = new block,
  // end = new block + size - alignment reserve.
  movptr(Address(thread_reg, in_bytes(JavaThread::tlab_start_offset())), top);
  movptr(Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())), top);
  addptr(top, t1);
  subptr(top, (int32_t)ThreadLocalAllocBuffer::alignment_reserve_in_bytes());
  movptr(Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())), top);
  verify_tlab();
  jmp(retry);

  return thread_reg; // for use by caller
}

// Add an allocation size to the thread's allocated-bytes counter.
// The size is var_size_in_bytes when that is a valid register, otherwise the
// constant con_size_in_bytes.
// 64-bit: a single addq; the thread register is used as passed.
// 32-bit: if 'thread' is not a valid register, t1 must be, and is loaded via
//         get_thread(); the counter is updated with addl on the low word
//         followed by adcl on the word at offset +4 to propagate the carry.
void MacroAssembler::incr_allocated_bytes(Register thread,
                                          Register var_size_in_bytes,
                                          int con_size_in_bytes,
                                          Register t1) {
#ifdef _LP64
  if (var_size_in_bytes->is_valid()) {
    addq(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), var_size_in_bytes);
  } else {
    addq(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), con_size_in_bytes);
  }
#else
  if (!thread->is_valid()) {
    assert(t1->is_valid(), "need temp reg");
    thread = t1;
    get_thread(thread);
  }

  if (var_size_in_bytes->is_valid()) {
    addl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), var_size_in_bytes);
  } else {
    addl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), con_size_in_bytes);
  }
  adcl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())+4), 0);
#endif
}

// pi/4: the bound trigfunc() compares |x| against to choose its fast path.
static const double pi_4 =  0.7853981633974483;

// Emit code computing sin, cos or tan of the value on top of the FPU stack.
//
// trig:                's', 'c' or 't'.
// num_fpu_regs_in_use: number of FPU stack slots currently live; when more
//                      than one, the whole FPU stack is spilled around the
//                      runtime call on the slow path.
//
// When |x| is not above pi/4 (and the pi_4 constant is reachable) the
// fsin/fcos/ftan instruction is used directly; otherwise
// SharedRuntime::dsin/dcos/dtan is called. The result is left on top of the
// FPU stack in both cases.
void MacroAssembler::trigfunc(char trig, int num_fpu_regs_in_use) {
  // A hand-coded argument reduction for values in fabs(pi/4, pi/2)
  // was attempted in this code; unfortunately it appears that the
  // switch to 80-bit precision and back causes this to be
  // unprofitable compared with simply performing a runtime call if
  // the argument is out of the (-pi/4, pi/4) range.

  Register tmp = noreg;
  if (!VM_Version::supports_cmov()) {
    // fcmp needs a temporary so preserve rbx,
    tmp = rbx;
    push(tmp);
  }

  Label slow_case, done;

  ExternalAddress pi4_adr = (address)&pi_4;
  if (reachable(pi4_adr)) {
    // x ?<= pi/4
    fld_d(pi4_adr);
    fld_s(1);                // Stack:  X  PI/4  X
    fabs();                  // Stack: |X| PI/4  X
    fcmp(tmp);
    jcc(Assembler::above, slow_case);

    // fastest case: -pi/4 <= x <= pi/4
    switch(trig) {
    case 's':
      fsin();
      break;
    case 'c':
      fcos();
      break;
    case 't':
      ftan();
      break;
    default:
      assert(false, "bad intrinsic");
      break;
    }
    jmp(done);
  }

  // slow case: runtime call
  bind(slow_case);
  // Preserve registers across runtime call
  pusha();
  int incoming_argument_and_return_value_offset = -1;
  if (num_fpu_regs_in_use > 1) {
    // Must preserve all other FPU regs (could alternatively convert
    // SharedRuntime::dsin and dcos into assembly routines known not to trash
    // FPU state, but can not trust C compiler)
    NEEDS_CLEANUP;
    // NOTE that in this case we also push the incoming argument to
    // the stack and restore it later; we also use this stack slot to
    // hold the return value from dsin or dcos.
    for (int i = 0; i < num_fpu_regs_in_use; i++) {
      subptr(rsp, sizeof(jdouble));
      fstp_d(Address(rsp, 0));
    }
    // The argument was the first value spilled, so it sits in the highest slot.
    incoming_argument_and_return_value_offset = sizeof(jdouble)*(num_fpu_regs_in_use-1);
    fld_d(Address(rsp, incoming_argument_and_return_value_offset));
  }
  // Spill the argument to a fresh stack slot for the call
  // (and copy it into xmm0 on 64-bit).
  subptr(rsp, sizeof(jdouble));
  fstp_d(Address(rsp, 0));
#ifdef _LP64
  movdbl(xmm0, Address(rsp, 0));
#endif // _LP64

  // NOTE: we must not use call_VM_leaf here because that requires a
  // complete interpreter frame in debug mode -- same bug as 4387334
  // MacroAssembler::call_VM_leaf_base is perfectly safe and will
  // do proper 64bit abi

  NEEDS_CLEANUP;
  // Need to add stack banging before this runtime call if it needs to
  // be taken; however, there is no generic stack banging routine at
  // the MacroAssembler level
  switch(trig) {
  case 's':
    {
      MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::dsin), 0);
    }
    break;
  case 'c':
    {
      MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::dcos), 0);
    }
    break;
  case 't':
    {
      MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::dtan), 0);
    }
    break;
  default:
    assert(false, "bad intrinsic");
    break;
  }
#ifdef _LP64
  // Move the result from xmm0 onto the FPU stack via the spill slot.
  movsd(Address(rsp, 0), xmm0);
  fld_d(Address(rsp, 0));
#endif // _LP64
  addptr(rsp, sizeof(jdouble));
  if (num_fpu_regs_in_use > 1) {
    // Must save return value to stack and then restore entire FPU stack
    fstp_d(Address(rsp, incoming_argument_and_return_value_offset));
    for (int i = 0; i < num_fpu_regs_in_use; i++) {
      fld_d(Address(rsp, 0));
      addptr(rsp, sizeof(jdouble));
    }
  }
  popa();

  // Come here with result in F-TOS
  bind(done);

  if (tmp != noreg) {
    pop(tmp);
  }
}


// Look up the method for a megamorphic invokeinterface call.
// The target method is determined by <intf_klass, itable_index>.
// The receiver klass is in recv_klass.
// On success, the result will be in method_result, and execution falls through.
// On failure, execution transfers to the given label.
// recv_klass and scan_temp are overwritten.
void MacroAssembler::lookup_interface_method(Register recv_klass,
                                             Register intf_klass,
                                             RegisterOrConstant itable_index,
                                             Register method_result,
                                             Register scan_temp,
                                             Label& L_no_such_interface) {
  assert_different_registers(recv_klass, intf_klass, method_result, scan_temp);
  assert(itable_index.is_constant() || itable_index.as_register() == method_result,
         "caller must use same register for non-constant itable index as for method");

  // Compute start of first itableOffsetEntry (which is at the end of the vtable)
  int vtable_base = instanceKlass::vtable_start_offset() * wordSize;
  int itentry_off = itableMethodEntry::method_offset_in_bytes();
  int scan_step   = itableOffsetEntry::size() * wordSize;
  int vte_size    = vtableEntry::size() * wordSize;
  Address::ScaleFactor times_vte_scale = Address::times_ptr;
  assert(vte_size == wordSize, "else adjust times_vte_scale");

  movl(scan_temp, Address(recv_klass, instanceKlass::vtable_length_offset() * wordSize));

  // %%% Could store the aligned, prescaled offset in the klassoop.
  lea(scan_temp, Address(recv_klass, scan_temp, times_vte_scale, vtable_base));
  if (HeapWordsPerLong > 1) {
    // Round up to align_object_offset boundary
    // see code for instanceKlass::start_of_itable!
    round_to(scan_temp, BytesPerLong);
  }

  // Adjust recv_klass by scaled itable_index, so we can free itable_index.
  assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below");
  lea(recv_klass, Address(recv_klass, itable_index, Address::times_ptr, itentry_off));

  // for (scan = klass->itable(); scan->interface() != NULL; scan += scan_step) {
  //   if (scan->interface() == intf) {
  //     result = (klass + scan->offset() + itable_index);
  //   }
  // }
  Label search, found_method;

  // The compare sequence is emitted twice. The first copy (peel == 1) tests
  // the first entry and branches to found_method on a hit, then falls into the
  // 'search' code that checks for the NULL terminator and advances scan_temp.
  // The second copy (peel == 0) branches back to 'search' on a miss and falls
  // through to found_method on a hit.
  for (int peel = 1; peel >= 0; peel--) {
    movptr(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes()));
    cmpptr(intf_klass, method_result);

    if (peel) {
      jccb(Assembler::equal, found_method);
    } else {
      jccb(Assembler::notEqual, search);
      // (invert the test to fall through to found_method...)
    }

    if (!peel)  break;

    bind(search);

    // Check that the previous entry is non-null.  A null entry means that
    // the receiver class doesn't implement the interface, and wasn't the
    // same as when the caller was compiled.
    testptr(method_result, method_result);
    jcc(Assembler::zero, L_no_such_interface);
    addptr(scan_temp, scan_step);
  }

  bind(found_method);

  // Got a hit.
  movl(scan_temp, Address(scan_temp, itableOffsetEntry::offset_offset_in_bytes()));
  movptr(method_result, Address(recv_klass, scan_temp, Address::times_1));
}


// Emit a subtype check of sub_klass against super_klass: the fast path first,
// then the slow path. Success branches to L_success; L_failure is bound at the
// end of the emitted sequence, so failure falls through to the code that
// follows.
void MacroAssembler::check_klass_subtype(Register sub_klass,
                           Register super_klass,
                           Register temp_reg,
                           Label& L_success) {
  Label L_failure;
  check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg,        &L_success, &L_failure, NULL);
  check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, noreg, &L_success, NULL);
  bind(L_failure);
}


void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
                                                   Register super_klass,
                                                   Register temp_reg,
                                                   Label* L_success,
                                                   Label* L_failure,
                                                   Label* L_slow_path,
                                        RegisterOrConstant super_check_offset) {
  assert_different_registers(sub_klass, super_klass, temp_reg);
  bool must_load_sco = (super_check_offset.constant_or_zero() == -1);
  if (super_check_offset.is_register()) {
    assert_different_registers(sub_klass, super_klass,
                               super_check_offset.as_register());
  } else if (must_load_sco) {
    assert(temp_reg != noreg, "supply either a temp or a register offset");
  }

  Label L_fallthrough;
  int label_nulls = 0;
  if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
  if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
  if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; }
  assert(label_nulls <= 1, "at most one NULL in the batch");

  int sc_offset = (klassOopDesc::header_size() * HeapWordSize +
                   Klass::secondary_super_cache_offset_in_bytes());
  int sco_offset = (klassOopDesc::header_size() * HeapWordSize +
                    Klass::super_check_offset_offset_in_bytes());
  Address super_check_offset_addr(super_klass, sco_offset);

  // Hacked jcc, which "knows" that L_fallthrough, at least, is in
  // range of a jccb.
If this routine grows larger, reconsider at 7563 // least some of these. 7564 #define local_jcc(assembler_cond, label) \ 7565 if (&(label) == &L_fallthrough) jccb(assembler_cond, label); \ 7566 else jcc( assembler_cond, label) /*omit semi*/ 7567 7568 // Hacked jmp, which may only be used just before L_fallthrough. 7569 #define final_jmp(label) \ 7570 if (&(label) == &L_fallthrough) { /*do nothing*/ } \ 7571 else jmp(label) /*omit semi*/ 7572 7573 // If the pointers are equal, we are done (e.g., String[] elements). 7574 // This self-check enables sharing of secondary supertype arrays among 7575 // non-primary types such as array-of-interface. Otherwise, each such 7576 // type would need its own customized SSA. 7577 // We move this check to the front of the fast path because many 7578 // type checks are in fact trivially successful in this manner, 7579 // so we get a nicely predicted branch right at the start of the check. 7580 cmpptr(sub_klass, super_klass); 7581 local_jcc(Assembler::equal, *L_success); 7582 7583 // Check the supertype display: 7584 if (must_load_sco) { 7585 // Positive movl does right thing on LP64. 7586 movl(temp_reg, super_check_offset_addr); 7587 super_check_offset = RegisterOrConstant(temp_reg); 7588 } 7589 Address super_check_addr(sub_klass, super_check_offset, Address::times_1, 0); 7590 cmpptr(super_klass, super_check_addr); // load displayed supertype 7591 7592 // This check has worked decisively for primary supers. 7593 // Secondary supers are sought in the super_cache ('super_cache_addr'). 7594 // (Secondary supers are interfaces and very deeply nested subtypes.) 7595 // This works in the same check above because of a tricky aliasing 7596 // between the super_cache and the primary super display elements. 7597 // (The 'super_check_addr' can address either, as the case requires.) 7598 // Note that the cache is updated below if it does not help us find 7599 // what we need immediately. 
7600 // So if it was a primary super, we can just fail immediately. 7601 // Otherwise, it's the slow path for us (no success at this point). 7602 7603 if (super_check_offset.is_register()) { 7604 local_jcc(Assembler::equal, *L_success); 7605 cmpl(super_check_offset.as_register(), sc_offset); 7606 if (L_failure == &L_fallthrough) { 7607 local_jcc(Assembler::equal, *L_slow_path); 7608 } else { 7609 local_jcc(Assembler::notEqual, *L_failure); 7610 final_jmp(*L_slow_path); 7611 } 7612 } else if (super_check_offset.as_constant() == sc_offset) { 7613 // Need a slow path; fast failure is impossible. 7614 if (L_slow_path == &L_fallthrough) { 7615 local_jcc(Assembler::equal, *L_success); 7616 } else { 7617 local_jcc(Assembler::notEqual, *L_slow_path); 7618 final_jmp(*L_success); 7619 } 7620 } else { 7621 // No slow path; it's a fast decision. 7622 if (L_failure == &L_fallthrough) { 7623 local_jcc(Assembler::equal, *L_success); 7624 } else { 7625 local_jcc(Assembler::notEqual, *L_failure); 7626 final_jmp(*L_success); 7627 } 7628 } 7629 7630 bind(L_fallthrough); 7631 7632 #undef local_jcc 7633 #undef final_jmp 7634 } 7635 7636 7637 void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass, 7638 Register super_klass, 7639 Register temp_reg, 7640 Register temp2_reg, 7641 Label* L_success, 7642 Label* L_failure, 7643 bool set_cond_codes) { 7644 assert_different_registers(sub_klass, super_klass, temp_reg); 7645 if (temp2_reg != noreg) 7646 assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg); 7647 #define IS_A_TEMP(reg) ((reg) == temp_reg || (reg) == temp2_reg) 7648 7649 Label L_fallthrough; 7650 int label_nulls = 0; 7651 if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } 7652 if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } 7653 assert(label_nulls <= 1, "at most one NULL in the batch"); 7654 7655 // a couple of useful fields in sub_klass: 7656 int ss_offset = (klassOopDesc::header_size() * HeapWordSize + 7657 
Klass::secondary_supers_offset_in_bytes()); 7658 int sc_offset = (klassOopDesc::header_size() * HeapWordSize + 7659 Klass::secondary_super_cache_offset_in_bytes()); 7660 Address secondary_supers_addr(sub_klass, ss_offset); 7661 Address super_cache_addr( sub_klass, sc_offset); 7662 7663 // Do a linear scan of the secondary super-klass chain. 7664 // This code is rarely used, so simplicity is a virtue here. 7665 // The repne_scan instruction uses fixed registers, which we must spill. 7666 // Don't worry too much about pre-existing connections with the input regs. 7667 7668 assert(sub_klass != rax, "killed reg"); // killed by mov(rax, super) 7669 assert(sub_klass != rcx, "killed reg"); // killed by lea(rcx, &pst_counter) 7670 7671 // Get super_klass value into rax (even if it was in rdi or rcx). 7672 bool pushed_rax = false, pushed_rcx = false, pushed_rdi = false; 7673 if (super_klass != rax || UseCompressedOops) { 7674 if (!IS_A_TEMP(rax)) { push(rax); pushed_rax = true; } 7675 mov(rax, super_klass); 7676 } 7677 if (!IS_A_TEMP(rcx)) { push(rcx); pushed_rcx = true; } 7678 if (!IS_A_TEMP(rdi)) { push(rdi); pushed_rdi = true; } 7679 7680 #ifndef PRODUCT 7681 int* pst_counter = &SharedRuntime::_partial_subtype_ctr; 7682 ExternalAddress pst_counter_addr((address) pst_counter); 7683 NOT_LP64( incrementl(pst_counter_addr) ); 7684 LP64_ONLY( lea(rcx, pst_counter_addr) ); 7685 LP64_ONLY( incrementl(Address(rcx, 0)) ); 7686 #endif //PRODUCT 7687 7688 // We will consult the secondary-super array. 7689 movptr(rdi, secondary_supers_addr); 7690 // Load the array length. (Positive movl does right thing on LP64.) 7691 movl(rcx, Address(rdi, arrayOopDesc::length_offset_in_bytes())); 7692 // Skip to start of data. 7693 addptr(rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); 7694 7695 // Scan RCX words at [RDI] for an occurrence of RAX. 7696 // Set NZ/Z based on last compare. 
7697 // Z flag value will not be set by 'repne' if RCX == 0 since 'repne' does 7698 // not change flags (only scas instruction which is repeated sets flags). 7699 // Set Z = 0 (not equal) before 'repne' to indicate that class was not found. 7700 #ifdef _LP64 7701 // This part is tricky, as values in supers array could be 32 or 64 bit wide 7702 // and we store values in objArrays always encoded, thus we need to encode 7703 // the value of rax before repne. Note that rax is dead after the repne. 7704 if (UseCompressedOops) { 7705 encode_heap_oop_not_null(rax); // Changes flags. 7706 // The superclass is never null; it would be a basic system error if a null 7707 // pointer were to sneak in here. Note that we have already loaded the 7708 // Klass::super_check_offset from the super_klass in the fast path, 7709 // so if there is a null in that register, we are already in the afterlife. 7710 testl(rax,rax); // Set Z = 0 7711 repne_scanl(); 7712 } else 7713 #endif // _LP64 7714 { 7715 testptr(rax,rax); // Set Z = 0 7716 repne_scan(); 7717 } 7718 // Unspill the temp. registers: 7719 if (pushed_rdi) pop(rdi); 7720 if (pushed_rcx) pop(rcx); 7721 if (pushed_rax) pop(rax); 7722 7723 if (set_cond_codes) { 7724 // Special hack for the AD files: rdi is guaranteed non-zero. 7725 assert(!pushed_rdi, "rdi must be left non-NULL"); 7726 // Also, the condition codes are properly set Z/NZ on succeed/failure. 7727 } 7728 7729 if (L_failure == &L_fallthrough) 7730 jccb(Assembler::notEqual, *L_failure); 7731 else jcc(Assembler::notEqual, *L_failure); 7732 7733 // Success. Cache the super we found and proceed in triumph. 
7734 movptr(super_cache_addr, super_klass); 7735 7736 if (L_success != &L_fallthrough) { 7737 jmp(*L_success); 7738 } 7739 7740 #undef IS_A_TEMP 7741 7742 bind(L_fallthrough); 7743 } 7744 7745 7746 void MacroAssembler::ucomisd(XMMRegister dst, AddressLiteral src) { 7747 ucomisd(dst, as_Address(src)); 7748 } 7749 7750 void MacroAssembler::ucomiss(XMMRegister dst, AddressLiteral src) { 7751 ucomiss(dst, as_Address(src)); 7752 } 7753 7754 void MacroAssembler::xorpd(XMMRegister dst, AddressLiteral src) { 7755 if (reachable(src)) { 7756 xorpd(dst, as_Address(src)); 7757 } else { 7758 lea(rscratch1, src); 7759 xorpd(dst, Address(rscratch1, 0)); 7760 } 7761 } 7762 7763 void MacroAssembler::xorps(XMMRegister dst, AddressLiteral src) { 7764 if (reachable(src)) { 7765 xorps(dst, as_Address(src)); 7766 } else { 7767 lea(rscratch1, src); 7768 xorps(dst, Address(rscratch1, 0)); 7769 } 7770 } 7771 7772 void MacroAssembler::cmov32(Condition cc, Register dst, Address src) { 7773 if (VM_Version::supports_cmov()) { 7774 cmovl(cc, dst, src); 7775 } else { 7776 Label L; 7777 jccb(negate_condition(cc), L); 7778 movl(dst, src); 7779 bind(L); 7780 } 7781 } 7782 7783 void MacroAssembler::cmov32(Condition cc, Register dst, Register src) { 7784 if (VM_Version::supports_cmov()) { 7785 cmovl(cc, dst, src); 7786 } else { 7787 Label L; 7788 jccb(negate_condition(cc), L); 7789 movl(dst, src); 7790 bind(L); 7791 } 7792 } 7793 7794 void MacroAssembler::verify_oop(Register reg, const char* s) { 7795 if (!VerifyOops) return; 7796 7797 // Pass register number to verify_oop_subroutine 7798 char* b = new char[strlen(s) + 50]; 7799 sprintf(b, "verify_oop: %s: %s", reg->name(), s); 7800 #ifdef _LP64 7801 push(rscratch1); // save r10, trashed by movptr() 7802 #endif 7803 push(rax); // save rax, 7804 push(reg); // pass register argument 7805 ExternalAddress buffer((address) b); 7806 // avoid using pushptr, as it modifies scratch registers 7807 // and our contract is not to modify anything 7808 movptr(rax, 
buffer.addr()); 7809 push(rax); 7810 // call indirectly to solve generation ordering problem 7811 movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address())); 7812 call(rax); 7813 // Caller pops the arguments (oop, message) and restores rax, r10 7814 } 7815 7816 7817 RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr, 7818 Register tmp, 7819 int offset) { 7820 intptr_t value = *delayed_value_addr; 7821 if (value != 0) 7822 return RegisterOrConstant(value + offset); 7823 7824 // load indirectly to solve generation ordering problem 7825 movptr(tmp, ExternalAddress((address) delayed_value_addr)); 7826 7827 #ifdef ASSERT 7828 { Label L; 7829 testptr(tmp, tmp); 7830 if (WizardMode) { 7831 jcc(Assembler::notZero, L); 7832 char* buf = new char[40]; 7833 sprintf(buf, "DelayedValue="INTPTR_FORMAT, delayed_value_addr[1]); 7834 stop(buf); 7835 } else { 7836 jccb(Assembler::notZero, L); 7837 hlt(); 7838 } 7839 bind(L); 7840 } 7841 #endif 7842 7843 if (offset != 0) 7844 addptr(tmp, offset); 7845 7846 return RegisterOrConstant(tmp); 7847 } 7848 7849 7850 // registers on entry: 7851 // - rax ('check' register): required MethodType 7852 // - rcx: method handle 7853 // - rdx, rsi, or ?: killable temp 7854 void MacroAssembler::check_method_handle_type(Register mtype_reg, Register mh_reg, 7855 Register temp_reg, 7856 Label& wrong_method_type) { 7857 Address type_addr(mh_reg, delayed_value(java_lang_invoke_MethodHandle::type_offset_in_bytes, temp_reg)); 7858 // compare method type against that of the receiver 7859 if (UseCompressedOops) { 7860 load_heap_oop(temp_reg, type_addr); 7861 cmpptr(mtype_reg, temp_reg); 7862 } else { 7863 cmpptr(mtype_reg, type_addr); 7864 } 7865 jcc(Assembler::notEqual, wrong_method_type); 7866 } 7867 7868 7869 // A method handle has a "vmslots" field which gives the size of its 7870 // argument list in JVM stack slots. 
This field is either located directly 7871 // in every method handle, or else is indirectly accessed through the 7872 // method handle's MethodType. This macro hides the distinction. 7873 void MacroAssembler::load_method_handle_vmslots(Register vmslots_reg, Register mh_reg, 7874 Register temp_reg) { 7875 assert_different_registers(vmslots_reg, mh_reg, temp_reg); 7876 // load mh.type.form.vmslots 7877 if (java_lang_invoke_MethodHandle::vmslots_offset_in_bytes() != 0) { 7878 // hoist vmslots into every mh to avoid dependent load chain 7879 movl(vmslots_reg, Address(mh_reg, delayed_value(java_lang_invoke_MethodHandle::vmslots_offset_in_bytes, temp_reg))); 7880 } else { 7881 Register temp2_reg = vmslots_reg; 7882 load_heap_oop(temp2_reg, Address(mh_reg, delayed_value(java_lang_invoke_MethodHandle::type_offset_in_bytes, temp_reg))); 7883 load_heap_oop(temp2_reg, Address(temp2_reg, delayed_value(java_lang_invoke_MethodType::form_offset_in_bytes, temp_reg))); 7884 movl(vmslots_reg, Address(temp2_reg, delayed_value(java_lang_invoke_MethodTypeForm::vmslots_offset_in_bytes, temp_reg))); 7885 } 7886 } 7887 7888 7889 // registers on entry: 7890 // - rcx: method handle 7891 // - rdx: killable temp (interpreted only) 7892 // - rax: killable temp (compiled only) 7893 void MacroAssembler::jump_to_method_handle_entry(Register mh_reg, Register temp_reg) { 7894 assert(mh_reg == rcx, "caller must put MH object in rcx"); 7895 assert_different_registers(mh_reg, temp_reg); 7896 7897 // pick out the interpreted side of the handler 7898 // NOTE: vmentry is not an oop! 7899 movptr(temp_reg, Address(mh_reg, delayed_value(java_lang_invoke_MethodHandle::vmentry_offset_in_bytes, temp_reg))); 7900 7901 // off we go... 
7902 jmp(Address(temp_reg, MethodHandleEntry::from_interpreted_entry_offset_in_bytes())); 7903 7904 // for the various stubs which take control at this point, 7905 // see MethodHandles::generate_method_handle_stub 7906 } 7907 7908 7909 Address MacroAssembler::argument_address(RegisterOrConstant arg_slot, 7910 int extra_slot_offset) { 7911 // cf. TemplateTable::prepare_invoke(), if (load_receiver). 7912 int stackElementSize = Interpreter::stackElementSize; 7913 int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0); 7914 #ifdef ASSERT 7915 int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1); 7916 assert(offset1 - offset == stackElementSize, "correct arithmetic"); 7917 #endif 7918 Register scale_reg = noreg; 7919 Address::ScaleFactor scale_factor = Address::no_scale; 7920 if (arg_slot.is_constant()) { 7921 offset += arg_slot.as_constant() * stackElementSize; 7922 } else { 7923 scale_reg = arg_slot.as_register(); 7924 scale_factor = Address::times(stackElementSize); 7925 } 7926 offset += wordSize; // return PC is on stack 7927 return Address(rsp, scale_reg, scale_factor, offset); 7928 } 7929 7930 7931 void MacroAssembler::verify_oop_addr(Address addr, const char* s) { 7932 if (!VerifyOops) return; 7933 7934 // Address adjust(addr.base(), addr.index(), addr.scale(), addr.disp() + BytesPerWord); 7935 // Pass register number to verify_oop_subroutine 7936 char* b = new char[strlen(s) + 50]; 7937 sprintf(b, "verify_oop_addr: %s", s); 7938 7939 #ifdef _LP64 7940 push(rscratch1); // save r10, trashed by movptr() 7941 #endif 7942 push(rax); // save rax, 7943 // addr may contain rsp so we will have to adjust it based on the push 7944 // we just did (and on 64 bit we do two pushes) 7945 // NOTE: 64bit seemed to have had a bug in that it did movq(addr, rax); which 7946 // stores rax into addr which is backwards of what was intended. 
7947 if (addr.uses(rsp)) { 7948 lea(rax, addr); 7949 pushptr(Address(rax, LP64_ONLY(2 *) BytesPerWord)); 7950 } else { 7951 pushptr(addr); 7952 } 7953 7954 ExternalAddress buffer((address) b); 7955 // pass msg argument 7956 // avoid using pushptr, as it modifies scratch registers 7957 // and our contract is not to modify anything 7958 movptr(rax, buffer.addr()); 7959 push(rax); 7960 7961 // call indirectly to solve generation ordering problem 7962 movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address())); 7963 call(rax); 7964 // Caller pops the arguments (addr, message) and restores rax, r10. 7965 } 7966 7967 void MacroAssembler::verify_tlab() { 7968 #ifdef ASSERT 7969 if (UseTLAB && VerifyOops) { 7970 Label next, ok; 7971 Register t1 = rsi; 7972 Register thread_reg = NOT_LP64(rbx) LP64_ONLY(r15_thread); 7973 7974 push(t1); 7975 NOT_LP64(push(thread_reg)); 7976 NOT_LP64(get_thread(thread_reg)); 7977 7978 movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset()))); 7979 cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_start_offset()))); 7980 jcc(Assembler::aboveEqual, next); 7981 stop("assert(top >= start)"); 7982 should_not_reach_here(); 7983 7984 bind(next); 7985 movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_end_offset()))); 7986 cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset()))); 7987 jcc(Assembler::aboveEqual, ok); 7988 stop("assert(top <= end)"); 7989 should_not_reach_here(); 7990 7991 bind(ok); 7992 NOT_LP64(pop(thread_reg)); 7993 pop(t1); 7994 } 7995 #endif 7996 } 7997 7998 class ControlWord { 7999 public: 8000 int32_t _value; 8001 8002 int rounding_control() const { return (_value >> 10) & 3 ; } 8003 int precision_control() const { return (_value >> 8) & 3 ; } 8004 bool precision() const { return ((_value >> 5) & 1) != 0; } 8005 bool underflow() const { return ((_value >> 4) & 1) != 0; } 8006 bool overflow() const { return ((_value >> 3) & 1) != 0; } 8007 bool zero_divide() const { 
return ((_value >> 2) & 1) != 0; } 8008 bool denormalized() const { return ((_value >> 1) & 1) != 0; } 8009 bool invalid() const { return ((_value >> 0) & 1) != 0; } 8010 8011 void print() const { 8012 // rounding control 8013 const char* rc; 8014 switch (rounding_control()) { 8015 case 0: rc = "round near"; break; 8016 case 1: rc = "round down"; break; 8017 case 2: rc = "round up "; break; 8018 case 3: rc = "chop "; break; 8019 }; 8020 // precision control 8021 const char* pc; 8022 switch (precision_control()) { 8023 case 0: pc = "24 bits "; break; 8024 case 1: pc = "reserved"; break; 8025 case 2: pc = "53 bits "; break; 8026 case 3: pc = "64 bits "; break; 8027 }; 8028 // flags 8029 char f[9]; 8030 f[0] = ' '; 8031 f[1] = ' '; 8032 f[2] = (precision ()) ? 'P' : 'p'; 8033 f[3] = (underflow ()) ? 'U' : 'u'; 8034 f[4] = (overflow ()) ? 'O' : 'o'; 8035 f[5] = (zero_divide ()) ? 'Z' : 'z'; 8036 f[6] = (denormalized()) ? 'D' : 'd'; 8037 f[7] = (invalid ()) ? 'I' : 'i'; 8038 f[8] = '\x0'; 8039 // output 8040 printf("%04x masks = %s, %s, %s", _value & 0xFFFF, f, rc, pc); 8041 } 8042 8043 }; 8044 8045 class StatusWord { 8046 public: 8047 int32_t _value; 8048 8049 bool busy() const { return ((_value >> 15) & 1) != 0; } 8050 bool C3() const { return ((_value >> 14) & 1) != 0; } 8051 bool C2() const { return ((_value >> 10) & 1) != 0; } 8052 bool C1() const { return ((_value >> 9) & 1) != 0; } 8053 bool C0() const { return ((_value >> 8) & 1) != 0; } 8054 int top() const { return (_value >> 11) & 7 ; } 8055 bool error_status() const { return ((_value >> 7) & 1) != 0; } 8056 bool stack_fault() const { return ((_value >> 6) & 1) != 0; } 8057 bool precision() const { return ((_value >> 5) & 1) != 0; } 8058 bool underflow() const { return ((_value >> 4) & 1) != 0; } 8059 bool overflow() const { return ((_value >> 3) & 1) != 0; } 8060 bool zero_divide() const { return ((_value >> 2) & 1) != 0; } 8061 bool denormalized() const { return ((_value >> 1) & 1) != 0; } 8062 bool 
invalid() const { return ((_value >> 0) & 1) != 0; } 8063 8064 void print() const { 8065 // condition codes 8066 char c[5]; 8067 c[0] = (C3()) ? '3' : '-'; 8068 c[1] = (C2()) ? '2' : '-'; 8069 c[2] = (C1()) ? '1' : '-'; 8070 c[3] = (C0()) ? '0' : '-'; 8071 c[4] = '\x0'; 8072 // flags 8073 char f[9]; 8074 f[0] = (error_status()) ? 'E' : '-'; 8075 f[1] = (stack_fault ()) ? 'S' : '-'; 8076 f[2] = (precision ()) ? 'P' : '-'; 8077 f[3] = (underflow ()) ? 'U' : '-'; 8078 f[4] = (overflow ()) ? 'O' : '-'; 8079 f[5] = (zero_divide ()) ? 'Z' : '-'; 8080 f[6] = (denormalized()) ? 'D' : '-'; 8081 f[7] = (invalid ()) ? 'I' : '-'; 8082 f[8] = '\x0'; 8083 // output 8084 printf("%04x flags = %s, cc = %s, top = %d", _value & 0xFFFF, f, c, top()); 8085 } 8086 8087 }; 8088 8089 class TagWord { 8090 public: 8091 int32_t _value; 8092 8093 int tag_at(int i) const { return (_value >> (i*2)) & 3; } 8094 8095 void print() const { 8096 printf("%04x", _value & 0xFFFF); 8097 } 8098 8099 }; 8100 8101 class FPU_Register { 8102 public: 8103 int32_t _m0; 8104 int32_t _m1; 8105 int16_t _ex; 8106 8107 bool is_indefinite() const { 8108 return _ex == -1 && _m1 == (int32_t)0xC0000000 && _m0 == 0; 8109 } 8110 8111 void print() const { 8112 char sign = (_ex < 0) ? '-' : '+'; 8113 const char* kind = (_ex == 0x7FFF || _ex == (int16_t)-1) ? 
"NaN" : " "; 8114 printf("%c%04hx.%08x%08x %s", sign, _ex, _m1, _m0, kind); 8115 }; 8116 8117 }; 8118 8119 class FPU_State { 8120 public: 8121 enum { 8122 register_size = 10, 8123 number_of_registers = 8, 8124 register_mask = 7 8125 }; 8126 8127 ControlWord _control_word; 8128 StatusWord _status_word; 8129 TagWord _tag_word; 8130 int32_t _error_offset; 8131 int32_t _error_selector; 8132 int32_t _data_offset; 8133 int32_t _data_selector; 8134 int8_t _register[register_size * number_of_registers]; 8135 8136 int tag_for_st(int i) const { return _tag_word.tag_at((_status_word.top() + i) & register_mask); } 8137 FPU_Register* st(int i) const { return (FPU_Register*)&_register[register_size * i]; } 8138 8139 const char* tag_as_string(int tag) const { 8140 switch (tag) { 8141 case 0: return "valid"; 8142 case 1: return "zero"; 8143 case 2: return "special"; 8144 case 3: return "empty"; 8145 } 8146 ShouldNotReachHere(); 8147 return NULL; 8148 } 8149 8150 void print() const { 8151 // print computation registers 8152 { int t = _status_word.top(); 8153 for (int i = 0; i < number_of_registers; i++) { 8154 int j = (i - t) & register_mask; 8155 printf("%c r%d = ST%d = ", (j == 0 ? 
'*' : ' '), i, j); 8156 st(j)->print(); 8157 printf(" %s\n", tag_as_string(_tag_word.tag_at(i))); 8158 } 8159 } 8160 printf("\n"); 8161 // print control registers 8162 printf("ctrl = "); _control_word.print(); printf("\n"); 8163 printf("stat = "); _status_word .print(); printf("\n"); 8164 printf("tags = "); _tag_word .print(); printf("\n"); 8165 } 8166 8167 }; 8168 8169 class Flag_Register { 8170 public: 8171 int32_t _value; 8172 8173 bool overflow() const { return ((_value >> 11) & 1) != 0; } 8174 bool direction() const { return ((_value >> 10) & 1) != 0; } 8175 bool sign() const { return ((_value >> 7) & 1) != 0; } 8176 bool zero() const { return ((_value >> 6) & 1) != 0; } 8177 bool auxiliary_carry() const { return ((_value >> 4) & 1) != 0; } 8178 bool parity() const { return ((_value >> 2) & 1) != 0; } 8179 bool carry() const { return ((_value >> 0) & 1) != 0; } 8180 8181 void print() const { 8182 // flags 8183 char f[8]; 8184 f[0] = (overflow ()) ? 'O' : '-'; 8185 f[1] = (direction ()) ? 'D' : '-'; 8186 f[2] = (sign ()) ? 'S' : '-'; 8187 f[3] = (zero ()) ? 'Z' : '-'; 8188 f[4] = (auxiliary_carry()) ? 'A' : '-'; 8189 f[5] = (parity ()) ? 'P' : '-'; 8190 f[6] = (carry ()) ? 
'C' : '-'; 8191 f[7] = '\x0'; 8192 // output 8193 printf("%08x flags = %s", _value, f); 8194 } 8195 8196 }; 8197 8198 class IU_Register { 8199 public: 8200 int32_t _value; 8201 8202 void print() const { 8203 printf("%08x %11d", _value, _value); 8204 } 8205 8206 }; 8207 8208 class IU_State { 8209 public: 8210 Flag_Register _eflags; 8211 IU_Register _rdi; 8212 IU_Register _rsi; 8213 IU_Register _rbp; 8214 IU_Register _rsp; 8215 IU_Register _rbx; 8216 IU_Register _rdx; 8217 IU_Register _rcx; 8218 IU_Register _rax; 8219 8220 void print() const { 8221 // computation registers 8222 printf("rax, = "); _rax.print(); printf("\n"); 8223 printf("rbx, = "); _rbx.print(); printf("\n"); 8224 printf("rcx = "); _rcx.print(); printf("\n"); 8225 printf("rdx = "); _rdx.print(); printf("\n"); 8226 printf("rdi = "); _rdi.print(); printf("\n"); 8227 printf("rsi = "); _rsi.print(); printf("\n"); 8228 printf("rbp, = "); _rbp.print(); printf("\n"); 8229 printf("rsp = "); _rsp.print(); printf("\n"); 8230 printf("\n"); 8231 // control registers 8232 printf("flgs = "); _eflags.print(); printf("\n"); 8233 } 8234 }; 8235 8236 8237 class CPU_State { 8238 public: 8239 FPU_State _fpu_state; 8240 IU_State _iu_state; 8241 8242 void print() const { 8243 printf("--------------------------------------------------\n"); 8244 _iu_state .print(); 8245 printf("\n"); 8246 _fpu_state.print(); 8247 printf("--------------------------------------------------\n"); 8248 } 8249 8250 }; 8251 8252 8253 static void _print_CPU_state(CPU_State* state) { 8254 state->print(); 8255 }; 8256 8257 8258 void MacroAssembler::print_CPU_state() { 8259 push_CPU_state(); 8260 push(rsp); // pass CPU state 8261 call(RuntimeAddress(CAST_FROM_FN_PTR(address, _print_CPU_state))); 8262 addptr(rsp, wordSize); // discard argument 8263 pop_CPU_state(); 8264 } 8265 8266 8267 static bool _verify_FPU(int stack_depth, char* s, CPU_State* state) { 8268 static int counter = 0; 8269 FPU_State* fs = &state->_fpu_state; 8270 counter++; 8271 // For 
leaf calls, only verify that the top few elements remain empty. 8272 // We only need 1 empty at the top for C2 code. 8273 if( stack_depth < 0 ) { 8274 if( fs->tag_for_st(7) != 3 ) { 8275 printf("FPR7 not empty\n"); 8276 state->print(); 8277 assert(false, "error"); 8278 return false; 8279 } 8280 return true; // All other stack states do not matter 8281 } 8282 8283 assert((fs->_control_word._value & 0xffff) == StubRoutines::_fpu_cntrl_wrd_std, 8284 "bad FPU control word"); 8285 8286 // compute stack depth 8287 int i = 0; 8288 while (i < FPU_State::number_of_registers && fs->tag_for_st(i) < 3) i++; 8289 int d = i; 8290 while (i < FPU_State::number_of_registers && fs->tag_for_st(i) == 3) i++; 8291 // verify findings 8292 if (i != FPU_State::number_of_registers) { 8293 // stack not contiguous 8294 printf("%s: stack not contiguous at ST%d\n", s, i); 8295 state->print(); 8296 assert(false, "error"); 8297 return false; 8298 } 8299 // check if computed stack depth corresponds to expected stack depth 8300 if (stack_depth < 0) { 8301 // expected stack depth is -stack_depth or less 8302 if (d > -stack_depth) { 8303 // too many elements on the stack 8304 printf("%s: <= %d stack elements expected but found %d\n", s, -stack_depth, d); 8305 state->print(); 8306 assert(false, "error"); 8307 return false; 8308 } 8309 } else { 8310 // expected stack depth is stack_depth 8311 if (d != stack_depth) { 8312 // wrong stack depth 8313 printf("%s: %d stack elements expected but found %d\n", s, stack_depth, d); 8314 state->print(); 8315 assert(false, "error"); 8316 return false; 8317 } 8318 } 8319 // everything is cool 8320 return true; 8321 } 8322 8323 8324 void MacroAssembler::verify_FPU(int stack_depth, const char* s) { 8325 if (!VerifyFPU) return; 8326 push_CPU_state(); 8327 push(rsp); // pass CPU state 8328 ExternalAddress msg((address) s); 8329 // pass message string s 8330 pushptr(msg.addr()); 8331 push(stack_depth); // pass stack depth 8332 
call(RuntimeAddress(CAST_FROM_FN_PTR(address, _verify_FPU))); 8333 addptr(rsp, 3 * wordSize); // discard arguments 8334 // check for error 8335 { Label L; 8336 testl(rax, rax); 8337 jcc(Assembler::notZero, L); 8338 int3(); // break if error condition 8339 bind(L); 8340 } 8341 pop_CPU_state(); 8342 } 8343 8344 void MacroAssembler::load_klass(Register dst, Register src) { 8345 #ifdef _LP64 8346 if (UseCompressedOops) { 8347 movl(dst, Address(src, oopDesc::klass_offset_in_bytes())); 8348 decode_heap_oop_not_null(dst); 8349 } else 8350 #endif 8351 movptr(dst, Address(src, oopDesc::klass_offset_in_bytes())); 8352 } 8353 8354 void MacroAssembler::load_prototype_header(Register dst, Register src) { 8355 #ifdef _LP64 8356 if (UseCompressedOops) { 8357 assert (Universe::heap() != NULL, "java heap should be initialized"); 8358 movl(dst, Address(src, oopDesc::klass_offset_in_bytes())); 8359 if (Universe::narrow_oop_shift() != 0) { 8360 assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); 8361 if (LogMinObjAlignmentInBytes == Address::times_8) { 8362 movq(dst, Address(r12_heapbase, dst, Address::times_8, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes())); 8363 } else { 8364 // OK to use shift since we don't need to preserve flags. 
8365 shlq(dst, LogMinObjAlignmentInBytes); 8366 movq(dst, Address(r12_heapbase, dst, Address::times_1, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes())); 8367 } 8368 } else { 8369 movq(dst, Address(dst, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes())); 8370 } 8371 } else 8372 #endif 8373 { 8374 movptr(dst, Address(src, oopDesc::klass_offset_in_bytes())); 8375 movptr(dst, Address(dst, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes())); 8376 } 8377 } 8378 8379 void MacroAssembler::store_klass(Register dst, Register src) { 8380 #ifdef _LP64 8381 if (UseCompressedOops) { 8382 encode_heap_oop_not_null(src); 8383 movl(Address(dst, oopDesc::klass_offset_in_bytes()), src); 8384 } else 8385 #endif 8386 movptr(Address(dst, oopDesc::klass_offset_in_bytes()), src); 8387 } 8388 8389 void MacroAssembler::load_heap_oop(Register dst, Address src) { 8390 #ifdef _LP64 8391 if (UseCompressedOops) { 8392 movl(dst, src); 8393 decode_heap_oop(dst); 8394 } else 8395 #endif 8396 movptr(dst, src); 8397 } 8398 8399 // Doesn't do verfication, generates fixed size code 8400 void MacroAssembler::load_heap_oop_not_null(Register dst, Address src) { 8401 #ifdef _LP64 8402 if (UseCompressedOops) { 8403 movl(dst, src); 8404 decode_heap_oop_not_null(dst); 8405 } else 8406 #endif 8407 movptr(dst, src); 8408 } 8409 8410 void MacroAssembler::store_heap_oop(Address dst, Register src) { 8411 #ifdef _LP64 8412 if (UseCompressedOops) { 8413 assert(!dst.uses(src), "not enough registers"); 8414 encode_heap_oop(src); 8415 movl(dst, src); 8416 } else 8417 #endif 8418 movptr(dst, src); 8419 } 8420 8421 // Used for storing NULLs. 
8422 void MacroAssembler::store_heap_oop_null(Address dst) { 8423 #ifdef _LP64 8424 if (UseCompressedOops) { 8425 movl(dst, (int32_t)NULL_WORD); 8426 } else { 8427 movslq(dst, (int32_t)NULL_WORD); 8428 } 8429 #else 8430 movl(dst, (int32_t)NULL_WORD); 8431 #endif 8432 } 8433 8434 #ifdef _LP64 8435 void MacroAssembler::store_klass_gap(Register dst, Register src) { 8436 if (UseCompressedOops) { 8437 // Store to klass gap in destination 8438 movl(Address(dst, oopDesc::klass_gap_offset_in_bytes()), src); 8439 } 8440 } 8441 8442 #ifdef ASSERT 8443 void MacroAssembler::verify_heapbase(const char* msg) { 8444 assert (UseCompressedOops, "should be compressed"); 8445 assert (Universe::heap() != NULL, "java heap should be initialized"); 8446 if (CheckCompressedOops) { 8447 Label ok; 8448 push(rscratch1); // cmpptr trashes rscratch1 8449 cmpptr(r12_heapbase, ExternalAddress((address)Universe::narrow_oop_base_addr())); 8450 jcc(Assembler::equal, ok); 8451 stop(msg); 8452 bind(ok); 8453 pop(rscratch1); 8454 } 8455 } 8456 #endif 8457 8458 // Algorithm must match oop.inline.hpp encode_heap_oop. 
8459 void MacroAssembler::encode_heap_oop(Register r) { 8460 #ifdef ASSERT 8461 verify_heapbase("MacroAssembler::encode_heap_oop: heap base corrupted?"); 8462 #endif 8463 verify_oop(r, "broken oop in encode_heap_oop"); 8464 if (Universe::narrow_oop_base() == NULL) { 8465 if (Universe::narrow_oop_shift() != 0) { 8466 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); 8467 shrq(r, LogMinObjAlignmentInBytes); 8468 } 8469 return; 8470 } 8471 testq(r, r); 8472 cmovq(Assembler::equal, r, r12_heapbase); 8473 subq(r, r12_heapbase); 8474 shrq(r, LogMinObjAlignmentInBytes); 8475 } 8476 8477 void MacroAssembler::encode_heap_oop_not_null(Register r) { 8478 #ifdef ASSERT 8479 verify_heapbase("MacroAssembler::encode_heap_oop_not_null: heap base corrupted?"); 8480 if (CheckCompressedOops) { 8481 Label ok; 8482 testq(r, r); 8483 jcc(Assembler::notEqual, ok); 8484 stop("null oop passed to encode_heap_oop_not_null"); 8485 bind(ok); 8486 } 8487 #endif 8488 verify_oop(r, "broken oop in encode_heap_oop_not_null"); 8489 if (Universe::narrow_oop_base() != NULL) { 8490 subq(r, r12_heapbase); 8491 } 8492 if (Universe::narrow_oop_shift() != 0) { 8493 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); 8494 shrq(r, LogMinObjAlignmentInBytes); 8495 } 8496 } 8497 8498 void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) { 8499 #ifdef ASSERT 8500 verify_heapbase("MacroAssembler::encode_heap_oop_not_null2: heap base corrupted?"); 8501 if (CheckCompressedOops) { 8502 Label ok; 8503 testq(src, src); 8504 jcc(Assembler::notEqual, ok); 8505 stop("null oop passed to encode_heap_oop_not_null2"); 8506 bind(ok); 8507 } 8508 #endif 8509 verify_oop(src, "broken oop in encode_heap_oop_not_null2"); 8510 if (dst != src) { 8511 movq(dst, src); 8512 } 8513 if (Universe::narrow_oop_base() != NULL) { 8514 subq(dst, r12_heapbase); 8515 } 8516 if (Universe::narrow_oop_shift() != 0) { 8517 assert (LogMinObjAlignmentInBytes 
== Universe::narrow_oop_shift(), "decode alg wrong"); 8518 shrq(dst, LogMinObjAlignmentInBytes); 8519 } 8520 } 8521 8522 void MacroAssembler::decode_heap_oop(Register r) { 8523 #ifdef ASSERT 8524 verify_heapbase("MacroAssembler::decode_heap_oop: heap base corrupted?"); 8525 #endif 8526 if (Universe::narrow_oop_base() == NULL) { 8527 if (Universe::narrow_oop_shift() != 0) { 8528 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); 8529 shlq(r, LogMinObjAlignmentInBytes); 8530 } 8531 } else { 8532 Label done; 8533 shlq(r, LogMinObjAlignmentInBytes); 8534 jccb(Assembler::equal, done); 8535 addq(r, r12_heapbase); 8536 bind(done); 8537 } 8538 verify_oop(r, "broken oop in decode_heap_oop"); 8539 } 8540 8541 void MacroAssembler::decode_heap_oop_not_null(Register r) { 8542 // Note: it will change flags 8543 assert (UseCompressedOops, "should only be used for compressed headers"); 8544 assert (Universe::heap() != NULL, "java heap should be initialized"); 8545 // Cannot assert, unverified entry point counts instructions (see .ad file) 8546 // vtableStubs also counts instructions in pd_code_size_limit. 8547 // Also do not verify_oop as this is called by verify_oop. 8548 if (Universe::narrow_oop_shift() != 0) { 8549 assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); 8550 shlq(r, LogMinObjAlignmentInBytes); 8551 if (Universe::narrow_oop_base() != NULL) { 8552 addq(r, r12_heapbase); 8553 } 8554 } else { 8555 assert (Universe::narrow_oop_base() == NULL, "sanity"); 8556 } 8557 } 8558 8559 void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) { 8560 // Note: it will change flags 8561 assert (UseCompressedOops, "should only be used for compressed headers"); 8562 assert (Universe::heap() != NULL, "java heap should be initialized"); 8563 // Cannot assert, unverified entry point counts instructions (see .ad file) 8564 // vtableStubs also counts instructions in pd_code_size_limit. 
8565 // Also do not verify_oop as this is called by verify_oop. 8566 if (Universe::narrow_oop_shift() != 0) { 8567 assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); 8568 if (LogMinObjAlignmentInBytes == Address::times_8) { 8569 leaq(dst, Address(r12_heapbase, src, Address::times_8, 0)); 8570 } else { 8571 if (dst != src) { 8572 movq(dst, src); 8573 } 8574 shlq(dst, LogMinObjAlignmentInBytes); 8575 if (Universe::narrow_oop_base() != NULL) { 8576 addq(dst, r12_heapbase); 8577 } 8578 } 8579 } else { 8580 assert (Universe::narrow_oop_base() == NULL, "sanity"); 8581 if (dst != src) { 8582 movq(dst, src); 8583 } 8584 } 8585 } 8586 8587 void MacroAssembler::set_narrow_oop(Register dst, jobject obj) { 8588 assert (UseCompressedOops, "should only be used for compressed headers"); 8589 assert (Universe::heap() != NULL, "java heap should be initialized"); 8590 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); 8591 int oop_index = oop_recorder()->find_index(obj); 8592 RelocationHolder rspec = oop_Relocation::spec(oop_index); 8593 mov_narrow_oop(dst, oop_index, rspec); 8594 } 8595 8596 void MacroAssembler::set_narrow_oop(Address dst, jobject obj) { 8597 assert (UseCompressedOops, "should only be used for compressed headers"); 8598 assert (Universe::heap() != NULL, "java heap should be initialized"); 8599 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); 8600 int oop_index = oop_recorder()->find_index(obj); 8601 RelocationHolder rspec = oop_Relocation::spec(oop_index); 8602 mov_narrow_oop(dst, oop_index, rspec); 8603 } 8604 8605 void MacroAssembler::cmp_narrow_oop(Register dst, jobject obj) { 8606 assert (UseCompressedOops, "should only be used for compressed headers"); 8607 assert (Universe::heap() != NULL, "java heap should be initialized"); 8608 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); 8609 int oop_index = oop_recorder()->find_index(obj); 8610 RelocationHolder rspec 
= oop_Relocation::spec(oop_index); 8611 Assembler::cmp_narrow_oop(dst, oop_index, rspec); 8612 } 8613 8614 void MacroAssembler::cmp_narrow_oop(Address dst, jobject obj) { 8615 assert (UseCompressedOops, "should only be used for compressed headers"); 8616 assert (Universe::heap() != NULL, "java heap should be initialized"); 8617 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); 8618 int oop_index = oop_recorder()->find_index(obj); 8619 RelocationHolder rspec = oop_Relocation::spec(oop_index); 8620 Assembler::cmp_narrow_oop(dst, oop_index, rspec); 8621 } 8622 8623 void MacroAssembler::reinit_heapbase() { 8624 if (UseCompressedOops) { 8625 movptr(r12_heapbase, ExternalAddress((address)Universe::narrow_oop_base_addr())); 8626 } 8627 } 8628 #endif // _LP64 8629 8630 // IndexOf for constant substrings with size >= 8 chars 8631 // which don't need to be loaded through stack. 8632 void MacroAssembler::string_indexofC8(Register str1, Register str2, 8633 Register cnt1, Register cnt2, 8634 int int_cnt2, Register result, 8635 XMMRegister vec, Register tmp) { 8636 assert(UseSSE42Intrinsics, "SSE4.2 is required"); 8637 8638 // This method uses pcmpestri inxtruction with bound registers 8639 // inputs: 8640 // xmm - substring 8641 // rax - substring length (elements count) 8642 // mem - scanned string 8643 // rdx - string length (elements count) 8644 // 0xd - mode: 1100 (substring search) + 01 (unsigned shorts) 8645 // outputs: 8646 // rcx - matched index in string 8647 assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri"); 8648 8649 Label RELOAD_SUBSTR, SCAN_TO_SUBSTR, SCAN_SUBSTR, 8650 RET_FOUND, RET_NOT_FOUND, EXIT, FOUND_SUBSTR, 8651 MATCH_SUBSTR_HEAD, RELOAD_STR, FOUND_CANDIDATE; 8652 8653 // Note, inline_string_indexOf() generates checks: 8654 // if (substr.count > string.count) return -1; 8655 // if (substr.count == 0) return 0; 8656 assert(int_cnt2 >= 8, "this code isused only for cnt2 >= 8 chars"); 8657 8658 // Load substring. 
8659 movdqu(vec, Address(str2, 0)); 8660 movl(cnt2, int_cnt2); 8661 movptr(result, str1); // string addr 8662 8663 if (int_cnt2 > 8) { 8664 jmpb(SCAN_TO_SUBSTR); 8665 8666 // Reload substr for rescan, this code 8667 // is executed only for large substrings (> 8 chars) 8668 bind(RELOAD_SUBSTR); 8669 movdqu(vec, Address(str2, 0)); 8670 negptr(cnt2); // Jumped here with negative cnt2, convert to positive 8671 8672 bind(RELOAD_STR); 8673 // We came here after the beginning of the substring was 8674 // matched but the rest of it was not so we need to search 8675 // again. Start from the next element after the previous match. 8676 8677 // cnt2 is number of substring reminding elements and 8678 // cnt1 is number of string reminding elements when cmp failed. 8679 // Restored cnt1 = cnt1 - cnt2 + int_cnt2 8680 subl(cnt1, cnt2); 8681 addl(cnt1, int_cnt2); 8682 movl(cnt2, int_cnt2); // Now restore cnt2 8683 8684 decrementl(cnt1); // Shift to next element 8685 cmpl(cnt1, cnt2); 8686 jccb(Assembler::negative, RET_NOT_FOUND); // Left less then substring 8687 8688 addptr(result, 2); 8689 8690 } // (int_cnt2 > 8) 8691 8692 // Scan string for start of substr in 16-byte vectors 8693 bind(SCAN_TO_SUBSTR); 8694 pcmpestri(vec, Address(result, 0), 0x0d); 8695 jccb(Assembler::below, FOUND_CANDIDATE); // CF == 1 8696 subl(cnt1, 8); 8697 jccb(Assembler::lessEqual, RET_NOT_FOUND); // Scanned full string 8698 cmpl(cnt1, cnt2); 8699 jccb(Assembler::negative, RET_NOT_FOUND); // Left less then substring 8700 addptr(result, 16); 8701 jmpb(SCAN_TO_SUBSTR); 8702 8703 // Found a potential substr 8704 bind(FOUND_CANDIDATE); 8705 // Matched whole vector if first element matched (tmp(rcx) == 0). 
8706 if (int_cnt2 == 8) { 8707 jccb(Assembler::overflow, RET_FOUND); // OF == 1 8708 } else { // int_cnt2 > 8 8709 jccb(Assembler::overflow, FOUND_SUBSTR); 8710 } 8711 // After pcmpestri tmp(rcx) contains matched element index 8712 // Compute start addr of substr 8713 lea(result, Address(result, tmp, Address::times_2)); 8714 8715 // Make sure string is still long enough 8716 subl(cnt1, tmp); 8717 cmpl(cnt1, cnt2); 8718 if (int_cnt2 == 8) { 8719 jccb(Assembler::greaterEqual, SCAN_TO_SUBSTR); 8720 } else { // int_cnt2 > 8 8721 jccb(Assembler::greaterEqual, MATCH_SUBSTR_HEAD); 8722 } 8723 // Left less then substring. 8724 8725 bind(RET_NOT_FOUND); 8726 movl(result, -1); 8727 jmpb(EXIT); 8728 8729 if (int_cnt2 > 8) { 8730 // This code is optimized for the case when whole substring 8731 // is matched if its head is matched. 8732 bind(MATCH_SUBSTR_HEAD); 8733 pcmpestri(vec, Address(result, 0), 0x0d); 8734 // Reload only string if does not match 8735 jccb(Assembler::noOverflow, RELOAD_STR); // OF == 0 8736 8737 Label CONT_SCAN_SUBSTR; 8738 // Compare the rest of substring (> 8 chars). 8739 bind(FOUND_SUBSTR); 8740 // First 8 chars are already matched. 
8741 negptr(cnt2); 8742 addptr(cnt2, 8); 8743 8744 bind(SCAN_SUBSTR); 8745 subl(cnt1, 8); 8746 cmpl(cnt2, -8); // Do not read beyond substring 8747 jccb(Assembler::lessEqual, CONT_SCAN_SUBSTR); 8748 // Back-up strings to avoid reading beyond substring: 8749 // cnt1 = cnt1 - cnt2 + 8 8750 addl(cnt1, cnt2); // cnt2 is negative 8751 addl(cnt1, 8); 8752 movl(cnt2, 8); negptr(cnt2); 8753 bind(CONT_SCAN_SUBSTR); 8754 if (int_cnt2 < (int)G) { 8755 movdqu(vec, Address(str2, cnt2, Address::times_2, int_cnt2*2)); 8756 pcmpestri(vec, Address(result, cnt2, Address::times_2, int_cnt2*2), 0x0d); 8757 } else { 8758 // calculate index in register to avoid integer overflow (int_cnt2*2) 8759 movl(tmp, int_cnt2); 8760 addptr(tmp, cnt2); 8761 movdqu(vec, Address(str2, tmp, Address::times_2, 0)); 8762 pcmpestri(vec, Address(result, tmp, Address::times_2, 0), 0x0d); 8763 } 8764 // Need to reload strings pointers if not matched whole vector 8765 jccb(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0 8766 addptr(cnt2, 8); 8767 jccb(Assembler::negative, SCAN_SUBSTR); 8768 // Fall through if found full substring 8769 8770 } // (int_cnt2 > 8) 8771 8772 bind(RET_FOUND); 8773 // Found result if we matched full small substring. 8774 // Compute substr offset 8775 subptr(result, str1); 8776 shrl(result, 1); // index 8777 bind(EXIT); 8778 8779 } // string_indexofC8 8780 8781 // Small strings are loaded through stack if they cross page boundary. 8782 void MacroAssembler::string_indexof(Register str1, Register str2, 8783 Register cnt1, Register cnt2, 8784 int int_cnt2, Register result, 8785 XMMRegister vec, Register tmp) { 8786 assert(UseSSE42Intrinsics, "SSE4.2 is required"); 8787 // 8788 // int_cnt2 is length of small (< 8 chars) constant substring 8789 // or (-1) for non constant substring in which case its length 8790 // is in cnt2 register. 
8791 // 8792 // Note, inline_string_indexOf() generates checks: 8793 // if (substr.count > string.count) return -1; 8794 // if (substr.count == 0) return 0; 8795 // 8796 assert(int_cnt2 == -1 || (0 < int_cnt2 && int_cnt2 < 8), "should be != 0"); 8797 8798 // This method uses pcmpestri inxtruction with bound registers 8799 // inputs: 8800 // xmm - substring 8801 // rax - substring length (elements count) 8802 // mem - scanned string 8803 // rdx - string length (elements count) 8804 // 0xd - mode: 1100 (substring search) + 01 (unsigned shorts) 8805 // outputs: 8806 // rcx - matched index in string 8807 assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri"); 8808 8809 Label RELOAD_SUBSTR, SCAN_TO_SUBSTR, SCAN_SUBSTR, ADJUST_STR, 8810 RET_FOUND, RET_NOT_FOUND, CLEANUP, FOUND_SUBSTR, 8811 FOUND_CANDIDATE; 8812 8813 { //======================================================== 8814 // We don't know where these strings are located 8815 // and we can't read beyond them. Load them through stack. 8816 Label BIG_STRINGS, CHECK_STR, COPY_SUBSTR, COPY_STR; 8817 8818 movptr(tmp, rsp); // save old SP 8819 8820 if (int_cnt2 > 0) { // small (< 8 chars) constant substring 8821 if (int_cnt2 == 1) { // One char 8822 load_unsigned_short(result, Address(str2, 0)); 8823 movdl(vec, result); // move 32 bits 8824 } else if (int_cnt2 == 2) { // Two chars 8825 movdl(vec, Address(str2, 0)); // move 32 bits 8826 } else if (int_cnt2 == 4) { // Four chars 8827 movq(vec, Address(str2, 0)); // move 64 bits 8828 } else { // cnt2 = { 3, 5, 6, 7 } 8829 // Array header size is 12 bytes in 32-bit VM 8830 // + 6 bytes for 3 chars == 18 bytes, 8831 // enough space to load vec and shift. 
8832 assert(HeapWordSize*typeArrayKlass::header_size() >= 12,"sanity"); 8833 movdqu(vec, Address(str2, (int_cnt2*2)-16)); 8834 psrldq(vec, 16-(int_cnt2*2)); 8835 } 8836 } else { // not constant substring 8837 cmpl(cnt2, 8); 8838 jccb(Assembler::aboveEqual, BIG_STRINGS); // Both strings are big enough 8839 8840 // We can read beyond string if srt+16 does not cross page boundary 8841 // since heaps are aligned and mapped by pages. 8842 assert(os::vm_page_size() < (int)G, "default page should be small"); 8843 movl(result, str2); // We need only low 32 bits 8844 andl(result, (os::vm_page_size()-1)); 8845 cmpl(result, (os::vm_page_size()-16)); 8846 jccb(Assembler::belowEqual, CHECK_STR); 8847 8848 // Move small strings to stack to allow load 16 bytes into vec. 8849 subptr(rsp, 16); 8850 int stk_offset = wordSize-2; 8851 push(cnt2); 8852 8853 bind(COPY_SUBSTR); 8854 load_unsigned_short(result, Address(str2, cnt2, Address::times_2, -2)); 8855 movw(Address(rsp, cnt2, Address::times_2, stk_offset), result); 8856 decrement(cnt2); 8857 jccb(Assembler::notZero, COPY_SUBSTR); 8858 8859 pop(cnt2); 8860 movptr(str2, rsp); // New substring address 8861 } // non constant 8862 8863 bind(CHECK_STR); 8864 cmpl(cnt1, 8); 8865 jccb(Assembler::aboveEqual, BIG_STRINGS); 8866 8867 // Check cross page boundary. 
8868 movl(result, str1); // We need only low 32 bits 8869 andl(result, (os::vm_page_size()-1)); 8870 cmpl(result, (os::vm_page_size()-16)); 8871 jccb(Assembler::belowEqual, BIG_STRINGS); 8872 8873 subptr(rsp, 16); 8874 int stk_offset = -2; 8875 if (int_cnt2 < 0) { // not constant 8876 push(cnt2); 8877 stk_offset += wordSize; 8878 } 8879 movl(cnt2, cnt1); 8880 8881 bind(COPY_STR); 8882 load_unsigned_short(result, Address(str1, cnt2, Address::times_2, -2)); 8883 movw(Address(rsp, cnt2, Address::times_2, stk_offset), result); 8884 decrement(cnt2); 8885 jccb(Assembler::notZero, COPY_STR); 8886 8887 if (int_cnt2 < 0) { // not constant 8888 pop(cnt2); 8889 } 8890 movptr(str1, rsp); // New string address 8891 8892 bind(BIG_STRINGS); 8893 // Load substring. 8894 if (int_cnt2 < 0) { // -1 8895 movdqu(vec, Address(str2, 0)); 8896 push(cnt2); // substr count 8897 push(str2); // substr addr 8898 push(str1); // string addr 8899 } else { 8900 // Small (< 8 chars) constant substrings are loaded already. 8901 movl(cnt2, int_cnt2); 8902 } 8903 push(tmp); // original SP 8904 8905 } // Finished loading 8906 8907 //======================================================== 8908 // Start search 8909 // 8910 8911 movptr(result, str1); // string addr 8912 8913 if (int_cnt2 < 0) { // Only for non constant substring 8914 jmpb(SCAN_TO_SUBSTR); 8915 8916 // SP saved at sp+0 8917 // String saved at sp+1*wordSize 8918 // Substr saved at sp+2*wordSize 8919 // Substr count saved at sp+3*wordSize 8920 8921 // Reload substr for rescan, this code 8922 // is executed only for large substrings (> 8 chars) 8923 bind(RELOAD_SUBSTR); 8924 movptr(str2, Address(rsp, 2*wordSize)); 8925 movl(cnt2, Address(rsp, 3*wordSize)); 8926 movdqu(vec, Address(str2, 0)); 8927 // We came here after the beginning of the substring was 8928 // matched but the rest of it was not so we need to search 8929 // again. Start from the next element after the previous match. 
8930 subptr(str1, result); // Restore counter 8931 shrl(str1, 1); 8932 addl(cnt1, str1); 8933 decrementl(cnt1); // Shift to next element 8934 cmpl(cnt1, cnt2); 8935 jccb(Assembler::negative, RET_NOT_FOUND); // Left less then substring 8936 8937 addptr(result, 2); 8938 } // non constant 8939 8940 // Scan string for start of substr in 16-byte vectors 8941 bind(SCAN_TO_SUBSTR); 8942 assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri"); 8943 pcmpestri(vec, Address(result, 0), 0x0d); 8944 jccb(Assembler::below, FOUND_CANDIDATE); // CF == 1 8945 subl(cnt1, 8); 8946 jccb(Assembler::lessEqual, RET_NOT_FOUND); // Scanned full string 8947 cmpl(cnt1, cnt2); 8948 jccb(Assembler::negative, RET_NOT_FOUND); // Left less then substring 8949 addptr(result, 16); 8950 8951 bind(ADJUST_STR); 8952 cmpl(cnt1, 8); // Do not read beyond string 8953 jccb(Assembler::greaterEqual, SCAN_TO_SUBSTR); 8954 // Back-up string to avoid reading beyond string. 8955 lea(result, Address(result, cnt1, Address::times_2, -16)); 8956 movl(cnt1, 8); 8957 jmpb(SCAN_TO_SUBSTR); 8958 8959 // Found a potential substr 8960 bind(FOUND_CANDIDATE); 8961 // After pcmpestri tmp(rcx) contains matched element index 8962 8963 // Make sure string is still long enough 8964 subl(cnt1, tmp); 8965 cmpl(cnt1, cnt2); 8966 jccb(Assembler::greaterEqual, FOUND_SUBSTR); 8967 // Left less then substring. 8968 8969 bind(RET_NOT_FOUND); 8970 movl(result, -1); 8971 jmpb(CLEANUP); 8972 8973 bind(FOUND_SUBSTR); 8974 // Compute start addr of substr 8975 lea(result, Address(result, tmp, Address::times_2)); 8976 8977 if (int_cnt2 > 0) { // Constant substring 8978 // Repeat search for small substring (< 8 chars) 8979 // from new point without reloading substring. 8980 // Have to check that we don't read beyond string. 8981 cmpl(tmp, 8-int_cnt2); 8982 jccb(Assembler::greater, ADJUST_STR); 8983 // Fall through if matched whole substring. 
8984 } else { // non constant 8985 assert(int_cnt2 == -1, "should be != 0"); 8986 8987 addl(tmp, cnt2); 8988 // Found result if we matched whole substring. 8989 cmpl(tmp, 8); 8990 jccb(Assembler::lessEqual, RET_FOUND); 8991 8992 // Repeat search for small substring (<= 8 chars) 8993 // from new point 'str1' without reloading substring. 8994 cmpl(cnt2, 8); 8995 // Have to check that we don't read beyond string. 8996 jccb(Assembler::lessEqual, ADJUST_STR); 8997 8998 Label CHECK_NEXT, CONT_SCAN_SUBSTR, RET_FOUND_LONG; 8999 // Compare the rest of substring (> 8 chars). 9000 movptr(str1, result); 9001 9002 cmpl(tmp, cnt2); 9003 // First 8 chars are already matched. 9004 jccb(Assembler::equal, CHECK_NEXT); 9005 9006 bind(SCAN_SUBSTR); 9007 pcmpestri(vec, Address(str1, 0), 0x0d); 9008 // Need to reload strings pointers if not matched whole vector 9009 jcc(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0 9010 9011 bind(CHECK_NEXT); 9012 subl(cnt2, 8); 9013 jccb(Assembler::lessEqual, RET_FOUND_LONG); // Found full substring 9014 addptr(str1, 16); 9015 addptr(str2, 16); 9016 subl(cnt1, 8); 9017 cmpl(cnt2, 8); // Do not read beyond substring 9018 jccb(Assembler::greaterEqual, CONT_SCAN_SUBSTR); 9019 // Back-up strings to avoid reading beyond substring. 9020 lea(str2, Address(str2, cnt2, Address::times_2, -16)); 9021 lea(str1, Address(str1, cnt2, Address::times_2, -16)); 9022 subl(cnt1, cnt2); 9023 movl(cnt2, 8); 9024 addl(cnt1, 8); 9025 bind(CONT_SCAN_SUBSTR); 9026 movdqu(vec, Address(str2, 0)); 9027 jmpb(SCAN_SUBSTR); 9028 9029 bind(RET_FOUND_LONG); 9030 movptr(str1, Address(rsp, wordSize)); 9031 } // non constant 9032 9033 bind(RET_FOUND); 9034 // Compute substr offset 9035 subptr(result, str1); 9036 shrl(result, 1); // index 9037 9038 bind(CLEANUP); 9039 pop(rsp); // restore SP 9040 9041 } // string_indexof 9042 9043 // Compare strings. 
9044 void MacroAssembler::string_compare(Register str1, Register str2, 9045 Register cnt1, Register cnt2, Register result, 9046 XMMRegister vec1) { 9047 Label LENGTH_DIFF_LABEL, POP_LABEL, DONE_LABEL, WHILE_HEAD_LABEL; 9048 9049 // Compute the minimum of the string lengths and the 9050 // difference of the string lengths (stack). 9051 // Do the conditional move stuff 9052 movl(result, cnt1); 9053 subl(cnt1, cnt2); 9054 push(cnt1); 9055 cmov32(Assembler::lessEqual, cnt2, result); 9056 9057 // Is the minimum length zero? 9058 testl(cnt2, cnt2); 9059 jcc(Assembler::zero, LENGTH_DIFF_LABEL); 9060 9061 // Load first characters 9062 load_unsigned_short(result, Address(str1, 0)); 9063 load_unsigned_short(cnt1, Address(str2, 0)); 9064 9065 // Compare first characters 9066 subl(result, cnt1); 9067 jcc(Assembler::notZero, POP_LABEL); 9068 decrementl(cnt2); 9069 jcc(Assembler::zero, LENGTH_DIFF_LABEL); 9070 9071 { 9072 // Check after comparing first character to see if strings are equivalent 9073 Label LSkip2; 9074 // Check if the strings start at same location 9075 cmpptr(str1, str2); 9076 jccb(Assembler::notEqual, LSkip2); 9077 9078 // Check if the length difference is zero (from stack) 9079 cmpl(Address(rsp, 0), 0x0); 9080 jcc(Assembler::equal, LENGTH_DIFF_LABEL); 9081 9082 // Strings might not be equivalent 9083 bind(LSkip2); 9084 } 9085 9086 Address::ScaleFactor scale = Address::times_2; 9087 int stride = 8; 9088 9089 // Advance to next element 9090 addptr(str1, 16/stride); 9091 addptr(str2, 16/stride); 9092 9093 if (UseSSE42Intrinsics) { 9094 Label COMPARE_WIDE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_TAIL; 9095 int pcmpmask = 0x19; 9096 // Setup to compare 16-byte vectors 9097 movl(result, cnt2); 9098 andl(cnt2, ~(stride - 1)); // cnt2 holds the vector count 9099 jccb(Assembler::zero, COMPARE_TAIL); 9100 9101 lea(str1, Address(str1, result, scale)); 9102 lea(str2, Address(str2, result, scale)); 9103 negptr(result); 9104 9105 // pcmpestri 9106 // inputs: 9107 // vec1- 
substring 9108 // rax - negative string length (elements count) 9109 // mem - scaned string 9110 // rdx - string length (elements count) 9111 // pcmpmask - cmp mode: 11000 (string compare with negated result) 9112 // + 00 (unsigned bytes) or + 01 (unsigned shorts) 9113 // outputs: 9114 // rcx - first mismatched element index 9115 assert(result == rax && cnt2 == rdx && cnt1 == rcx, "pcmpestri"); 9116 9117 bind(COMPARE_WIDE_VECTORS); 9118 movdqu(vec1, Address(str1, result, scale)); 9119 pcmpestri(vec1, Address(str2, result, scale), pcmpmask); 9120 // After pcmpestri cnt1(rcx) contains mismatched element index 9121 9122 jccb(Assembler::below, VECTOR_NOT_EQUAL); // CF==1 9123 addptr(result, stride); 9124 subptr(cnt2, stride); 9125 jccb(Assembler::notZero, COMPARE_WIDE_VECTORS); 9126 9127 // compare wide vectors tail 9128 testl(result, result); 9129 jccb(Assembler::zero, LENGTH_DIFF_LABEL); 9130 9131 movl(cnt2, stride); 9132 movl(result, stride); 9133 negptr(result); 9134 movdqu(vec1, Address(str1, result, scale)); 9135 pcmpestri(vec1, Address(str2, result, scale), pcmpmask); 9136 jccb(Assembler::aboveEqual, LENGTH_DIFF_LABEL); 9137 9138 // Mismatched characters in the vectors 9139 bind(VECTOR_NOT_EQUAL); 9140 addptr(result, cnt1); 9141 movptr(cnt2, result); 9142 load_unsigned_short(result, Address(str1, cnt2, scale)); 9143 load_unsigned_short(cnt1, Address(str2, cnt2, scale)); 9144 subl(result, cnt1); 9145 jmpb(POP_LABEL); 9146 9147 bind(COMPARE_TAIL); // limit is zero 9148 movl(cnt2, result); 9149 // Fallthru to tail compare 9150 } 9151 9152 // Shift str2 and str1 to the end of the arrays, negate min 9153 lea(str1, Address(str1, cnt2, scale, 0)); 9154 lea(str2, Address(str2, cnt2, scale, 0)); 9155 negptr(cnt2); 9156 9157 // Compare the rest of the elements 9158 bind(WHILE_HEAD_LABEL); 9159 load_unsigned_short(result, Address(str1, cnt2, scale, 0)); 9160 load_unsigned_short(cnt1, Address(str2, cnt2, scale, 0)); 9161 subl(result, cnt1); 9162 jccb(Assembler::notZero, 
POP_LABEL); 9163 increment(cnt2); 9164 jccb(Assembler::notZero, WHILE_HEAD_LABEL); 9165 9166 // Strings are equal up to min length. Return the length difference. 9167 bind(LENGTH_DIFF_LABEL); 9168 pop(result); 9169 jmpb(DONE_LABEL); 9170 9171 // Discard the stored length difference 9172 bind(POP_LABEL); 9173 pop(cnt1); 9174 9175 // That's it 9176 bind(DONE_LABEL); 9177 } 9178 9179 // Compare char[] arrays aligned to 4 bytes or substrings. 9180 void MacroAssembler::char_arrays_equals(bool is_array_equ, Register ary1, Register ary2, 9181 Register limit, Register result, Register chr, 9182 XMMRegister vec1, XMMRegister vec2) { 9183 Label TRUE_LABEL, FALSE_LABEL, DONE, COMPARE_VECTORS, COMPARE_CHAR; 9184 9185 int length_offset = arrayOopDesc::length_offset_in_bytes(); 9186 int base_offset = arrayOopDesc::base_offset_in_bytes(T_CHAR); 9187 9188 // Check the input args 9189 cmpptr(ary1, ary2); 9190 jcc(Assembler::equal, TRUE_LABEL); 9191 9192 if (is_array_equ) { 9193 // Need additional checks for arrays_equals. 
9194 testptr(ary1, ary1); 9195 jcc(Assembler::zero, FALSE_LABEL); 9196 testptr(ary2, ary2); 9197 jcc(Assembler::zero, FALSE_LABEL); 9198 9199 // Check the lengths 9200 movl(limit, Address(ary1, length_offset)); 9201 cmpl(limit, Address(ary2, length_offset)); 9202 jcc(Assembler::notEqual, FALSE_LABEL); 9203 } 9204 9205 // count == 0 9206 testl(limit, limit); 9207 jcc(Assembler::zero, TRUE_LABEL); 9208 9209 if (is_array_equ) { 9210 // Load array address 9211 lea(ary1, Address(ary1, base_offset)); 9212 lea(ary2, Address(ary2, base_offset)); 9213 } 9214 9215 shll(limit, 1); // byte count != 0 9216 movl(result, limit); // copy 9217 9218 if (UseSSE42Intrinsics) { 9219 // With SSE4.2, use double quad vector compare 9220 Label COMPARE_WIDE_VECTORS, COMPARE_TAIL; 9221 9222 // Compare 16-byte vectors 9223 andl(result, 0x0000000e); // tail count (in bytes) 9224 andl(limit, 0xfffffff0); // vector count (in bytes) 9225 jccb(Assembler::zero, COMPARE_TAIL); 9226 9227 lea(ary1, Address(ary1, limit, Address::times_1)); 9228 lea(ary2, Address(ary2, limit, Address::times_1)); 9229 negptr(limit); 9230 9231 bind(COMPARE_WIDE_VECTORS); 9232 movdqu(vec1, Address(ary1, limit, Address::times_1)); 9233 movdqu(vec2, Address(ary2, limit, Address::times_1)); 9234 pxor(vec1, vec2); 9235 9236 ptest(vec1, vec1); 9237 jccb(Assembler::notZero, FALSE_LABEL); 9238 addptr(limit, 16); 9239 jcc(Assembler::notZero, COMPARE_WIDE_VECTORS); 9240 9241 testl(result, result); 9242 jccb(Assembler::zero, TRUE_LABEL); 9243 9244 movdqu(vec1, Address(ary1, result, Address::times_1, -16)); 9245 movdqu(vec2, Address(ary2, result, Address::times_1, -16)); 9246 pxor(vec1, vec2); 9247 9248 ptest(vec1, vec1); 9249 jccb(Assembler::notZero, FALSE_LABEL); 9250 jmpb(TRUE_LABEL); 9251 9252 bind(COMPARE_TAIL); // limit is zero 9253 movl(limit, result); 9254 // Fallthru to tail compare 9255 } 9256 9257 // Compare 4-byte vectors 9258 andl(limit, 0xfffffffc); // vector count (in bytes) 9259 jccb(Assembler::zero, COMPARE_CHAR); 
9260 9261 lea(ary1, Address(ary1, limit, Address::times_1)); 9262 lea(ary2, Address(ary2, limit, Address::times_1)); 9263 negptr(limit); 9264 9265 bind(COMPARE_VECTORS); 9266 movl(chr, Address(ary1, limit, Address::times_1)); 9267 cmpl(chr, Address(ary2, limit, Address::times_1)); 9268 jccb(Assembler::notEqual, FALSE_LABEL); 9269 addptr(limit, 4); 9270 jcc(Assembler::notZero, COMPARE_VECTORS); 9271 9272 // Compare trailing char (final 2 bytes), if any 9273 bind(COMPARE_CHAR); 9274 testl(result, 0x2); // tail char 9275 jccb(Assembler::zero, TRUE_LABEL); 9276 load_unsigned_short(chr, Address(ary1, 0)); 9277 load_unsigned_short(limit, Address(ary2, 0)); 9278 cmpl(chr, limit); 9279 jccb(Assembler::notEqual, FALSE_LABEL); 9280 9281 bind(TRUE_LABEL); 9282 movl(result, 1); // return true 9283 jmpb(DONE); 9284 9285 bind(FALSE_LABEL); 9286 xorl(result, result); // return false 9287 9288 // That's it 9289 bind(DONE); 9290 } 9291 9292 #ifdef PRODUCT 9293 #define BLOCK_COMMENT(str) /* nothing */ 9294 #else 9295 #define BLOCK_COMMENT(str) block_comment(str) 9296 #endif 9297 9298 #define BIND(label) bind(label); BLOCK_COMMENT(#label ":") 9299 void MacroAssembler::generate_fill(BasicType t, bool aligned, 9300 Register to, Register value, Register count, 9301 Register rtmp, XMMRegister xtmp) { 9302 assert_different_registers(to, value, count, rtmp); 9303 Label L_exit, L_skip_align1, L_skip_align2, L_fill_byte; 9304 Label L_fill_2_bytes, L_fill_4_bytes; 9305 9306 int shift = -1; 9307 switch (t) { 9308 case T_BYTE: 9309 shift = 2; 9310 break; 9311 case T_SHORT: 9312 shift = 1; 9313 break; 9314 case T_INT: 9315 shift = 0; 9316 break; 9317 default: ShouldNotReachHere(); 9318 } 9319 9320 if (t == T_BYTE) { 9321 andl(value, 0xff); 9322 movl(rtmp, value); 9323 shll(rtmp, 8); 9324 orl(value, rtmp); 9325 } 9326 if (t == T_SHORT) { 9327 andl(value, 0xffff); 9328 } 9329 if (t == T_BYTE || t == T_SHORT) { 9330 movl(rtmp, value); 9331 shll(rtmp, 16); 9332 orl(value, rtmp); 9333 } 9334 9335 
cmpl(count, 2<<shift); // Short arrays (< 8 bytes) fill by element 9336 jcc(Assembler::below, L_fill_4_bytes); // use unsigned cmp 9337 if (!UseUnalignedLoadStores && !aligned && (t == T_BYTE || t == T_SHORT)) { 9338 // align source address at 4 bytes address boundary 9339 if (t == T_BYTE) { 9340 // One byte misalignment happens only for byte arrays 9341 testptr(to, 1); 9342 jccb(Assembler::zero, L_skip_align1); 9343 movb(Address(to, 0), value); 9344 increment(to); 9345 decrement(count); 9346 BIND(L_skip_align1); 9347 } 9348 // Two bytes misalignment happens only for byte and short (char) arrays 9349 testptr(to, 2); 9350 jccb(Assembler::zero, L_skip_align2); 9351 movw(Address(to, 0), value); 9352 addptr(to, 2); 9353 subl(count, 1<<(shift-1)); 9354 BIND(L_skip_align2); 9355 } 9356 if (UseSSE < 2) { 9357 Label L_fill_32_bytes_loop, L_check_fill_8_bytes, L_fill_8_bytes_loop, L_fill_8_bytes; 9358 // Fill 32-byte chunks 9359 subl(count, 8 << shift); 9360 jcc(Assembler::less, L_check_fill_8_bytes); 9361 align(16); 9362 9363 BIND(L_fill_32_bytes_loop); 9364 9365 for (int i = 0; i < 32; i += 4) { 9366 movl(Address(to, i), value); 9367 } 9368 9369 addptr(to, 32); 9370 subl(count, 8 << shift); 9371 jcc(Assembler::greaterEqual, L_fill_32_bytes_loop); 9372 BIND(L_check_fill_8_bytes); 9373 addl(count, 8 << shift); 9374 jccb(Assembler::zero, L_exit); 9375 jmpb(L_fill_8_bytes); 9376 9377 // 9378 // length is too short, just fill qwords 9379 // 9380 BIND(L_fill_8_bytes_loop); 9381 movl(Address(to, 0), value); 9382 movl(Address(to, 4), value); 9383 addptr(to, 8); 9384 BIND(L_fill_8_bytes); 9385 subl(count, 1 << (shift + 1)); 9386 jcc(Assembler::greaterEqual, L_fill_8_bytes_loop); 9387 // fall through to fill 4 bytes 9388 } else { 9389 Label L_fill_32_bytes; 9390 if (!UseUnalignedLoadStores) { 9391 // align to 8 bytes, we know we are 4 byte aligned to start 9392 testptr(to, 4); 9393 jccb(Assembler::zero, L_fill_32_bytes); 9394 movl(Address(to, 0), value); 9395 addptr(to, 4); 9396 
subl(count, 1<<shift); 9397 } 9398 BIND(L_fill_32_bytes); 9399 { 9400 assert( UseSSE >= 2, "supported cpu only" ); 9401 Label L_fill_32_bytes_loop, L_check_fill_8_bytes, L_fill_8_bytes_loop, L_fill_8_bytes; 9402 // Fill 32-byte chunks 9403 movdl(xtmp, value); 9404 pshufd(xtmp, xtmp, 0); 9405 9406 subl(count, 8 << shift); 9407 jcc(Assembler::less, L_check_fill_8_bytes); 9408 align(16); 9409 9410 BIND(L_fill_32_bytes_loop); 9411 9412 if (UseUnalignedLoadStores) { 9413 movdqu(Address(to, 0), xtmp); 9414 movdqu(Address(to, 16), xtmp); 9415 } else { 9416 movq(Address(to, 0), xtmp); 9417 movq(Address(to, 8), xtmp); 9418 movq(Address(to, 16), xtmp); 9419 movq(Address(to, 24), xtmp); 9420 } 9421 9422 addptr(to, 32); 9423 subl(count, 8 << shift); 9424 jcc(Assembler::greaterEqual, L_fill_32_bytes_loop); 9425 BIND(L_check_fill_8_bytes); 9426 addl(count, 8 << shift); 9427 jccb(Assembler::zero, L_exit); 9428 jmpb(L_fill_8_bytes); 9429 9430 // 9431 // length is too short, just fill qwords 9432 // 9433 BIND(L_fill_8_bytes_loop); 9434 movq(Address(to, 0), xtmp); 9435 addptr(to, 8); 9436 BIND(L_fill_8_bytes); 9437 subl(count, 1 << (shift + 1)); 9438 jcc(Assembler::greaterEqual, L_fill_8_bytes_loop); 9439 } 9440 } 9441 // fill trailing 4 bytes 9442 BIND(L_fill_4_bytes); 9443 testl(count, 1<<shift); 9444 jccb(Assembler::zero, L_fill_2_bytes); 9445 movl(Address(to, 0), value); 9446 if (t == T_BYTE || t == T_SHORT) { 9447 addptr(to, 4); 9448 BIND(L_fill_2_bytes); 9449 // fill trailing 2 bytes 9450 testl(count, 1<<(shift-1)); 9451 jccb(Assembler::zero, L_fill_byte); 9452 movw(Address(to, 0), value); 9453 if (t == T_BYTE) { 9454 addptr(to, 2); 9455 BIND(L_fill_byte); 9456 // fill trailing byte 9457 testl(count, 1); 9458 jccb(Assembler::zero, L_exit); 9459 movb(Address(to, 0), value); 9460 } else { 9461 BIND(L_fill_byte); 9462 } 9463 } else { 9464 BIND(L_fill_2_bytes); 9465 } 9466 BIND(L_exit); 9467 } 9468 #undef BIND 9469 #undef BLOCK_COMMENT 9470 9471 9472 Assembler::Condition 
MacroAssembler::negate_condition(Assembler::Condition cond) { 9473 switch (cond) { 9474 // Note some conditions are synonyms for others 9475 case Assembler::zero: return Assembler::notZero; 9476 case Assembler::notZero: return Assembler::zero; 9477 case Assembler::less: return Assembler::greaterEqual; 9478 case Assembler::lessEqual: return Assembler::greater; 9479 case Assembler::greater: return Assembler::lessEqual; 9480 case Assembler::greaterEqual: return Assembler::less; 9481 case Assembler::below: return Assembler::aboveEqual; 9482 case Assembler::belowEqual: return Assembler::above; 9483 case Assembler::above: return Assembler::belowEqual; 9484 case Assembler::aboveEqual: return Assembler::below; 9485 case Assembler::overflow: return Assembler::noOverflow; 9486 case Assembler::noOverflow: return Assembler::overflow; 9487 case Assembler::negative: return Assembler::positive; 9488 case Assembler::positive: return Assembler::negative; 9489 case Assembler::parity: return Assembler::noParity; 9490 case Assembler::noParity: return Assembler::parity; 9491 } 9492 ShouldNotReachHere(); return Assembler::overflow; 9493 } 9494 9495 SkipIfEqual::SkipIfEqual( 9496 MacroAssembler* masm, const bool* flag_addr, bool value) { 9497 _masm = masm; 9498 _masm->cmp8(ExternalAddress((address)flag_addr), value); 9499 _masm->jcc(Assembler::equal, _label); 9500 } 9501 9502 SkipIfEqual::~SkipIfEqual() { 9503 _masm->bind(_label); 9504 }