/*
 * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
22 * 23 */ 24 25 #include "precompiled.hpp" 26 #include "assembler_x86.inline.hpp" 27 #include "gc_interface/collectedHeap.inline.hpp" 28 #include "interpreter/interpreter.hpp" 29 #include "memory/cardTableModRefBS.hpp" 30 #include "memory/resourceArea.hpp" 31 #include "prims/methodHandles.hpp" 32 #include "runtime/biasedLocking.hpp" 33 #include "runtime/interfaceSupport.hpp" 34 #include "runtime/objectMonitor.hpp" 35 #include "runtime/os.hpp" 36 #include "runtime/sharedRuntime.hpp" 37 #include "runtime/stubRoutines.hpp" 38 #ifndef SERIALGC 39 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp" 40 #include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp" 41 #include "gc_implementation/g1/heapRegion.hpp" 42 #endif 43 44 // Implementation of AddressLiteral 45 46 AddressLiteral::AddressLiteral(address target, relocInfo::relocType rtype) { 47 _is_lval = false; 48 _target = target; 49 switch (rtype) { 50 case relocInfo::oop_type: 51 // Oops are a special case. Normally they would be their own section 52 // but in cases like icBuffer they are literals in the code stream that 53 // we don't have a section for. We use none so that we get a literal address 54 // which is always patchable. 
55 break; 56 case relocInfo::external_word_type: 57 _rspec = external_word_Relocation::spec(target); 58 break; 59 case relocInfo::internal_word_type: 60 _rspec = internal_word_Relocation::spec(target); 61 break; 62 case relocInfo::opt_virtual_call_type: 63 _rspec = opt_virtual_call_Relocation::spec(); 64 break; 65 case relocInfo::static_call_type: 66 _rspec = static_call_Relocation::spec(); 67 break; 68 case relocInfo::runtime_call_type: 69 _rspec = runtime_call_Relocation::spec(); 70 break; 71 case relocInfo::poll_type: 72 case relocInfo::poll_return_type: 73 _rspec = Relocation::spec_simple(rtype); 74 break; 75 case relocInfo::none: 76 break; 77 default: 78 ShouldNotReachHere(); 79 break; 80 } 81 } 82 83 // Implementation of Address 84 85 #ifdef _LP64 86 87 Address Address::make_array(ArrayAddress adr) { 88 // Not implementable on 64bit machines 89 // Should have been handled higher up the call chain. 90 ShouldNotReachHere(); 91 return Address(); 92 } 93 94 // exceedingly dangerous constructor 95 Address::Address(int disp, address loc, relocInfo::relocType rtype) { 96 _base = noreg; 97 _index = noreg; 98 _scale = no_scale; 99 _disp = disp; 100 switch (rtype) { 101 case relocInfo::external_word_type: 102 _rspec = external_word_Relocation::spec(loc); 103 break; 104 case relocInfo::internal_word_type: 105 _rspec = internal_word_Relocation::spec(loc); 106 break; 107 case relocInfo::runtime_call_type: 108 // HMM 109 _rspec = runtime_call_Relocation::spec(); 110 break; 111 case relocInfo::poll_type: 112 case relocInfo::poll_return_type: 113 _rspec = Relocation::spec_simple(rtype); 114 break; 115 case relocInfo::none: 116 break; 117 default: 118 ShouldNotReachHere(); 119 } 120 } 121 #else // LP64 122 123 Address Address::make_array(ArrayAddress adr) { 124 AddressLiteral base = adr.base(); 125 Address index = adr.index(); 126 assert(index._disp == 0, "must not have disp"); // maybe it can? 
127 Address array(index._base, index._index, index._scale, (intptr_t) base.target()); 128 array._rspec = base._rspec; 129 return array; 130 } 131 132 // exceedingly dangerous constructor 133 Address::Address(address loc, RelocationHolder spec) { 134 _base = noreg; 135 _index = noreg; 136 _scale = no_scale; 137 _disp = (intptr_t) loc; 138 _rspec = spec; 139 } 140 141 #endif // _LP64 142 143 144 145 // Convert the raw encoding form into the form expected by the constructor for 146 // Address. An index of 4 (rsp) corresponds to having no index, so convert 147 // that to noreg for the Address constructor. 148 Address Address::make_raw(int base, int index, int scale, int disp, bool disp_is_oop) { 149 RelocationHolder rspec; 150 if (disp_is_oop) { 151 rspec = Relocation::spec_simple(relocInfo::oop_type); 152 } 153 bool valid_index = index != rsp->encoding(); 154 if (valid_index) { 155 Address madr(as_Register(base), as_Register(index), (Address::ScaleFactor)scale, in_ByteSize(disp)); 156 madr._rspec = rspec; 157 return madr; 158 } else { 159 Address madr(as_Register(base), noreg, Address::no_scale, in_ByteSize(disp)); 160 madr._rspec = rspec; 161 return madr; 162 } 163 } 164 165 // Implementation of Assembler 166 167 int AbstractAssembler::code_fill_byte() { 168 return (u_char)'\xF4'; // hlt 169 } 170 171 // make this go away someday 172 void Assembler::emit_data(jint data, relocInfo::relocType rtype, int format) { 173 if (rtype == relocInfo::none) 174 emit_long(data); 175 else emit_data(data, Relocation::spec_simple(rtype), format); 176 } 177 178 void Assembler::emit_data(jint data, RelocationHolder const& rspec, int format) { 179 assert(imm_operand == 0, "default format must be immediate in this file"); 180 assert(inst_mark() != NULL, "must be inside InstructionMark"); 181 if (rspec.type() != relocInfo::none) { 182 #ifdef ASSERT 183 check_relocation(rspec, format); 184 #endif 185 // Do not use AbstractAssembler::relocate, which is not intended for 186 // embedded 
words. Instead, relocate to the enclosing instruction. 187 188 // hack. call32 is too wide for mask so use disp32 189 if (format == call32_operand) 190 code_section()->relocate(inst_mark(), rspec, disp32_operand); 191 else 192 code_section()->relocate(inst_mark(), rspec, format); 193 } 194 emit_long(data); 195 } 196 197 static int encode(Register r) { 198 int enc = r->encoding(); 199 if (enc >= 8) { 200 enc -= 8; 201 } 202 return enc; 203 } 204 205 static int encode(XMMRegister r) { 206 int enc = r->encoding(); 207 if (enc >= 8) { 208 enc -= 8; 209 } 210 return enc; 211 } 212 213 void Assembler::emit_arith_b(int op1, int op2, Register dst, int imm8) { 214 assert(dst->has_byte_register(), "must have byte register"); 215 assert(isByte(op1) && isByte(op2), "wrong opcode"); 216 assert(isByte(imm8), "not a byte"); 217 assert((op1 & 0x01) == 0, "should be 8bit operation"); 218 emit_byte(op1); 219 emit_byte(op2 | encode(dst)); 220 emit_byte(imm8); 221 } 222 223 224 void Assembler::emit_arith(int op1, int op2, Register dst, int32_t imm32) { 225 assert(isByte(op1) && isByte(op2), "wrong opcode"); 226 assert((op1 & 0x01) == 1, "should be 32bit operation"); 227 assert((op1 & 0x02) == 0, "sign-extension bit should not be set"); 228 if (is8bit(imm32)) { 229 emit_byte(op1 | 0x02); // set sign bit 230 emit_byte(op2 | encode(dst)); 231 emit_byte(imm32 & 0xFF); 232 } else { 233 emit_byte(op1); 234 emit_byte(op2 | encode(dst)); 235 emit_long(imm32); 236 } 237 } 238 239 // immediate-to-memory forms 240 void Assembler::emit_arith_operand(int op1, Register rm, Address adr, int32_t imm32) { 241 assert((op1 & 0x01) == 1, "should be 32bit operation"); 242 assert((op1 & 0x02) == 0, "sign-extension bit should not be set"); 243 if (is8bit(imm32)) { 244 emit_byte(op1 | 0x02); // set sign bit 245 emit_operand(rm, adr, 1); 246 emit_byte(imm32 & 0xFF); 247 } else { 248 emit_byte(op1); 249 emit_operand(rm, adr, 4); 250 emit_long(imm32); 251 } 252 } 253 254 void Assembler::emit_arith(int op1, int 
op2, Register dst, jobject obj) { 255 LP64_ONLY(ShouldNotReachHere()); 256 assert(isByte(op1) && isByte(op2), "wrong opcode"); 257 assert((op1 & 0x01) == 1, "should be 32bit operation"); 258 assert((op1 & 0x02) == 0, "sign-extension bit should not be set"); 259 InstructionMark im(this); 260 emit_byte(op1); 261 emit_byte(op2 | encode(dst)); 262 emit_data((intptr_t)obj, relocInfo::oop_type, 0); 263 } 264 265 266 void Assembler::emit_arith(int op1, int op2, Register dst, Register src) { 267 assert(isByte(op1) && isByte(op2), "wrong opcode"); 268 emit_byte(op1); 269 emit_byte(op2 | encode(dst) << 3 | encode(src)); 270 } 271 272 273 void Assembler::emit_operand(Register reg, Register base, Register index, 274 Address::ScaleFactor scale, int disp, 275 RelocationHolder const& rspec, 276 int rip_relative_correction) { 277 relocInfo::relocType rtype = (relocInfo::relocType) rspec.type(); 278 279 // Encode the registers as needed in the fields they are used in 280 281 int regenc = encode(reg) << 3; 282 int indexenc = index->is_valid() ? encode(index) << 3 : 0; 283 int baseenc = base->is_valid() ? 
encode(base) : 0; 284 285 if (base->is_valid()) { 286 if (index->is_valid()) { 287 assert(scale != Address::no_scale, "inconsistent address"); 288 // [base + index*scale + disp] 289 if (disp == 0 && rtype == relocInfo::none && 290 base != rbp LP64_ONLY(&& base != r13)) { 291 // [base + index*scale] 292 // [00 reg 100][ss index base] 293 assert(index != rsp, "illegal addressing mode"); 294 emit_byte(0x04 | regenc); 295 emit_byte(scale << 6 | indexenc | baseenc); 296 } else if (is8bit(disp) && rtype == relocInfo::none) { 297 // [base + index*scale + imm8] 298 // [01 reg 100][ss index base] imm8 299 assert(index != rsp, "illegal addressing mode"); 300 emit_byte(0x44 | regenc); 301 emit_byte(scale << 6 | indexenc | baseenc); 302 emit_byte(disp & 0xFF); 303 } else { 304 // [base + index*scale + disp32] 305 // [10 reg 100][ss index base] disp32 306 assert(index != rsp, "illegal addressing mode"); 307 emit_byte(0x84 | regenc); 308 emit_byte(scale << 6 | indexenc | baseenc); 309 emit_data(disp, rspec, disp32_operand); 310 } 311 } else if (base == rsp LP64_ONLY(|| base == r12)) { 312 // [rsp + disp] 313 if (disp == 0 && rtype == relocInfo::none) { 314 // [rsp] 315 // [00 reg 100][00 100 100] 316 emit_byte(0x04 | regenc); 317 emit_byte(0x24); 318 } else if (is8bit(disp) && rtype == relocInfo::none) { 319 // [rsp + imm8] 320 // [01 reg 100][00 100 100] disp8 321 emit_byte(0x44 | regenc); 322 emit_byte(0x24); 323 emit_byte(disp & 0xFF); 324 } else { 325 // [rsp + imm32] 326 // [10 reg 100][00 100 100] disp32 327 emit_byte(0x84 | regenc); 328 emit_byte(0x24); 329 emit_data(disp, rspec, disp32_operand); 330 } 331 } else { 332 // [base + disp] 333 assert(base != rsp LP64_ONLY(&& base != r12), "illegal addressing mode"); 334 if (disp == 0 && rtype == relocInfo::none && 335 base != rbp LP64_ONLY(&& base != r13)) { 336 // [base] 337 // [00 reg base] 338 emit_byte(0x00 | regenc | baseenc); 339 } else if (is8bit(disp) && rtype == relocInfo::none) { 340 // [base + disp8] 341 // [01 reg 
base] disp8 342 emit_byte(0x40 | regenc | baseenc); 343 emit_byte(disp & 0xFF); 344 } else { 345 // [base + disp32] 346 // [10 reg base] disp32 347 emit_byte(0x80 | regenc | baseenc); 348 emit_data(disp, rspec, disp32_operand); 349 } 350 } 351 } else { 352 if (index->is_valid()) { 353 assert(scale != Address::no_scale, "inconsistent address"); 354 // [index*scale + disp] 355 // [00 reg 100][ss index 101] disp32 356 assert(index != rsp, "illegal addressing mode"); 357 emit_byte(0x04 | regenc); 358 emit_byte(scale << 6 | indexenc | 0x05); 359 emit_data(disp, rspec, disp32_operand); 360 } else if (rtype != relocInfo::none ) { 361 // [disp] (64bit) RIP-RELATIVE (32bit) abs 362 // [00 000 101] disp32 363 364 emit_byte(0x05 | regenc); 365 // Note that the RIP-rel. correction applies to the generated 366 // disp field, but _not_ to the target address in the rspec. 367 368 // disp was created by converting the target address minus the pc 369 // at the start of the instruction. That needs more correction here. 
370 // intptr_t disp = target - next_ip; 371 assert(inst_mark() != NULL, "must be inside InstructionMark"); 372 address next_ip = pc() + sizeof(int32_t) + rip_relative_correction; 373 int64_t adjusted = disp; 374 // Do rip-rel adjustment for 64bit 375 LP64_ONLY(adjusted -= (next_ip - inst_mark())); 376 assert(is_simm32(adjusted), 377 "must be 32bit offset (RIP relative address)"); 378 emit_data((int32_t) adjusted, rspec, disp32_operand); 379 380 } else { 381 // 32bit never did this, did everything as the rip-rel/disp code above 382 // [disp] ABSOLUTE 383 // [00 reg 100][00 100 101] disp32 384 emit_byte(0x04 | regenc); 385 emit_byte(0x25); 386 emit_data(disp, rspec, disp32_operand); 387 } 388 } 389 } 390 391 void Assembler::emit_operand(XMMRegister reg, Register base, Register index, 392 Address::ScaleFactor scale, int disp, 393 RelocationHolder const& rspec) { 394 emit_operand((Register)reg, base, index, scale, disp, rspec); 395 } 396 397 // Secret local extension to Assembler::WhichOperand: 398 #define end_pc_operand (_WhichOperand_limit) 399 400 address Assembler::locate_operand(address inst, WhichOperand which) { 401 // Decode the given instruction, and return the address of 402 // an embedded 32-bit operand word. 403 404 // If "which" is disp32_operand, selects the displacement portion 405 // of an effective address specifier. 406 // If "which" is imm64_operand, selects the trailing immediate constant. 407 // If "which" is call32_operand, selects the displacement of a call or jump. 408 // Caller is responsible for ensuring that there is such an operand, 409 // and that it is 32/64 bits wide. 410 411 // If "which" is end_pc_operand, find the end of the instruction. 412 413 address ip = inst; 414 bool is_64bit = false; 415 416 debug_only(bool has_disp32 = false); 417 int tail_size = 0; // other random bytes (#32, #16, etc.) 
at end of insn 418 419 again_after_prefix: 420 switch (0xFF & *ip++) { 421 422 // These convenience macros generate groups of "case" labels for the switch. 423 #define REP4(x) (x)+0: case (x)+1: case (x)+2: case (x)+3 424 #define REP8(x) (x)+0: case (x)+1: case (x)+2: case (x)+3: \ 425 case (x)+4: case (x)+5: case (x)+6: case (x)+7 426 #define REP16(x) REP8((x)+0): \ 427 case REP8((x)+8) 428 429 case CS_segment: 430 case SS_segment: 431 case DS_segment: 432 case ES_segment: 433 case FS_segment: 434 case GS_segment: 435 // Seems dubious 436 LP64_ONLY(assert(false, "shouldn't have that prefix")); 437 assert(ip == inst+1, "only one prefix allowed"); 438 goto again_after_prefix; 439 440 case 0x67: 441 case REX: 442 case REX_B: 443 case REX_X: 444 case REX_XB: 445 case REX_R: 446 case REX_RB: 447 case REX_RX: 448 case REX_RXB: 449 NOT_LP64(assert(false, "64bit prefixes")); 450 goto again_after_prefix; 451 452 case REX_W: 453 case REX_WB: 454 case REX_WX: 455 case REX_WXB: 456 case REX_WR: 457 case REX_WRB: 458 case REX_WRX: 459 case REX_WRXB: 460 NOT_LP64(assert(false, "64bit prefixes")); 461 is_64bit = true; 462 goto again_after_prefix; 463 464 case 0xFF: // pushq a; decl a; incl a; call a; jmp a 465 case 0x88: // movb a, r 466 case 0x89: // movl a, r 467 case 0x8A: // movb r, a 468 case 0x8B: // movl r, a 469 case 0x8F: // popl a 470 debug_only(has_disp32 = true); 471 break; 472 473 case 0x68: // pushq #32 474 if (which == end_pc_operand) { 475 return ip + 4; 476 } 477 assert(which == imm_operand && !is_64bit, "pushl has no disp32 or 64bit immediate"); 478 return ip; // not produced by emit_operand 479 480 case 0x66: // movw ... 
(size prefix) 481 again_after_size_prefix2: 482 switch (0xFF & *ip++) { 483 case REX: 484 case REX_B: 485 case REX_X: 486 case REX_XB: 487 case REX_R: 488 case REX_RB: 489 case REX_RX: 490 case REX_RXB: 491 case REX_W: 492 case REX_WB: 493 case REX_WX: 494 case REX_WXB: 495 case REX_WR: 496 case REX_WRB: 497 case REX_WRX: 498 case REX_WRXB: 499 NOT_LP64(assert(false, "64bit prefix found")); 500 goto again_after_size_prefix2; 501 case 0x8B: // movw r, a 502 case 0x89: // movw a, r 503 debug_only(has_disp32 = true); 504 break; 505 case 0xC7: // movw a, #16 506 debug_only(has_disp32 = true); 507 tail_size = 2; // the imm16 508 break; 509 case 0x0F: // several SSE/SSE2 variants 510 ip--; // reparse the 0x0F 511 goto again_after_prefix; 512 default: 513 ShouldNotReachHere(); 514 } 515 break; 516 517 case REP8(0xB8): // movl/q r, #32/#64(oop?) 518 if (which == end_pc_operand) return ip + (is_64bit ? 8 : 4); 519 // these asserts are somewhat nonsensical 520 #ifndef _LP64 521 assert(which == imm_operand || which == disp32_operand, ""); 522 #else 523 assert((which == call32_operand || which == imm_operand) && is_64bit || 524 which == narrow_oop_operand && !is_64bit, ""); 525 #endif // _LP64 526 return ip; 527 528 case 0x69: // imul r, a, #32 529 case 0xC7: // movl a, #32(oop?) 530 tail_size = 4; 531 debug_only(has_disp32 = true); // has both kinds of operands! 532 break; 533 534 case 0x0F: // movx..., etc. 
535 switch (0xFF & *ip++) { 536 case 0x12: // movlps 537 case 0x28: // movaps 538 case 0x2E: // ucomiss 539 case 0x2F: // comiss 540 case 0x54: // andps 541 case 0x55: // andnps 542 case 0x56: // orps 543 case 0x57: // xorps 544 case 0x6E: // movd 545 case 0x7E: // movd 546 case 0xAE: // ldmxcsr a 547 // 64bit side says it these have both operands but that doesn't 548 // appear to be true 549 debug_only(has_disp32 = true); 550 break; 551 552 case 0xAD: // shrd r, a, %cl 553 case 0xAF: // imul r, a 554 case 0xBE: // movsbl r, a (movsxb) 555 case 0xBF: // movswl r, a (movsxw) 556 case 0xB6: // movzbl r, a (movzxb) 557 case 0xB7: // movzwl r, a (movzxw) 558 case REP16(0x40): // cmovl cc, r, a 559 case 0xB0: // cmpxchgb 560 case 0xB1: // cmpxchg 561 case 0xC1: // xaddl 562 case 0xC7: // cmpxchg8 563 case REP16(0x90): // setcc a 564 debug_only(has_disp32 = true); 565 // fall out of the switch to decode the address 566 break; 567 568 case 0xAC: // shrd r, a, #8 569 debug_only(has_disp32 = true); 570 tail_size = 1; // the imm8 571 break; 572 573 case REP16(0x80): // jcc rdisp32 574 if (which == end_pc_operand) return ip + 4; 575 assert(which == call32_operand, "jcc has no disp32 or imm"); 576 return ip; 577 default: 578 ShouldNotReachHere(); 579 } 580 break; 581 582 case 0x81: // addl a, #32; addl r, #32 583 // also: orl, adcl, sbbl, andl, subl, xorl, cmpl 584 // on 32bit in the case of cmpl, the imm might be an oop 585 tail_size = 4; 586 debug_only(has_disp32 = true); // has both kinds of operands! 587 break; 588 589 case 0x83: // addl a, #8; addl r, #8 590 // also: orl, adcl, sbbl, andl, subl, xorl, cmpl 591 debug_only(has_disp32 = true); // has both kinds of operands! 
592 tail_size = 1; 593 break; 594 595 case 0x9B: 596 switch (0xFF & *ip++) { 597 case 0xD9: // fnstcw a 598 debug_only(has_disp32 = true); 599 break; 600 default: 601 ShouldNotReachHere(); 602 } 603 break; 604 605 case REP4(0x00): // addb a, r; addl a, r; addb r, a; addl r, a 606 case REP4(0x10): // adc... 607 case REP4(0x20): // and... 608 case REP4(0x30): // xor... 609 case REP4(0x08): // or... 610 case REP4(0x18): // sbb... 611 case REP4(0x28): // sub... 612 case 0xF7: // mull a 613 case 0x8D: // lea r, a 614 case 0x87: // xchg r, a 615 case REP4(0x38): // cmp... 616 case 0x85: // test r, a 617 debug_only(has_disp32 = true); // has both kinds of operands! 618 break; 619 620 case 0xC1: // sal a, #8; sar a, #8; shl a, #8; shr a, #8 621 case 0xC6: // movb a, #8 622 case 0x80: // cmpb a, #8 623 case 0x6B: // imul r, a, #8 624 debug_only(has_disp32 = true); // has both kinds of operands! 625 tail_size = 1; // the imm8 626 break; 627 628 case 0xE8: // call rdisp32 629 case 0xE9: // jmp rdisp32 630 if (which == end_pc_operand) return ip + 4; 631 assert(which == call32_operand, "call has no disp32 or imm"); 632 return ip; 633 634 case 0xD1: // sal a, 1; sar a, 1; shl a, 1; shr a, 1 635 case 0xD3: // sal a, %cl; sar a, %cl; shl a, %cl; shr a, %cl 636 case 0xD9: // fld_s a; fst_s a; fstp_s a; fldcw a 637 case 0xDD: // fld_d a; fst_d a; fstp_d a 638 case 0xDB: // fild_s a; fistp_s a; fld_x a; fstp_x a 639 case 0xDF: // fild_d a; fistp_d a 640 case 0xD8: // fadd_s a; fsubr_s a; fmul_s a; fdivr_s a; fcomp_s a 641 case 0xDC: // fadd_d a; fsubr_d a; fmul_d a; fdivr_d a; fcomp_d a 642 case 0xDE: // faddp_d a; fsubrp_d a; fmulp_d a; fdivrp_d a; fcompp_d a 643 debug_only(has_disp32 = true); 644 break; 645 646 case 0xF0: // Lock 647 assert(os::is_MP(), "only on MP"); 648 goto again_after_prefix; 649 650 case 0xF3: // For SSE 651 case 0xF2: // For SSE2 652 switch (0xFF & *ip++) { 653 case REX: 654 case REX_B: 655 case REX_X: 656 case REX_XB: 657 case REX_R: 658 case REX_RB: 659 
case REX_RX: 660 case REX_RXB: 661 case REX_W: 662 case REX_WB: 663 case REX_WX: 664 case REX_WXB: 665 case REX_WR: 666 case REX_WRB: 667 case REX_WRX: 668 case REX_WRXB: 669 NOT_LP64(assert(false, "found 64bit prefix")); 670 ip++; 671 default: 672 ip++; 673 } 674 debug_only(has_disp32 = true); // has both kinds of operands! 675 break; 676 677 default: 678 ShouldNotReachHere(); 679 680 #undef REP8 681 #undef REP16 682 } 683 684 assert(which != call32_operand, "instruction is not a call, jmp, or jcc"); 685 #ifdef _LP64 686 assert(which != imm_operand, "instruction is not a movq reg, imm64"); 687 #else 688 // assert(which != imm_operand || has_imm32, "instruction has no imm32 field"); 689 assert(which != imm_operand || has_disp32, "instruction has no imm32 field"); 690 #endif // LP64 691 assert(which != disp32_operand || has_disp32, "instruction has no disp32 field"); 692 693 // parse the output of emit_operand 694 int op2 = 0xFF & *ip++; 695 int base = op2 & 0x07; 696 int op3 = -1; 697 const int b100 = 4; 698 const int b101 = 5; 699 if (base == b100 && (op2 >> 6) != 3) { 700 op3 = 0xFF & *ip++; 701 base = op3 & 0x07; // refetch the base 702 } 703 // now ip points at the disp (if any) 704 705 switch (op2 >> 6) { 706 case 0: 707 // [00 reg 100][ss index base] 708 // [00 reg 100][00 100 esp] 709 // [00 reg base] 710 // [00 reg 100][ss index 101][disp32] 711 // [00 reg 101] [disp32] 712 713 if (base == b101) { 714 if (which == disp32_operand) 715 return ip; // caller wants the disp32 716 ip += 4; // skip the disp32 717 } 718 break; 719 720 case 1: 721 // [01 reg 100][ss index base][disp8] 722 // [01 reg 100][00 100 esp][disp8] 723 // [01 reg base] [disp8] 724 ip += 1; // skip the disp8 725 break; 726 727 case 2: 728 // [10 reg 100][ss index base][disp32] 729 // [10 reg 100][00 100 esp][disp32] 730 // [10 reg base] [disp32] 731 if (which == disp32_operand) 732 return ip; // caller wants the disp32 733 ip += 4; // skip the disp32 734 break; 735 736 case 3: 737 // [11 reg 
base] (not a memory addressing mode) 738 break; 739 } 740 741 if (which == end_pc_operand) { 742 return ip + tail_size; 743 } 744 745 #ifdef _LP64 746 assert(which == narrow_oop_operand && !is_64bit, "instruction is not a movl adr, imm32"); 747 #else 748 assert(which == imm_operand, "instruction has only an imm field"); 749 #endif // LP64 750 return ip; 751 } 752 753 address Assembler::locate_next_instruction(address inst) { 754 // Secretly share code with locate_operand: 755 return locate_operand(inst, end_pc_operand); 756 } 757 758 759 #ifdef ASSERT 760 void Assembler::check_relocation(RelocationHolder const& rspec, int format) { 761 address inst = inst_mark(); 762 assert(inst != NULL && inst < pc(), "must point to beginning of instruction"); 763 address opnd; 764 765 Relocation* r = rspec.reloc(); 766 if (r->type() == relocInfo::none) { 767 return; 768 } else if (r->is_call() || format == call32_operand) { 769 // assert(format == imm32_operand, "cannot specify a nonzero format"); 770 opnd = locate_operand(inst, call32_operand); 771 } else if (r->is_data()) { 772 assert(format == imm_operand || format == disp32_operand 773 LP64_ONLY(|| format == narrow_oop_operand), "format ok"); 774 opnd = locate_operand(inst, (WhichOperand)format); 775 } else { 776 assert(format == imm_operand, "cannot specify a format"); 777 return; 778 } 779 assert(opnd == pc(), "must put operand where relocs can find it"); 780 } 781 #endif // ASSERT 782 783 void Assembler::emit_operand32(Register reg, Address adr) { 784 assert(reg->encoding() < 8, "no extended registers"); 785 assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers"); 786 emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp, 787 adr._rspec); 788 } 789 790 void Assembler::emit_operand(Register reg, Address adr, 791 int rip_relative_correction) { 792 emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp, 793 adr._rspec, 794 rip_relative_correction); 795 } 796 797 void 
Assembler::emit_operand(XMMRegister reg, Address adr) { 798 emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp, 799 adr._rspec); 800 } 801 802 // MMX operations 803 void Assembler::emit_operand(MMXRegister reg, Address adr) { 804 assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers"); 805 emit_operand((Register)reg, adr._base, adr._index, adr._scale, adr._disp, adr._rspec); 806 } 807 808 // work around gcc (3.2.1-7a) bug 809 void Assembler::emit_operand(Address adr, MMXRegister reg) { 810 assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers"); 811 emit_operand((Register)reg, adr._base, adr._index, adr._scale, adr._disp, adr._rspec); 812 } 813 814 815 void Assembler::emit_farith(int b1, int b2, int i) { 816 assert(isByte(b1) && isByte(b2), "wrong opcode"); 817 assert(0 <= i && i < 8, "illegal stack offset"); 818 emit_byte(b1); 819 emit_byte(b2 + i); 820 } 821 822 823 // Now the Assembler instructions (identical for 32/64 bits) 824 825 void Assembler::adcl(Address dst, int32_t imm32) { 826 InstructionMark im(this); 827 prefix(dst); 828 emit_arith_operand(0x81, rdx, dst, imm32); 829 } 830 831 void Assembler::adcl(Address dst, Register src) { 832 InstructionMark im(this); 833 prefix(dst, src); 834 emit_byte(0x11); 835 emit_operand(src, dst); 836 } 837 838 void Assembler::adcl(Register dst, int32_t imm32) { 839 prefix(dst); 840 emit_arith(0x81, 0xD0, dst, imm32); 841 } 842 843 void Assembler::adcl(Register dst, Address src) { 844 InstructionMark im(this); 845 prefix(src, dst); 846 emit_byte(0x13); 847 emit_operand(dst, src); 848 } 849 850 void Assembler::adcl(Register dst, Register src) { 851 (void) prefix_and_encode(dst->encoding(), src->encoding()); 852 emit_arith(0x13, 0xC0, dst, src); 853 } 854 855 void Assembler::addl(Address dst, int32_t imm32) { 856 InstructionMark im(this); 857 prefix(dst); 858 emit_arith_operand(0x81, rax, dst, imm32); 859 } 860 861 void Assembler::addl(Address dst, Register src) { 
862 InstructionMark im(this); 863 prefix(dst, src); 864 emit_byte(0x01); 865 emit_operand(src, dst); 866 } 867 868 void Assembler::addl(Register dst, int32_t imm32) { 869 prefix(dst); 870 emit_arith(0x81, 0xC0, dst, imm32); 871 } 872 873 void Assembler::addl(Register dst, Address src) { 874 InstructionMark im(this); 875 prefix(src, dst); 876 emit_byte(0x03); 877 emit_operand(dst, src); 878 } 879 880 void Assembler::addl(Register dst, Register src) { 881 (void) prefix_and_encode(dst->encoding(), src->encoding()); 882 emit_arith(0x03, 0xC0, dst, src); 883 } 884 885 void Assembler::addr_nop_4() { 886 // 4 bytes: NOP DWORD PTR [EAX+0] 887 emit_byte(0x0F); 888 emit_byte(0x1F); 889 emit_byte(0x40); // emit_rm(cbuf, 0x1, EAX_enc, EAX_enc); 890 emit_byte(0); // 8-bits offset (1 byte) 891 } 892 893 void Assembler::addr_nop_5() { 894 // 5 bytes: NOP DWORD PTR [EAX+EAX*0+0] 8-bits offset 895 emit_byte(0x0F); 896 emit_byte(0x1F); 897 emit_byte(0x44); // emit_rm(cbuf, 0x1, EAX_enc, 0x4); 898 emit_byte(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc); 899 emit_byte(0); // 8-bits offset (1 byte) 900 } 901 902 void Assembler::addr_nop_7() { 903 // 7 bytes: NOP DWORD PTR [EAX+0] 32-bits offset 904 emit_byte(0x0F); 905 emit_byte(0x1F); 906 emit_byte(0x80); // emit_rm(cbuf, 0x2, EAX_enc, EAX_enc); 907 emit_long(0); // 32-bits offset (4 bytes) 908 } 909 910 void Assembler::addr_nop_8() { 911 // 8 bytes: NOP DWORD PTR [EAX+EAX*0+0] 32-bits offset 912 emit_byte(0x0F); 913 emit_byte(0x1F); 914 emit_byte(0x84); // emit_rm(cbuf, 0x2, EAX_enc, 0x4); 915 emit_byte(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc); 916 emit_long(0); // 32-bits offset (4 bytes) 917 } 918 919 void Assembler::addsd(XMMRegister dst, XMMRegister src) { 920 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 921 emit_byte(0xF2); 922 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 923 emit_byte(0x0F); 924 emit_byte(0x58); 925 emit_byte(0xC0 | encode); 926 } 927 928 void Assembler::addsd(XMMRegister dst, 
Address src) { 929 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 930 InstructionMark im(this); 931 emit_byte(0xF2); 932 prefix(src, dst); 933 emit_byte(0x0F); 934 emit_byte(0x58); 935 emit_operand(dst, src); 936 } 937 938 void Assembler::addss(XMMRegister dst, XMMRegister src) { 939 NOT_LP64(assert(VM_Version::supports_sse(), "")); 940 emit_byte(0xF3); 941 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 942 emit_byte(0x0F); 943 emit_byte(0x58); 944 emit_byte(0xC0 | encode); 945 } 946 947 void Assembler::addss(XMMRegister dst, Address src) { 948 NOT_LP64(assert(VM_Version::supports_sse(), "")); 949 InstructionMark im(this); 950 emit_byte(0xF3); 951 prefix(src, dst); 952 emit_byte(0x0F); 953 emit_byte(0x58); 954 emit_operand(dst, src); 955 } 956 957 void Assembler::andl(Register dst, int32_t imm32) { 958 prefix(dst); 959 emit_arith(0x81, 0xE0, dst, imm32); 960 } 961 962 void Assembler::andl(Register dst, Address src) { 963 InstructionMark im(this); 964 prefix(src, dst); 965 emit_byte(0x23); 966 emit_operand(dst, src); 967 } 968 969 void Assembler::andl(Register dst, Register src) { 970 (void) prefix_and_encode(dst->encoding(), src->encoding()); 971 emit_arith(0x23, 0xC0, dst, src); 972 } 973 974 void Assembler::andpd(XMMRegister dst, Address src) { 975 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 976 InstructionMark im(this); 977 emit_byte(0x66); 978 prefix(src, dst); 979 emit_byte(0x0F); 980 emit_byte(0x54); 981 emit_operand(dst, src); 982 } 983 984 void Assembler::bsfl(Register dst, Register src) { 985 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 986 emit_byte(0x0F); 987 emit_byte(0xBC); 988 emit_byte(0xC0 | encode); 989 } 990 991 void Assembler::bsrl(Register dst, Register src) { 992 assert(!VM_Version::supports_lzcnt(), "encoding is treated as LZCNT"); 993 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 994 emit_byte(0x0F); 995 emit_byte(0xBD); 996 emit_byte(0xC0 | encode); 997 } 998 999 void 
Assembler::bswapl(Register reg) { // bswap 1000 int encode = prefix_and_encode(reg->encoding()); 1001 emit_byte(0x0F); 1002 emit_byte(0xC8 | encode); 1003 } 1004 1005 void Assembler::call(Label& L, relocInfo::relocType rtype) { 1006 // suspect disp32 is always good 1007 int operand = LP64_ONLY(disp32_operand) NOT_LP64(imm_operand); 1008 1009 if (L.is_bound()) { 1010 const int long_size = 5; 1011 int offs = (int)( target(L) - pc() ); 1012 assert(offs <= 0, "assembler error"); 1013 InstructionMark im(this); 1014 // 1110 1000 #32-bit disp 1015 emit_byte(0xE8); 1016 emit_data(offs - long_size, rtype, operand); 1017 } else { 1018 InstructionMark im(this); 1019 // 1110 1000 #32-bit disp 1020 L.add_patch_at(code(), locator()); 1021 1022 emit_byte(0xE8); 1023 emit_data(int(0), rtype, operand); 1024 } 1025 } 1026 1027 void Assembler::call(Register dst) { 1028 // This was originally using a 32bit register encoding 1029 // and surely we want 64bit! 1030 // this is a 32bit encoding but in 64bit mode the default 1031 // operand size is 64bit so there is no need for the 1032 // wide prefix. So prefix only happens if we use the 1033 // new registers. Much like push/pop. 1034 int x = offset(); 1035 // this may be true but dbx disassembles it as if it 1036 // were 32bits... 
1037 // int encode = prefix_and_encode(dst->encoding()); 1038 // if (offset() != x) assert(dst->encoding() >= 8, "what?"); 1039 int encode = prefixq_and_encode(dst->encoding()); 1040 1041 emit_byte(0xFF); 1042 emit_byte(0xD0 | encode); 1043 } 1044 1045 1046 void Assembler::call(Address adr) { 1047 InstructionMark im(this); 1048 prefix(adr); 1049 emit_byte(0xFF); 1050 emit_operand(rdx, adr); 1051 } 1052 1053 void Assembler::call_literal(address entry, RelocationHolder const& rspec) { 1054 assert(entry != NULL, "call most probably wrong"); 1055 InstructionMark im(this); 1056 emit_byte(0xE8); 1057 intptr_t disp = entry - (_code_pos + sizeof(int32_t)); 1058 assert(is_simm32(disp), "must be 32bit offset (call2)"); 1059 // Technically, should use call32_operand, but this format is 1060 // implied by the fact that we're emitting a call instruction. 1061 1062 int operand = LP64_ONLY(disp32_operand) NOT_LP64(call32_operand); 1063 emit_data((int) disp, rspec, operand); 1064 } 1065 1066 void Assembler::cdql() { 1067 emit_byte(0x99); 1068 } 1069 1070 void Assembler::cmovl(Condition cc, Register dst, Register src) { 1071 NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction")); 1072 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 1073 emit_byte(0x0F); 1074 emit_byte(0x40 | cc); 1075 emit_byte(0xC0 | encode); 1076 } 1077 1078 1079 void Assembler::cmovl(Condition cc, Register dst, Address src) { 1080 NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction")); 1081 prefix(src, dst); 1082 emit_byte(0x0F); 1083 emit_byte(0x40 | cc); 1084 emit_operand(dst, src); 1085 } 1086 1087 void Assembler::cmpb(Address dst, int imm8) { 1088 InstructionMark im(this); 1089 prefix(dst); 1090 emit_byte(0x80); 1091 emit_operand(rdi, dst, 1); 1092 emit_byte(imm8); 1093 } 1094 1095 void Assembler::cmpl(Address dst, int32_t imm32) { 1096 InstructionMark im(this); 1097 prefix(dst); 1098 emit_byte(0x81); 1099 emit_operand(rdi, dst, 4); 1100 emit_long(imm32); 
1101 } 1102 1103 void Assembler::cmpl(Register dst, int32_t imm32) { 1104 prefix(dst); 1105 emit_arith(0x81, 0xF8, dst, imm32); 1106 } 1107 1108 void Assembler::cmpl(Register dst, Register src) { 1109 (void) prefix_and_encode(dst->encoding(), src->encoding()); 1110 emit_arith(0x3B, 0xC0, dst, src); 1111 } 1112 1113 1114 void Assembler::cmpl(Register dst, Address src) { 1115 InstructionMark im(this); 1116 prefix(src, dst); 1117 emit_byte(0x3B); 1118 emit_operand(dst, src); 1119 } 1120 1121 void Assembler::cmpw(Address dst, int imm16) { 1122 InstructionMark im(this); 1123 assert(!dst.base_needs_rex() && !dst.index_needs_rex(), "no extended registers"); 1124 emit_byte(0x66); 1125 emit_byte(0x81); 1126 emit_operand(rdi, dst, 2); 1127 emit_word(imm16); 1128 } 1129 1130 // The 32-bit cmpxchg compares the value at adr with the contents of rax, 1131 // and stores reg into adr if so; otherwise, the value at adr is loaded into rax,. 1132 // The ZF is set if the compared values were equal, and cleared otherwise. 1133 void Assembler::cmpxchgl(Register reg, Address adr) { // cmpxchg 1134 if (Atomics & 2) { 1135 // caveat: no instructionmark, so this isn't relocatable. 1136 // Emit a synthetic, non-atomic, CAS equivalent. 1137 // Beware. The synthetic form sets all ICCs, not just ZF. 1138 // cmpxchg r,[m] is equivalent to rax, = CAS (m, rax, r) 1139 cmpl(rax, adr); 1140 movl(rax, adr); 1141 if (reg != rax) { 1142 Label L ; 1143 jcc(Assembler::notEqual, L); 1144 movl(adr, reg); 1145 bind(L); 1146 } 1147 } else { 1148 InstructionMark im(this); 1149 prefix(adr, reg); 1150 emit_byte(0x0F); 1151 emit_byte(0xB1); 1152 emit_operand(reg, adr); 1153 } 1154 } 1155 1156 void Assembler::comisd(XMMRegister dst, Address src) { 1157 // NOTE: dbx seems to decode this as comiss even though the 1158 // 0x66 is there. 
Strangly ucomisd comes out correct 1159 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1160 emit_byte(0x66); 1161 comiss(dst, src); 1162 } 1163 1164 void Assembler::comiss(XMMRegister dst, Address src) { 1165 NOT_LP64(assert(VM_Version::supports_sse(), "")); 1166 1167 InstructionMark im(this); 1168 prefix(src, dst); 1169 emit_byte(0x0F); 1170 emit_byte(0x2F); 1171 emit_operand(dst, src); 1172 } 1173 1174 void Assembler::cvtdq2pd(XMMRegister dst, XMMRegister src) { 1175 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1176 emit_byte(0xF3); 1177 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 1178 emit_byte(0x0F); 1179 emit_byte(0xE6); 1180 emit_byte(0xC0 | encode); 1181 } 1182 1183 void Assembler::cvtdq2ps(XMMRegister dst, XMMRegister src) { 1184 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1185 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 1186 emit_byte(0x0F); 1187 emit_byte(0x5B); 1188 emit_byte(0xC0 | encode); 1189 } 1190 1191 void Assembler::cvtsd2ss(XMMRegister dst, XMMRegister src) { 1192 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1193 emit_byte(0xF2); 1194 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 1195 emit_byte(0x0F); 1196 emit_byte(0x5A); 1197 emit_byte(0xC0 | encode); 1198 } 1199 1200 void Assembler::cvtsi2sdl(XMMRegister dst, Register src) { 1201 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1202 emit_byte(0xF2); 1203 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 1204 emit_byte(0x0F); 1205 emit_byte(0x2A); 1206 emit_byte(0xC0 | encode); 1207 } 1208 1209 void Assembler::cvtsi2ssl(XMMRegister dst, Register src) { 1210 NOT_LP64(assert(VM_Version::supports_sse(), "")); 1211 emit_byte(0xF3); 1212 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 1213 emit_byte(0x0F); 1214 emit_byte(0x2A); 1215 emit_byte(0xC0 | encode); 1216 } 1217 1218 void Assembler::cvtss2sd(XMMRegister dst, XMMRegister src) { 1219 
NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1220 emit_byte(0xF3); 1221 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 1222 emit_byte(0x0F); 1223 emit_byte(0x5A); 1224 emit_byte(0xC0 | encode); 1225 } 1226 1227 void Assembler::cvttsd2sil(Register dst, XMMRegister src) { 1228 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1229 emit_byte(0xF2); 1230 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 1231 emit_byte(0x0F); 1232 emit_byte(0x2C); 1233 emit_byte(0xC0 | encode); 1234 } 1235 1236 void Assembler::cvttss2sil(Register dst, XMMRegister src) { 1237 NOT_LP64(assert(VM_Version::supports_sse(), "")); 1238 emit_byte(0xF3); 1239 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 1240 emit_byte(0x0F); 1241 emit_byte(0x2C); 1242 emit_byte(0xC0 | encode); 1243 } 1244 1245 void Assembler::decl(Address dst) { 1246 // Don't use it directly. Use MacroAssembler::decrement() instead. 1247 InstructionMark im(this); 1248 prefix(dst); 1249 emit_byte(0xFF); 1250 emit_operand(rcx, dst); 1251 } 1252 1253 void Assembler::divsd(XMMRegister dst, Address src) { 1254 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1255 InstructionMark im(this); 1256 emit_byte(0xF2); 1257 prefix(src, dst); 1258 emit_byte(0x0F); 1259 emit_byte(0x5E); 1260 emit_operand(dst, src); 1261 } 1262 1263 void Assembler::divsd(XMMRegister dst, XMMRegister src) { 1264 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1265 emit_byte(0xF2); 1266 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 1267 emit_byte(0x0F); 1268 emit_byte(0x5E); 1269 emit_byte(0xC0 | encode); 1270 } 1271 1272 void Assembler::divss(XMMRegister dst, Address src) { 1273 NOT_LP64(assert(VM_Version::supports_sse(), "")); 1274 InstructionMark im(this); 1275 emit_byte(0xF3); 1276 prefix(src, dst); 1277 emit_byte(0x0F); 1278 emit_byte(0x5E); 1279 emit_operand(dst, src); 1280 } 1281 1282 void Assembler::divss(XMMRegister dst, XMMRegister src) { 1283 
NOT_LP64(assert(VM_Version::supports_sse(), "")); 1284 emit_byte(0xF3); 1285 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 1286 emit_byte(0x0F); 1287 emit_byte(0x5E); 1288 emit_byte(0xC0 | encode); 1289 } 1290 1291 void Assembler::emms() { 1292 NOT_LP64(assert(VM_Version::supports_mmx(), "")); 1293 emit_byte(0x0F); 1294 emit_byte(0x77); 1295 } 1296 1297 void Assembler::hlt() { 1298 emit_byte(0xF4); 1299 } 1300 1301 void Assembler::idivl(Register src) { 1302 int encode = prefix_and_encode(src->encoding()); 1303 emit_byte(0xF7); 1304 emit_byte(0xF8 | encode); 1305 } 1306 1307 void Assembler::divl(Register src) { // Unsigned 1308 int encode = prefix_and_encode(src->encoding()); 1309 emit_byte(0xF7); 1310 emit_byte(0xF0 | encode); 1311 } 1312 1313 void Assembler::imull(Register dst, Register src) { 1314 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 1315 emit_byte(0x0F); 1316 emit_byte(0xAF); 1317 emit_byte(0xC0 | encode); 1318 } 1319 1320 1321 void Assembler::imull(Register dst, Register src, int value) { 1322 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 1323 if (is8bit(value)) { 1324 emit_byte(0x6B); 1325 emit_byte(0xC0 | encode); 1326 emit_byte(value & 0xFF); 1327 } else { 1328 emit_byte(0x69); 1329 emit_byte(0xC0 | encode); 1330 emit_long(value); 1331 } 1332 } 1333 1334 void Assembler::incl(Address dst) { 1335 // Don't use it directly. Use MacroAssembler::increment() instead. 
1336 InstructionMark im(this); 1337 prefix(dst); 1338 emit_byte(0xFF); 1339 emit_operand(rax, dst); 1340 } 1341 1342 void Assembler::jcc(Condition cc, Label& L, relocInfo::relocType rtype) { 1343 InstructionMark im(this); 1344 relocate(rtype); 1345 assert((0 <= cc) && (cc < 16), "illegal cc"); 1346 if (L.is_bound()) { 1347 address dst = target(L); 1348 assert(dst != NULL, "jcc most probably wrong"); 1349 1350 const int short_size = 2; 1351 const int long_size = 6; 1352 intptr_t offs = (intptr_t)dst - (intptr_t)_code_pos; 1353 if (rtype == relocInfo::none && is8bit(offs - short_size)) { 1354 // 0111 tttn #8-bit disp 1355 emit_byte(0x70 | cc); 1356 emit_byte((offs - short_size) & 0xFF); 1357 } else { 1358 // 0000 1111 1000 tttn #32-bit disp 1359 assert(is_simm32(offs - long_size), 1360 "must be 32bit offset (call4)"); 1361 emit_byte(0x0F); 1362 emit_byte(0x80 | cc); 1363 emit_long(offs - long_size); 1364 } 1365 } else { 1366 // Note: could eliminate cond. jumps to this jump if condition 1367 // is the same however, seems to be rather unlikely case. 
1368 // Note: use jccb() if label to be bound is very close to get 1369 // an 8-bit displacement 1370 L.add_patch_at(code(), locator()); 1371 emit_byte(0x0F); 1372 emit_byte(0x80 | cc); 1373 emit_long(0); 1374 } 1375 } 1376 1377 void Assembler::jccb(Condition cc, Label& L) { 1378 if (L.is_bound()) { 1379 const int short_size = 2; 1380 address entry = target(L); 1381 assert(is8bit((intptr_t)entry - ((intptr_t)_code_pos + short_size)), 1382 "Dispacement too large for a short jmp"); 1383 intptr_t offs = (intptr_t)entry - (intptr_t)_code_pos; 1384 // 0111 tttn #8-bit disp 1385 emit_byte(0x70 | cc); 1386 emit_byte((offs - short_size) & 0xFF); 1387 } else { 1388 InstructionMark im(this); 1389 L.add_patch_at(code(), locator()); 1390 emit_byte(0x70 | cc); 1391 emit_byte(0); 1392 } 1393 } 1394 1395 void Assembler::jmp(Address adr) { 1396 InstructionMark im(this); 1397 prefix(adr); 1398 emit_byte(0xFF); 1399 emit_operand(rsp, adr); 1400 } 1401 1402 void Assembler::jmp(Label& L, relocInfo::relocType rtype) { 1403 if (L.is_bound()) { 1404 address entry = target(L); 1405 assert(entry != NULL, "jmp most probably wrong"); 1406 InstructionMark im(this); 1407 const int short_size = 2; 1408 const int long_size = 5; 1409 intptr_t offs = entry - _code_pos; 1410 if (rtype == relocInfo::none && is8bit(offs - short_size)) { 1411 emit_byte(0xEB); 1412 emit_byte((offs - short_size) & 0xFF); 1413 } else { 1414 emit_byte(0xE9); 1415 emit_long(offs - long_size); 1416 } 1417 } else { 1418 // By default, forward jumps are always 32-bit displacements, since 1419 // we can't yet know where the label will be bound. If you're sure that 1420 // the forward jump will not run beyond 256 bytes, use jmpb to 1421 // force an 8-bit displacement. 
1422 InstructionMark im(this); 1423 relocate(rtype); 1424 L.add_patch_at(code(), locator()); 1425 emit_byte(0xE9); 1426 emit_long(0); 1427 } 1428 } 1429 1430 void Assembler::jmp(Register entry) { 1431 int encode = prefix_and_encode(entry->encoding()); 1432 emit_byte(0xFF); 1433 emit_byte(0xE0 | encode); 1434 } 1435 1436 void Assembler::jmp_literal(address dest, RelocationHolder const& rspec) { 1437 InstructionMark im(this); 1438 emit_byte(0xE9); 1439 assert(dest != NULL, "must have a target"); 1440 intptr_t disp = dest - (_code_pos + sizeof(int32_t)); 1441 assert(is_simm32(disp), "must be 32bit offset (jmp)"); 1442 emit_data(disp, rspec.reloc(), call32_operand); 1443 } 1444 1445 void Assembler::jmpb(Label& L) { 1446 if (L.is_bound()) { 1447 const int short_size = 2; 1448 address entry = target(L); 1449 assert(is8bit((entry - _code_pos) + short_size), 1450 "Dispacement too large for a short jmp"); 1451 assert(entry != NULL, "jmp most probably wrong"); 1452 intptr_t offs = entry - _code_pos; 1453 emit_byte(0xEB); 1454 emit_byte((offs - short_size) & 0xFF); 1455 } else { 1456 InstructionMark im(this); 1457 L.add_patch_at(code(), locator()); 1458 emit_byte(0xEB); 1459 emit_byte(0); 1460 } 1461 } 1462 1463 void Assembler::ldmxcsr( Address src) { 1464 NOT_LP64(assert(VM_Version::supports_sse(), "")); 1465 InstructionMark im(this); 1466 prefix(src); 1467 emit_byte(0x0F); 1468 emit_byte(0xAE); 1469 emit_operand(as_Register(2), src); 1470 } 1471 1472 void Assembler::leal(Register dst, Address src) { 1473 InstructionMark im(this); 1474 #ifdef _LP64 1475 emit_byte(0x67); // addr32 1476 prefix(src, dst); 1477 #endif // LP64 1478 emit_byte(0x8D); 1479 emit_operand(dst, src); 1480 } 1481 1482 void Assembler::lock() { 1483 if (Atomics & 1) { 1484 // Emit either nothing, a NOP, or a NOP: prefix 1485 emit_byte(0x90) ; 1486 } else { 1487 emit_byte(0xF0); 1488 } 1489 } 1490 1491 void Assembler::lzcntl(Register dst, Register src) { 1492 assert(VM_Version::supports_lzcnt(), "encoding 
is treated as BSR"); 1493 emit_byte(0xF3); 1494 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 1495 emit_byte(0x0F); 1496 emit_byte(0xBD); 1497 emit_byte(0xC0 | encode); 1498 } 1499 1500 // Emit mfence instruction 1501 void Assembler::mfence() { 1502 NOT_LP64(assert(VM_Version::supports_sse2(), "unsupported");) 1503 emit_byte( 0x0F ); 1504 emit_byte( 0xAE ); 1505 emit_byte( 0xF0 ); 1506 } 1507 1508 void Assembler::mov(Register dst, Register src) { 1509 LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src)); 1510 } 1511 1512 void Assembler::movapd(XMMRegister dst, XMMRegister src) { 1513 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1514 int dstenc = dst->encoding(); 1515 int srcenc = src->encoding(); 1516 emit_byte(0x66); 1517 if (dstenc < 8) { 1518 if (srcenc >= 8) { 1519 prefix(REX_B); 1520 srcenc -= 8; 1521 } 1522 } else { 1523 if (srcenc < 8) { 1524 prefix(REX_R); 1525 } else { 1526 prefix(REX_RB); 1527 srcenc -= 8; 1528 } 1529 dstenc -= 8; 1530 } 1531 emit_byte(0x0F); 1532 emit_byte(0x28); 1533 emit_byte(0xC0 | dstenc << 3 | srcenc); 1534 } 1535 1536 void Assembler::movaps(XMMRegister dst, XMMRegister src) { 1537 NOT_LP64(assert(VM_Version::supports_sse(), "")); 1538 int dstenc = dst->encoding(); 1539 int srcenc = src->encoding(); 1540 if (dstenc < 8) { 1541 if (srcenc >= 8) { 1542 prefix(REX_B); 1543 srcenc -= 8; 1544 } 1545 } else { 1546 if (srcenc < 8) { 1547 prefix(REX_R); 1548 } else { 1549 prefix(REX_RB); 1550 srcenc -= 8; 1551 } 1552 dstenc -= 8; 1553 } 1554 emit_byte(0x0F); 1555 emit_byte(0x28); 1556 emit_byte(0xC0 | dstenc << 3 | srcenc); 1557 } 1558 1559 void Assembler::movb(Register dst, Address src) { 1560 NOT_LP64(assert(dst->has_byte_register(), "must have byte register")); 1561 InstructionMark im(this); 1562 prefix(src, dst, true); 1563 emit_byte(0x8A); 1564 emit_operand(dst, src); 1565 } 1566 1567 1568 void Assembler::movb(Address dst, int imm8) { 1569 InstructionMark im(this); 1570 prefix(dst); 1571 emit_byte(0xC6); 1572 
emit_operand(rax, dst, 1); 1573 emit_byte(imm8); 1574 } 1575 1576 1577 void Assembler::movb(Address dst, Register src) { 1578 assert(src->has_byte_register(), "must have byte register"); 1579 InstructionMark im(this); 1580 prefix(dst, src, true); 1581 emit_byte(0x88); 1582 emit_operand(src, dst); 1583 } 1584 1585 void Assembler::movdl(XMMRegister dst, Register src) { 1586 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1587 emit_byte(0x66); 1588 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 1589 emit_byte(0x0F); 1590 emit_byte(0x6E); 1591 emit_byte(0xC0 | encode); 1592 } 1593 1594 void Assembler::movdl(Register dst, XMMRegister src) { 1595 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1596 emit_byte(0x66); 1597 // swap src/dst to get correct prefix 1598 int encode = prefix_and_encode(src->encoding(), dst->encoding()); 1599 emit_byte(0x0F); 1600 emit_byte(0x7E); 1601 emit_byte(0xC0 | encode); 1602 } 1603 1604 void Assembler::movdl(XMMRegister dst, Address src) { 1605 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1606 InstructionMark im(this); 1607 emit_byte(0x66); 1608 prefix(src, dst); 1609 emit_byte(0x0F); 1610 emit_byte(0x6E); 1611 emit_operand(dst, src); 1612 } 1613 1614 1615 void Assembler::movdqa(XMMRegister dst, Address src) { 1616 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1617 InstructionMark im(this); 1618 emit_byte(0x66); 1619 prefix(src, dst); 1620 emit_byte(0x0F); 1621 emit_byte(0x6F); 1622 emit_operand(dst, src); 1623 } 1624 1625 void Assembler::movdqa(XMMRegister dst, XMMRegister src) { 1626 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1627 emit_byte(0x66); 1628 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 1629 emit_byte(0x0F); 1630 emit_byte(0x6F); 1631 emit_byte(0xC0 | encode); 1632 } 1633 1634 void Assembler::movdqa(Address dst, XMMRegister src) { 1635 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1636 InstructionMark im(this); 1637 emit_byte(0x66); 1638 prefix(dst, src); 
1639 emit_byte(0x0F); 1640 emit_byte(0x7F); 1641 emit_operand(src, dst); 1642 } 1643 1644 void Assembler::movdqu(XMMRegister dst, Address src) { 1645 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1646 InstructionMark im(this); 1647 emit_byte(0xF3); 1648 prefix(src, dst); 1649 emit_byte(0x0F); 1650 emit_byte(0x6F); 1651 emit_operand(dst, src); 1652 } 1653 1654 void Assembler::movdqu(XMMRegister dst, XMMRegister src) { 1655 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1656 emit_byte(0xF3); 1657 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 1658 emit_byte(0x0F); 1659 emit_byte(0x6F); 1660 emit_byte(0xC0 | encode); 1661 } 1662 1663 void Assembler::movdqu(Address dst, XMMRegister src) { 1664 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1665 InstructionMark im(this); 1666 emit_byte(0xF3); 1667 prefix(dst, src); 1668 emit_byte(0x0F); 1669 emit_byte(0x7F); 1670 emit_operand(src, dst); 1671 } 1672 1673 // Uses zero extension on 64bit 1674 1675 void Assembler::movl(Register dst, int32_t imm32) { 1676 int encode = prefix_and_encode(dst->encoding()); 1677 emit_byte(0xB8 | encode); 1678 emit_long(imm32); 1679 } 1680 1681 void Assembler::movl(Register dst, Register src) { 1682 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 1683 emit_byte(0x8B); 1684 emit_byte(0xC0 | encode); 1685 } 1686 1687 void Assembler::movl(Register dst, Address src) { 1688 InstructionMark im(this); 1689 prefix(src, dst); 1690 emit_byte(0x8B); 1691 emit_operand(dst, src); 1692 } 1693 1694 void Assembler::movl(Address dst, int32_t imm32) { 1695 InstructionMark im(this); 1696 prefix(dst); 1697 emit_byte(0xC7); 1698 emit_operand(rax, dst, 4); 1699 emit_long(imm32); 1700 } 1701 1702 void Assembler::movl(Address dst, Register src) { 1703 InstructionMark im(this); 1704 prefix(dst, src); 1705 emit_byte(0x89); 1706 emit_operand(src, dst); 1707 } 1708 1709 // New cpus require to use movsd and movss to avoid partial register stall 1710 // when loading from 
memory. But for old Opteron use movlpd instead of movsd. 1711 // The selection is done in MacroAssembler::movdbl() and movflt(). 1712 void Assembler::movlpd(XMMRegister dst, Address src) { 1713 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1714 InstructionMark im(this); 1715 emit_byte(0x66); 1716 prefix(src, dst); 1717 emit_byte(0x0F); 1718 emit_byte(0x12); 1719 emit_operand(dst, src); 1720 } 1721 1722 void Assembler::movq( MMXRegister dst, Address src ) { 1723 assert( VM_Version::supports_mmx(), "" ); 1724 emit_byte(0x0F); 1725 emit_byte(0x6F); 1726 emit_operand(dst, src); 1727 } 1728 1729 void Assembler::movq( Address dst, MMXRegister src ) { 1730 assert( VM_Version::supports_mmx(), "" ); 1731 emit_byte(0x0F); 1732 emit_byte(0x7F); 1733 // workaround gcc (3.2.1-7a) bug 1734 // In that version of gcc with only an emit_operand(MMX, Address) 1735 // gcc will tail jump and try and reverse the parameters completely 1736 // obliterating dst in the process. By having a version available 1737 // that doesn't need to swap the args at the tail jump the bug is 1738 // avoided. 
1739 emit_operand(dst, src); 1740 } 1741 1742 void Assembler::movq(XMMRegister dst, Address src) { 1743 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1744 InstructionMark im(this); 1745 emit_byte(0xF3); 1746 prefix(src, dst); 1747 emit_byte(0x0F); 1748 emit_byte(0x7E); 1749 emit_operand(dst, src); 1750 } 1751 1752 void Assembler::movq(Address dst, XMMRegister src) { 1753 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1754 InstructionMark im(this); 1755 emit_byte(0x66); 1756 prefix(dst, src); 1757 emit_byte(0x0F); 1758 emit_byte(0xD6); 1759 emit_operand(src, dst); 1760 } 1761 1762 void Assembler::movsbl(Register dst, Address src) { // movsxb 1763 InstructionMark im(this); 1764 prefix(src, dst); 1765 emit_byte(0x0F); 1766 emit_byte(0xBE); 1767 emit_operand(dst, src); 1768 } 1769 1770 void Assembler::movsbl(Register dst, Register src) { // movsxb 1771 NOT_LP64(assert(src->has_byte_register(), "must have byte register")); 1772 int encode = prefix_and_encode(dst->encoding(), src->encoding(), true); 1773 emit_byte(0x0F); 1774 emit_byte(0xBE); 1775 emit_byte(0xC0 | encode); 1776 } 1777 1778 void Assembler::movsd(XMMRegister dst, XMMRegister src) { 1779 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1780 emit_byte(0xF2); 1781 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 1782 emit_byte(0x0F); 1783 emit_byte(0x10); 1784 emit_byte(0xC0 | encode); 1785 } 1786 1787 void Assembler::movsd(XMMRegister dst, Address src) { 1788 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1789 InstructionMark im(this); 1790 emit_byte(0xF2); 1791 prefix(src, dst); 1792 emit_byte(0x0F); 1793 emit_byte(0x10); 1794 emit_operand(dst, src); 1795 } 1796 1797 void Assembler::movsd(Address dst, XMMRegister src) { 1798 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1799 InstructionMark im(this); 1800 emit_byte(0xF2); 1801 prefix(dst, src); 1802 emit_byte(0x0F); 1803 emit_byte(0x11); 1804 emit_operand(src, dst); 1805 } 1806 1807 void Assembler::movss(XMMRegister dst, 
XMMRegister src) { 1808 NOT_LP64(assert(VM_Version::supports_sse(), "")); 1809 emit_byte(0xF3); 1810 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 1811 emit_byte(0x0F); 1812 emit_byte(0x10); 1813 emit_byte(0xC0 | encode); 1814 } 1815 1816 void Assembler::movss(XMMRegister dst, Address src) { 1817 NOT_LP64(assert(VM_Version::supports_sse(), "")); 1818 InstructionMark im(this); 1819 emit_byte(0xF3); 1820 prefix(src, dst); 1821 emit_byte(0x0F); 1822 emit_byte(0x10); 1823 emit_operand(dst, src); 1824 } 1825 1826 void Assembler::movss(Address dst, XMMRegister src) { 1827 NOT_LP64(assert(VM_Version::supports_sse(), "")); 1828 InstructionMark im(this); 1829 emit_byte(0xF3); 1830 prefix(dst, src); 1831 emit_byte(0x0F); 1832 emit_byte(0x11); 1833 emit_operand(src, dst); 1834 } 1835 1836 void Assembler::movswl(Register dst, Address src) { // movsxw 1837 InstructionMark im(this); 1838 prefix(src, dst); 1839 emit_byte(0x0F); 1840 emit_byte(0xBF); 1841 emit_operand(dst, src); 1842 } 1843 1844 void Assembler::movswl(Register dst, Register src) { // movsxw 1845 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 1846 emit_byte(0x0F); 1847 emit_byte(0xBF); 1848 emit_byte(0xC0 | encode); 1849 } 1850 1851 void Assembler::movw(Address dst, int imm16) { 1852 InstructionMark im(this); 1853 1854 emit_byte(0x66); // switch to 16-bit mode 1855 prefix(dst); 1856 emit_byte(0xC7); 1857 emit_operand(rax, dst, 2); 1858 emit_word(imm16); 1859 } 1860 1861 void Assembler::movw(Register dst, Address src) { 1862 InstructionMark im(this); 1863 emit_byte(0x66); 1864 prefix(src, dst); 1865 emit_byte(0x8B); 1866 emit_operand(dst, src); 1867 } 1868 1869 void Assembler::movw(Address dst, Register src) { 1870 InstructionMark im(this); 1871 emit_byte(0x66); 1872 prefix(dst, src); 1873 emit_byte(0x89); 1874 emit_operand(src, dst); 1875 } 1876 1877 void Assembler::movzbl(Register dst, Address src) { // movzxb 1878 InstructionMark im(this); 1879 prefix(src, dst); 1880 
emit_byte(0x0F); 1881 emit_byte(0xB6); 1882 emit_operand(dst, src); 1883 } 1884 1885 void Assembler::movzbl(Register dst, Register src) { // movzxb 1886 NOT_LP64(assert(src->has_byte_register(), "must have byte register")); 1887 int encode = prefix_and_encode(dst->encoding(), src->encoding(), true); 1888 emit_byte(0x0F); 1889 emit_byte(0xB6); 1890 emit_byte(0xC0 | encode); 1891 } 1892 1893 void Assembler::movzwl(Register dst, Address src) { // movzxw 1894 InstructionMark im(this); 1895 prefix(src, dst); 1896 emit_byte(0x0F); 1897 emit_byte(0xB7); 1898 emit_operand(dst, src); 1899 } 1900 1901 void Assembler::movzwl(Register dst, Register src) { // movzxw 1902 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 1903 emit_byte(0x0F); 1904 emit_byte(0xB7); 1905 emit_byte(0xC0 | encode); 1906 } 1907 1908 void Assembler::mull(Address src) { 1909 InstructionMark im(this); 1910 prefix(src); 1911 emit_byte(0xF7); 1912 emit_operand(rsp, src); 1913 } 1914 1915 void Assembler::mull(Register src) { 1916 int encode = prefix_and_encode(src->encoding()); 1917 emit_byte(0xF7); 1918 emit_byte(0xE0 | encode); 1919 } 1920 1921 void Assembler::mulsd(XMMRegister dst, Address src) { 1922 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1923 InstructionMark im(this); 1924 emit_byte(0xF2); 1925 prefix(src, dst); 1926 emit_byte(0x0F); 1927 emit_byte(0x59); 1928 emit_operand(dst, src); 1929 } 1930 1931 void Assembler::mulsd(XMMRegister dst, XMMRegister src) { 1932 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1933 emit_byte(0xF2); 1934 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 1935 emit_byte(0x0F); 1936 emit_byte(0x59); 1937 emit_byte(0xC0 | encode); 1938 } 1939 1940 void Assembler::mulss(XMMRegister dst, Address src) { 1941 NOT_LP64(assert(VM_Version::supports_sse(), "")); 1942 InstructionMark im(this); 1943 emit_byte(0xF3); 1944 prefix(src, dst); 1945 emit_byte(0x0F); 1946 emit_byte(0x59); 1947 emit_operand(dst, src); 1948 } 1949 1950 void 
Assembler::mulss(XMMRegister dst, XMMRegister src) { 1951 NOT_LP64(assert(VM_Version::supports_sse(), "")); 1952 emit_byte(0xF3); 1953 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 1954 emit_byte(0x0F); 1955 emit_byte(0x59); 1956 emit_byte(0xC0 | encode); 1957 } 1958 1959 void Assembler::negl(Register dst) { 1960 int encode = prefix_and_encode(dst->encoding()); 1961 emit_byte(0xF7); 1962 emit_byte(0xD8 | encode); 1963 } 1964 1965 void Assembler::nop(int i) { 1966 #ifdef ASSERT 1967 assert(i > 0, " "); 1968 // The fancy nops aren't currently recognized by debuggers making it a 1969 // pain to disassemble code while debugging. If asserts are on clearly 1970 // speed is not an issue so simply use the single byte traditional nop 1971 // to do alignment. 1972 1973 for (; i > 0 ; i--) emit_byte(0x90); 1974 return; 1975 1976 #endif // ASSERT 1977 1978 if (UseAddressNop && VM_Version::is_intel()) { 1979 // 1980 // Using multi-bytes nops "0x0F 0x1F [address]" for Intel 1981 // 1: 0x90 1982 // 2: 0x66 0x90 1983 // 3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding) 1984 // 4: 0x0F 0x1F 0x40 0x00 1985 // 5: 0x0F 0x1F 0x44 0x00 0x00 1986 // 6: 0x66 0x0F 0x1F 0x44 0x00 0x00 1987 // 7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 1988 // 8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 1989 // 9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 1990 // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 1991 // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 1992 1993 // The rest coding is Intel specific - don't use consecutive address nops 1994 1995 // 12: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90 1996 // 13: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90 1997 // 14: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90 1998 // 15: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90 1999 2000 while(i >= 15) { 2001 // For Intel don't generate consecutive addess nops 
(mix with regular nops) 2002 i -= 15; 2003 emit_byte(0x66); // size prefix 2004 emit_byte(0x66); // size prefix 2005 emit_byte(0x66); // size prefix 2006 addr_nop_8(); 2007 emit_byte(0x66); // size prefix 2008 emit_byte(0x66); // size prefix 2009 emit_byte(0x66); // size prefix 2010 emit_byte(0x90); // nop 2011 } 2012 switch (i) { 2013 case 14: 2014 emit_byte(0x66); // size prefix 2015 case 13: 2016 emit_byte(0x66); // size prefix 2017 case 12: 2018 addr_nop_8(); 2019 emit_byte(0x66); // size prefix 2020 emit_byte(0x66); // size prefix 2021 emit_byte(0x66); // size prefix 2022 emit_byte(0x90); // nop 2023 break; 2024 case 11: 2025 emit_byte(0x66); // size prefix 2026 case 10: 2027 emit_byte(0x66); // size prefix 2028 case 9: 2029 emit_byte(0x66); // size prefix 2030 case 8: 2031 addr_nop_8(); 2032 break; 2033 case 7: 2034 addr_nop_7(); 2035 break; 2036 case 6: 2037 emit_byte(0x66); // size prefix 2038 case 5: 2039 addr_nop_5(); 2040 break; 2041 case 4: 2042 addr_nop_4(); 2043 break; 2044 case 3: 2045 // Don't use "0x0F 0x1F 0x00" - need patching safe padding 2046 emit_byte(0x66); // size prefix 2047 case 2: 2048 emit_byte(0x66); // size prefix 2049 case 1: 2050 emit_byte(0x90); // nop 2051 break; 2052 default: 2053 assert(i == 0, " "); 2054 } 2055 return; 2056 } 2057 if (UseAddressNop && VM_Version::is_amd()) { 2058 // 2059 // Using multi-bytes nops "0x0F 0x1F [address]" for AMD. 
2060 // 1: 0x90 2061 // 2: 0x66 0x90 2062 // 3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding) 2063 // 4: 0x0F 0x1F 0x40 0x00 2064 // 5: 0x0F 0x1F 0x44 0x00 0x00 2065 // 6: 0x66 0x0F 0x1F 0x44 0x00 0x00 2066 // 7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 2067 // 8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 2068 // 9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 2069 // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 2070 // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 2071 2072 // The rest coding is AMD specific - use consecutive address nops 2073 2074 // 12: 0x66 0x0F 0x1F 0x44 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00 2075 // 13: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00 2076 // 14: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 2077 // 15: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 2078 // 16: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 2079 // Size prefixes (0x66) are added for larger sizes 2080 2081 while(i >= 22) { 2082 i -= 11; 2083 emit_byte(0x66); // size prefix 2084 emit_byte(0x66); // size prefix 2085 emit_byte(0x66); // size prefix 2086 addr_nop_8(); 2087 } 2088 // Generate first nop for size between 21-12 2089 switch (i) { 2090 case 21: 2091 i -= 1; 2092 emit_byte(0x66); // size prefix 2093 case 20: 2094 case 19: 2095 i -= 1; 2096 emit_byte(0x66); // size prefix 2097 case 18: 2098 case 17: 2099 i -= 1; 2100 emit_byte(0x66); // size prefix 2101 case 16: 2102 case 15: 2103 i -= 8; 2104 addr_nop_8(); 2105 break; 2106 case 14: 2107 case 13: 2108 i -= 7; 2109 addr_nop_7(); 2110 break; 2111 case 12: 2112 i -= 6; 2113 emit_byte(0x66); // size prefix 2114 addr_nop_5(); 2115 break; 2116 default: 2117 assert(i < 12, " "); 2118 } 2119 2120 // Generate second nop for size between 11-1 2121 switch (i) { 2122 case 11: 2123 emit_byte(0x66); // size prefix 2124 case 10: 2125 emit_byte(0x66); // size prefix 2126 case 9: 
2127 emit_byte(0x66); // size prefix 2128 case 8: 2129 addr_nop_8(); 2130 break; 2131 case 7: 2132 addr_nop_7(); 2133 break; 2134 case 6: 2135 emit_byte(0x66); // size prefix 2136 case 5: 2137 addr_nop_5(); 2138 break; 2139 case 4: 2140 addr_nop_4(); 2141 break; 2142 case 3: 2143 // Don't use "0x0F 0x1F 0x00" - need patching safe padding 2144 emit_byte(0x66); // size prefix 2145 case 2: 2146 emit_byte(0x66); // size prefix 2147 case 1: 2148 emit_byte(0x90); // nop 2149 break; 2150 default: 2151 assert(i == 0, " "); 2152 } 2153 return; 2154 } 2155 2156 // Using nops with size prefixes "0x66 0x90". 2157 // From AMD Optimization Guide: 2158 // 1: 0x90 2159 // 2: 0x66 0x90 2160 // 3: 0x66 0x66 0x90 2161 // 4: 0x66 0x66 0x66 0x90 2162 // 5: 0x66 0x66 0x90 0x66 0x90 2163 // 6: 0x66 0x66 0x90 0x66 0x66 0x90 2164 // 7: 0x66 0x66 0x66 0x90 0x66 0x66 0x90 2165 // 8: 0x66 0x66 0x66 0x90 0x66 0x66 0x66 0x90 2166 // 9: 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90 2167 // 10: 0x66 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90 2168 // 2169 while(i > 12) { 2170 i -= 4; 2171 emit_byte(0x66); // size prefix 2172 emit_byte(0x66); 2173 emit_byte(0x66); 2174 emit_byte(0x90); // nop 2175 } 2176 // 1 - 12 nops 2177 if(i > 8) { 2178 if(i > 9) { 2179 i -= 1; 2180 emit_byte(0x66); 2181 } 2182 i -= 3; 2183 emit_byte(0x66); 2184 emit_byte(0x66); 2185 emit_byte(0x90); 2186 } 2187 // 1 - 8 nops 2188 if(i > 4) { 2189 if(i > 6) { 2190 i -= 1; 2191 emit_byte(0x66); 2192 } 2193 i -= 3; 2194 emit_byte(0x66); 2195 emit_byte(0x66); 2196 emit_byte(0x90); 2197 } 2198 switch (i) { 2199 case 4: 2200 emit_byte(0x66); 2201 case 3: 2202 emit_byte(0x66); 2203 case 2: 2204 emit_byte(0x66); 2205 case 1: 2206 emit_byte(0x90); 2207 break; 2208 default: 2209 assert(i == 0, " "); 2210 } 2211 } 2212 2213 void Assembler::notl(Register dst) { 2214 int encode = prefix_and_encode(dst->encoding()); 2215 emit_byte(0xF7); 2216 emit_byte(0xD0 | encode ); 2217 } 2218 2219 void Assembler::orl(Address dst, int32_t imm32) { 2220 
InstructionMark im(this); 2221 prefix(dst); 2222 emit_arith_operand(0x81, rcx, dst, imm32); 2223 } 2224 2225 void Assembler::orl(Register dst, int32_t imm32) { 2226 prefix(dst); 2227 emit_arith(0x81, 0xC8, dst, imm32); 2228 } 2229 2230 void Assembler::orl(Register dst, Address src) { 2231 InstructionMark im(this); 2232 prefix(src, dst); 2233 emit_byte(0x0B); 2234 emit_operand(dst, src); 2235 } 2236 2237 void Assembler::orl(Register dst, Register src) { 2238 (void) prefix_and_encode(dst->encoding(), src->encoding()); 2239 emit_arith(0x0B, 0xC0, dst, src); 2240 } 2241 2242 void Assembler::pcmpestri(XMMRegister dst, Address src, int imm8) { 2243 assert(VM_Version::supports_sse4_2(), ""); 2244 2245 InstructionMark im(this); 2246 emit_byte(0x66); 2247 prefix(src, dst); 2248 emit_byte(0x0F); 2249 emit_byte(0x3A); 2250 emit_byte(0x61); 2251 emit_operand(dst, src); 2252 emit_byte(imm8); 2253 } 2254 2255 void Assembler::pcmpestri(XMMRegister dst, XMMRegister src, int imm8) { 2256 assert(VM_Version::supports_sse4_2(), ""); 2257 2258 emit_byte(0x66); 2259 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 2260 emit_byte(0x0F); 2261 emit_byte(0x3A); 2262 emit_byte(0x61); 2263 emit_byte(0xC0 | encode); 2264 emit_byte(imm8); 2265 } 2266 2267 // generic 2268 void Assembler::pop(Register dst) { 2269 int encode = prefix_and_encode(dst->encoding()); 2270 emit_byte(0x58 | encode); 2271 } 2272 2273 void Assembler::popcntl(Register dst, Address src) { 2274 assert(VM_Version::supports_popcnt(), "must support"); 2275 InstructionMark im(this); 2276 emit_byte(0xF3); 2277 prefix(src, dst); 2278 emit_byte(0x0F); 2279 emit_byte(0xB8); 2280 emit_operand(dst, src); 2281 } 2282 2283 void Assembler::popcntl(Register dst, Register src) { 2284 assert(VM_Version::supports_popcnt(), "must support"); 2285 emit_byte(0xF3); 2286 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 2287 emit_byte(0x0F); 2288 emit_byte(0xB8); 2289 emit_byte(0xC0 | encode); 2290 } 2291 2292 
void Assembler::popf() { 2293 emit_byte(0x9D); 2294 } 2295 2296 #ifndef _LP64 // no 32bit push/pop on amd64 2297 void Assembler::popl(Address dst) { 2298 // NOTE: this will adjust stack by 8byte on 64bits 2299 InstructionMark im(this); 2300 prefix(dst); 2301 emit_byte(0x8F); 2302 emit_operand(rax, dst); 2303 } 2304 #endif 2305 2306 void Assembler::prefetch_prefix(Address src) { 2307 prefix(src); 2308 emit_byte(0x0F); 2309 } 2310 2311 void Assembler::prefetchnta(Address src) { 2312 NOT_LP64(assert(VM_Version::supports_sse2(), "must support")); 2313 InstructionMark im(this); 2314 prefetch_prefix(src); 2315 emit_byte(0x18); 2316 emit_operand(rax, src); // 0, src 2317 } 2318 2319 void Assembler::prefetchr(Address src) { 2320 NOT_LP64(assert(VM_Version::supports_3dnow_prefetch(), "must support")); 2321 InstructionMark im(this); 2322 prefetch_prefix(src); 2323 emit_byte(0x0D); 2324 emit_operand(rax, src); // 0, src 2325 } 2326 2327 void Assembler::prefetcht0(Address src) { 2328 NOT_LP64(assert(VM_Version::supports_sse(), "must support")); 2329 InstructionMark im(this); 2330 prefetch_prefix(src); 2331 emit_byte(0x18); 2332 emit_operand(rcx, src); // 1, src 2333 } 2334 2335 void Assembler::prefetcht1(Address src) { 2336 NOT_LP64(assert(VM_Version::supports_sse(), "must support")); 2337 InstructionMark im(this); 2338 prefetch_prefix(src); 2339 emit_byte(0x18); 2340 emit_operand(rdx, src); // 2, src 2341 } 2342 2343 void Assembler::prefetcht2(Address src) { 2344 NOT_LP64(assert(VM_Version::supports_sse(), "must support")); 2345 InstructionMark im(this); 2346 prefetch_prefix(src); 2347 emit_byte(0x18); 2348 emit_operand(rbx, src); // 3, src 2349 } 2350 2351 void Assembler::prefetchw(Address src) { 2352 NOT_LP64(assert(VM_Version::supports_3dnow_prefetch(), "must support")); 2353 InstructionMark im(this); 2354 prefetch_prefix(src); 2355 emit_byte(0x0D); 2356 emit_operand(rcx, src); // 1, src 2357 } 2358 2359 void Assembler::prefix(Prefix p) { 2360 a_byte(p); 2361 } 2362 2363 
void Assembler::por(XMMRegister dst, XMMRegister src) { 2364 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2365 2366 emit_byte(0x66); 2367 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 2368 emit_byte(0x0F); 2369 2370 emit_byte(0xEB); 2371 emit_byte(0xC0 | encode); 2372 } 2373 2374 void Assembler::pshufd(XMMRegister dst, XMMRegister src, int mode) { 2375 assert(isByte(mode), "invalid value"); 2376 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2377 2378 emit_byte(0x66); 2379 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 2380 emit_byte(0x0F); 2381 emit_byte(0x70); 2382 emit_byte(0xC0 | encode); 2383 emit_byte(mode & 0xFF); 2384 2385 } 2386 2387 void Assembler::pshufd(XMMRegister dst, Address src, int mode) { 2388 assert(isByte(mode), "invalid value"); 2389 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2390 2391 InstructionMark im(this); 2392 emit_byte(0x66); 2393 prefix(src, dst); 2394 emit_byte(0x0F); 2395 emit_byte(0x70); 2396 emit_operand(dst, src); 2397 emit_byte(mode & 0xFF); 2398 } 2399 2400 void Assembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) { 2401 assert(isByte(mode), "invalid value"); 2402 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2403 2404 emit_byte(0xF2); 2405 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 2406 emit_byte(0x0F); 2407 emit_byte(0x70); 2408 emit_byte(0xC0 | encode); 2409 emit_byte(mode & 0xFF); 2410 } 2411 2412 void Assembler::pshuflw(XMMRegister dst, Address src, int mode) { 2413 assert(isByte(mode), "invalid value"); 2414 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2415 2416 InstructionMark im(this); 2417 emit_byte(0xF2); 2418 prefix(src, dst); // QQ new 2419 emit_byte(0x0F); 2420 emit_byte(0x70); 2421 emit_operand(dst, src); 2422 emit_byte(mode & 0xFF); 2423 } 2424 2425 void Assembler::psrlq(XMMRegister dst, int shift) { 2426 // Shift 64 bit value logically right by specified number of bits. 2427 // HMM Table D-1 says sse2 or mmx. 
2428 // Do not confuse it with psrldq SSE2 instruction which 2429 // shifts 128 bit value in xmm register by number of bytes. 2430 NOT_LP64(assert(VM_Version::supports_sse(), "")); 2431 2432 int encode = prefixq_and_encode(xmm2->encoding(), dst->encoding()); 2433 emit_byte(0x66); 2434 emit_byte(0x0F); 2435 emit_byte(0x73); 2436 emit_byte(0xC0 | encode); 2437 emit_byte(shift); 2438 } 2439 2440 void Assembler::psrldq(XMMRegister dst, int shift) { 2441 // Shift 128 bit value in xmm register by number of bytes. 2442 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2443 2444 int encode = prefixq_and_encode(xmm3->encoding(), dst->encoding()); 2445 emit_byte(0x66); 2446 emit_byte(0x0F); 2447 emit_byte(0x73); 2448 emit_byte(0xC0 | encode); 2449 emit_byte(shift); 2450 } 2451 2452 void Assembler::ptest(XMMRegister dst, Address src) { 2453 assert(VM_Version::supports_sse4_1(), ""); 2454 2455 InstructionMark im(this); 2456 emit_byte(0x66); 2457 prefix(src, dst); 2458 emit_byte(0x0F); 2459 emit_byte(0x38); 2460 emit_byte(0x17); 2461 emit_operand(dst, src); 2462 } 2463 2464 void Assembler::ptest(XMMRegister dst, XMMRegister src) { 2465 assert(VM_Version::supports_sse4_1(), ""); 2466 2467 emit_byte(0x66); 2468 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 2469 emit_byte(0x0F); 2470 emit_byte(0x38); 2471 emit_byte(0x17); 2472 emit_byte(0xC0 | encode); 2473 } 2474 2475 void Assembler::punpcklbw(XMMRegister dst, XMMRegister src) { 2476 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2477 emit_byte(0x66); 2478 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 2479 emit_byte(0x0F); 2480 emit_byte(0x60); 2481 emit_byte(0xC0 | encode); 2482 } 2483 2484 void Assembler::push(int32_t imm32) { 2485 // in 64bits we push 64bits onto the stack but only 2486 // take a 32bit immediate 2487 emit_byte(0x68); 2488 emit_long(imm32); 2489 } 2490 2491 void Assembler::push(Register src) { 2492 int encode = prefix_and_encode(src->encoding()); 2493 2494 
emit_byte(0x50 | encode); 2495 } 2496 2497 void Assembler::pushf() { 2498 emit_byte(0x9C); 2499 } 2500 2501 #ifndef _LP64 // no 32bit push/pop on amd64 2502 void Assembler::pushl(Address src) { 2503 // Note this will push 64bit on 64bit 2504 InstructionMark im(this); 2505 prefix(src); 2506 emit_byte(0xFF); 2507 emit_operand(rsi, src); 2508 } 2509 #endif 2510 2511 void Assembler::pxor(XMMRegister dst, Address src) { 2512 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2513 InstructionMark im(this); 2514 emit_byte(0x66); 2515 prefix(src, dst); 2516 emit_byte(0x0F); 2517 emit_byte(0xEF); 2518 emit_operand(dst, src); 2519 } 2520 2521 void Assembler::pxor(XMMRegister dst, XMMRegister src) { 2522 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2523 InstructionMark im(this); 2524 emit_byte(0x66); 2525 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 2526 emit_byte(0x0F); 2527 emit_byte(0xEF); 2528 emit_byte(0xC0 | encode); 2529 } 2530 2531 void Assembler::rcll(Register dst, int imm8) { 2532 assert(isShiftCount(imm8), "illegal shift count"); 2533 int encode = prefix_and_encode(dst->encoding()); 2534 if (imm8 == 1) { 2535 emit_byte(0xD1); 2536 emit_byte(0xD0 | encode); 2537 } else { 2538 emit_byte(0xC1); 2539 emit_byte(0xD0 | encode); 2540 emit_byte(imm8); 2541 } 2542 } 2543 2544 // copies data from [esi] to [edi] using rcx pointer sized words 2545 // generic 2546 void Assembler::rep_mov() { 2547 emit_byte(0xF3); 2548 // MOVSQ 2549 LP64_ONLY(prefix(REX_W)); 2550 emit_byte(0xA5); 2551 } 2552 2553 // sets rcx pointer sized words with rax, value at [edi] 2554 // generic 2555 void Assembler::rep_set() { // rep_set 2556 emit_byte(0xF3); 2557 // STOSQ 2558 LP64_ONLY(prefix(REX_W)); 2559 emit_byte(0xAB); 2560 } 2561 2562 // scans rcx pointer sized words at [edi] for occurance of rax, 2563 // generic 2564 void Assembler::repne_scan() { // repne_scan 2565 emit_byte(0xF2); 2566 // SCASQ 2567 LP64_ONLY(prefix(REX_W)); 2568 emit_byte(0xAF); 2569 } 2570 2571 
#ifdef _LP64 2572 // scans rcx 4 byte words at [edi] for occurance of rax, 2573 // generic 2574 void Assembler::repne_scanl() { // repne_scan 2575 emit_byte(0xF2); 2576 // SCASL 2577 emit_byte(0xAF); 2578 } 2579 #endif 2580 2581 void Assembler::ret(int imm16) { 2582 if (imm16 == 0) { 2583 emit_byte(0xC3); 2584 } else { 2585 emit_byte(0xC2); 2586 emit_word(imm16); 2587 } 2588 } 2589 2590 void Assembler::sahf() { 2591 #ifdef _LP64 2592 // Not supported in 64bit mode 2593 ShouldNotReachHere(); 2594 #endif 2595 emit_byte(0x9E); 2596 } 2597 2598 void Assembler::sarl(Register dst, int imm8) { 2599 int encode = prefix_and_encode(dst->encoding()); 2600 assert(isShiftCount(imm8), "illegal shift count"); 2601 if (imm8 == 1) { 2602 emit_byte(0xD1); 2603 emit_byte(0xF8 | encode); 2604 } else { 2605 emit_byte(0xC1); 2606 emit_byte(0xF8 | encode); 2607 emit_byte(imm8); 2608 } 2609 } 2610 2611 void Assembler::sarl(Register dst) { 2612 int encode = prefix_and_encode(dst->encoding()); 2613 emit_byte(0xD3); 2614 emit_byte(0xF8 | encode); 2615 } 2616 2617 void Assembler::sbbl(Address dst, int32_t imm32) { 2618 InstructionMark im(this); 2619 prefix(dst); 2620 emit_arith_operand(0x81, rbx, dst, imm32); 2621 } 2622 2623 void Assembler::sbbl(Register dst, int32_t imm32) { 2624 prefix(dst); 2625 emit_arith(0x81, 0xD8, dst, imm32); 2626 } 2627 2628 2629 void Assembler::sbbl(Register dst, Address src) { 2630 InstructionMark im(this); 2631 prefix(src, dst); 2632 emit_byte(0x1B); 2633 emit_operand(dst, src); 2634 } 2635 2636 void Assembler::sbbl(Register dst, Register src) { 2637 (void) prefix_and_encode(dst->encoding(), src->encoding()); 2638 emit_arith(0x1B, 0xC0, dst, src); 2639 } 2640 2641 void Assembler::setb(Condition cc, Register dst) { 2642 assert(0 <= cc && cc < 16, "illegal cc"); 2643 int encode = prefix_and_encode(dst->encoding(), true); 2644 emit_byte(0x0F); 2645 emit_byte(0x90 | cc); 2646 emit_byte(0xC0 | encode); 2647 } 2648 2649 void Assembler::shll(Register dst, int imm8) { 
2650 assert(isShiftCount(imm8), "illegal shift count"); 2651 int encode = prefix_and_encode(dst->encoding()); 2652 if (imm8 == 1 ) { 2653 emit_byte(0xD1); 2654 emit_byte(0xE0 | encode); 2655 } else { 2656 emit_byte(0xC1); 2657 emit_byte(0xE0 | encode); 2658 emit_byte(imm8); 2659 } 2660 } 2661 2662 void Assembler::shll(Register dst) { 2663 int encode = prefix_and_encode(dst->encoding()); 2664 emit_byte(0xD3); 2665 emit_byte(0xE0 | encode); 2666 } 2667 2668 void Assembler::shrl(Register dst, int imm8) { 2669 assert(isShiftCount(imm8), "illegal shift count"); 2670 int encode = prefix_and_encode(dst->encoding()); 2671 emit_byte(0xC1); 2672 emit_byte(0xE8 | encode); 2673 emit_byte(imm8); 2674 } 2675 2676 void Assembler::shrl(Register dst) { 2677 int encode = prefix_and_encode(dst->encoding()); 2678 emit_byte(0xD3); 2679 emit_byte(0xE8 | encode); 2680 } 2681 2682 // copies a single word from [esi] to [edi] 2683 void Assembler::smovl() { 2684 emit_byte(0xA5); 2685 } 2686 2687 void Assembler::sqrtsd(XMMRegister dst, XMMRegister src) { 2688 // HMM Table D-1 says sse2 2689 // NOT_LP64(assert(VM_Version::supports_sse(), "")); 2690 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2691 emit_byte(0xF2); 2692 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 2693 emit_byte(0x0F); 2694 emit_byte(0x51); 2695 emit_byte(0xC0 | encode); 2696 } 2697 2698 void Assembler::sqrtsd(XMMRegister dst, Address src) { 2699 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2700 InstructionMark im(this); 2701 emit_byte(0xF2); 2702 prefix(src, dst); 2703 emit_byte(0x0F); 2704 emit_byte(0x51); 2705 emit_operand(dst, src); 2706 } 2707 2708 void Assembler::sqrtss(XMMRegister dst, XMMRegister src) { 2709 // HMM Table D-1 says sse2 2710 // NOT_LP64(assert(VM_Version::supports_sse(), "")); 2711 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2712 emit_byte(0xF3); 2713 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 2714 emit_byte(0x0F); 2715 emit_byte(0x51); 2716 
emit_byte(0xC0 | encode); 2717 } 2718 2719 void Assembler::sqrtss(XMMRegister dst, Address src) { 2720 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2721 InstructionMark im(this); 2722 emit_byte(0xF3); 2723 prefix(src, dst); 2724 emit_byte(0x0F); 2725 emit_byte(0x51); 2726 emit_operand(dst, src); 2727 } 2728 2729 void Assembler::stmxcsr( Address dst) { 2730 NOT_LP64(assert(VM_Version::supports_sse(), "")); 2731 InstructionMark im(this); 2732 prefix(dst); 2733 emit_byte(0x0F); 2734 emit_byte(0xAE); 2735 emit_operand(as_Register(3), dst); 2736 } 2737 2738 void Assembler::subl(Address dst, int32_t imm32) { 2739 InstructionMark im(this); 2740 prefix(dst); 2741 emit_arith_operand(0x81, rbp, dst, imm32); 2742 } 2743 2744 void Assembler::subl(Address dst, Register src) { 2745 InstructionMark im(this); 2746 prefix(dst, src); 2747 emit_byte(0x29); 2748 emit_operand(src, dst); 2749 } 2750 2751 void Assembler::subl(Register dst, int32_t imm32) { 2752 prefix(dst); 2753 emit_arith(0x81, 0xE8, dst, imm32); 2754 } 2755 2756 void Assembler::subl(Register dst, Address src) { 2757 InstructionMark im(this); 2758 prefix(src, dst); 2759 emit_byte(0x2B); 2760 emit_operand(dst, src); 2761 } 2762 2763 void Assembler::subl(Register dst, Register src) { 2764 (void) prefix_and_encode(dst->encoding(), src->encoding()); 2765 emit_arith(0x2B, 0xC0, dst, src); 2766 } 2767 2768 void Assembler::subsd(XMMRegister dst, XMMRegister src) { 2769 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2770 emit_byte(0xF2); 2771 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 2772 emit_byte(0x0F); 2773 emit_byte(0x5C); 2774 emit_byte(0xC0 | encode); 2775 } 2776 2777 void Assembler::subsd(XMMRegister dst, Address src) { 2778 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2779 InstructionMark im(this); 2780 emit_byte(0xF2); 2781 prefix(src, dst); 2782 emit_byte(0x0F); 2783 emit_byte(0x5C); 2784 emit_operand(dst, src); 2785 } 2786 2787 void Assembler::subss(XMMRegister dst, XMMRegister 
src) { 2788 NOT_LP64(assert(VM_Version::supports_sse(), "")); 2789 emit_byte(0xF3); 2790 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 2791 emit_byte(0x0F); 2792 emit_byte(0x5C); 2793 emit_byte(0xC0 | encode); 2794 } 2795 2796 void Assembler::subss(XMMRegister dst, Address src) { 2797 NOT_LP64(assert(VM_Version::supports_sse(), "")); 2798 InstructionMark im(this); 2799 emit_byte(0xF3); 2800 prefix(src, dst); 2801 emit_byte(0x0F); 2802 emit_byte(0x5C); 2803 emit_operand(dst, src); 2804 } 2805 2806 void Assembler::testb(Register dst, int imm8) { 2807 NOT_LP64(assert(dst->has_byte_register(), "must have byte register")); 2808 (void) prefix_and_encode(dst->encoding(), true); 2809 emit_arith_b(0xF6, 0xC0, dst, imm8); 2810 } 2811 2812 void Assembler::testl(Register dst, int32_t imm32) { 2813 // not using emit_arith because test 2814 // doesn't support sign-extension of 2815 // 8bit operands 2816 int encode = dst->encoding(); 2817 if (encode == 0) { 2818 emit_byte(0xA9); 2819 } else { 2820 encode = prefix_and_encode(encode); 2821 emit_byte(0xF7); 2822 emit_byte(0xC0 | encode); 2823 } 2824 emit_long(imm32); 2825 } 2826 2827 void Assembler::testl(Register dst, Register src) { 2828 (void) prefix_and_encode(dst->encoding(), src->encoding()); 2829 emit_arith(0x85, 0xC0, dst, src); 2830 } 2831 2832 void Assembler::testl(Register dst, Address src) { 2833 InstructionMark im(this); 2834 prefix(src, dst); 2835 emit_byte(0x85); 2836 emit_operand(dst, src); 2837 } 2838 2839 void Assembler::ucomisd(XMMRegister dst, Address src) { 2840 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2841 emit_byte(0x66); 2842 ucomiss(dst, src); 2843 } 2844 2845 void Assembler::ucomisd(XMMRegister dst, XMMRegister src) { 2846 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2847 emit_byte(0x66); 2848 ucomiss(dst, src); 2849 } 2850 2851 void Assembler::ucomiss(XMMRegister dst, Address src) { 2852 NOT_LP64(assert(VM_Version::supports_sse(), "")); 2853 2854 InstructionMark 
im(this); 2855 prefix(src, dst); 2856 emit_byte(0x0F); 2857 emit_byte(0x2E); 2858 emit_operand(dst, src); 2859 } 2860 2861 void Assembler::ucomiss(XMMRegister dst, XMMRegister src) { 2862 NOT_LP64(assert(VM_Version::supports_sse(), "")); 2863 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 2864 emit_byte(0x0F); 2865 emit_byte(0x2E); 2866 emit_byte(0xC0 | encode); 2867 } 2868 2869 2870 void Assembler::xaddl(Address dst, Register src) { 2871 InstructionMark im(this); 2872 prefix(dst, src); 2873 emit_byte(0x0F); 2874 emit_byte(0xC1); 2875 emit_operand(src, dst); 2876 } 2877 2878 void Assembler::xchgl(Register dst, Address src) { // xchg 2879 InstructionMark im(this); 2880 prefix(src, dst); 2881 emit_byte(0x87); 2882 emit_operand(dst, src); 2883 } 2884 2885 void Assembler::xchgl(Register dst, Register src) { 2886 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 2887 emit_byte(0x87); 2888 emit_byte(0xc0 | encode); 2889 } 2890 2891 void Assembler::xorl(Register dst, int32_t imm32) { 2892 prefix(dst); 2893 emit_arith(0x81, 0xF0, dst, imm32); 2894 } 2895 2896 void Assembler::xorl(Register dst, Address src) { 2897 InstructionMark im(this); 2898 prefix(src, dst); 2899 emit_byte(0x33); 2900 emit_operand(dst, src); 2901 } 2902 2903 void Assembler::xorl(Register dst, Register src) { 2904 (void) prefix_and_encode(dst->encoding(), src->encoding()); 2905 emit_arith(0x33, 0xC0, dst, src); 2906 } 2907 2908 void Assembler::xorpd(XMMRegister dst, XMMRegister src) { 2909 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2910 emit_byte(0x66); 2911 xorps(dst, src); 2912 } 2913 2914 void Assembler::xorpd(XMMRegister dst, Address src) { 2915 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2916 InstructionMark im(this); 2917 emit_byte(0x66); 2918 prefix(src, dst); 2919 emit_byte(0x0F); 2920 emit_byte(0x57); 2921 emit_operand(dst, src); 2922 } 2923 2924 2925 void Assembler::xorps(XMMRegister dst, XMMRegister src) { 2926 
NOT_LP64(assert(VM_Version::supports_sse(), "")); 2927 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 2928 emit_byte(0x0F); 2929 emit_byte(0x57); 2930 emit_byte(0xC0 | encode); 2931 } 2932 2933 void Assembler::xorps(XMMRegister dst, Address src) { 2934 NOT_LP64(assert(VM_Version::supports_sse(), "")); 2935 InstructionMark im(this); 2936 prefix(src, dst); 2937 emit_byte(0x0F); 2938 emit_byte(0x57); 2939 emit_operand(dst, src); 2940 } 2941 2942 #ifndef _LP64 2943 // 32bit only pieces of the assembler 2944 2945 void Assembler::cmp_literal32(Register src1, int32_t imm32, RelocationHolder const& rspec) { 2946 // NO PREFIX AS NEVER 64BIT 2947 InstructionMark im(this); 2948 emit_byte(0x81); 2949 emit_byte(0xF8 | src1->encoding()); 2950 emit_data(imm32, rspec, 0); 2951 } 2952 2953 void Assembler::cmp_literal32(Address src1, int32_t imm32, RelocationHolder const& rspec) { 2954 // NO PREFIX AS NEVER 64BIT (not even 32bit versions of 64bit regs 2955 InstructionMark im(this); 2956 emit_byte(0x81); 2957 emit_operand(rdi, src1); 2958 emit_data(imm32, rspec, 0); 2959 } 2960 2961 // The 64-bit (32bit platform) cmpxchg compares the value at adr with the contents of rdx:rax, 2962 // and stores rcx:rbx into adr if so; otherwise, the value at adr is loaded 2963 // into rdx:rax. The ZF is set if the compared values were equal, and cleared otherwise. 2964 void Assembler::cmpxchg8(Address adr) { 2965 InstructionMark im(this); 2966 emit_byte(0x0F); 2967 emit_byte(0xc7); 2968 emit_operand(rcx, adr); 2969 } 2970 2971 void Assembler::decl(Register dst) { 2972 // Don't use it directly. Use MacroAssembler::decrementl() instead. 
2973 emit_byte(0x48 | dst->encoding()); 2974 } 2975 2976 #endif // _LP64 2977 2978 // 64bit typically doesn't use the x87 but needs to for the trig funcs 2979 2980 void Assembler::fabs() { 2981 emit_byte(0xD9); 2982 emit_byte(0xE1); 2983 } 2984 2985 void Assembler::fadd(int i) { 2986 emit_farith(0xD8, 0xC0, i); 2987 } 2988 2989 void Assembler::fadd_d(Address src) { 2990 InstructionMark im(this); 2991 emit_byte(0xDC); 2992 emit_operand32(rax, src); 2993 } 2994 2995 void Assembler::fadd_s(Address src) { 2996 InstructionMark im(this); 2997 emit_byte(0xD8); 2998 emit_operand32(rax, src); 2999 } 3000 3001 void Assembler::fadda(int i) { 3002 emit_farith(0xDC, 0xC0, i); 3003 } 3004 3005 void Assembler::faddp(int i) { 3006 emit_farith(0xDE, 0xC0, i); 3007 } 3008 3009 void Assembler::fchs() { 3010 emit_byte(0xD9); 3011 emit_byte(0xE0); 3012 } 3013 3014 void Assembler::fcom(int i) { 3015 emit_farith(0xD8, 0xD0, i); 3016 } 3017 3018 void Assembler::fcomp(int i) { 3019 emit_farith(0xD8, 0xD8, i); 3020 } 3021 3022 void Assembler::fcomp_d(Address src) { 3023 InstructionMark im(this); 3024 emit_byte(0xDC); 3025 emit_operand32(rbx, src); 3026 } 3027 3028 void Assembler::fcomp_s(Address src) { 3029 InstructionMark im(this); 3030 emit_byte(0xD8); 3031 emit_operand32(rbx, src); 3032 } 3033 3034 void Assembler::fcompp() { 3035 emit_byte(0xDE); 3036 emit_byte(0xD9); 3037 } 3038 3039 void Assembler::fcos() { 3040 emit_byte(0xD9); 3041 emit_byte(0xFF); 3042 } 3043 3044 void Assembler::fdecstp() { 3045 emit_byte(0xD9); 3046 emit_byte(0xF6); 3047 } 3048 3049 void Assembler::fdiv(int i) { 3050 emit_farith(0xD8, 0xF0, i); 3051 } 3052 3053 void Assembler::fdiv_d(Address src) { 3054 InstructionMark im(this); 3055 emit_byte(0xDC); 3056 emit_operand32(rsi, src); 3057 } 3058 3059 void Assembler::fdiv_s(Address src) { 3060 InstructionMark im(this); 3061 emit_byte(0xD8); 3062 emit_operand32(rsi, src); 3063 } 3064 3065 void Assembler::fdiva(int i) { 3066 emit_farith(0xDC, 0xF8, i); 3067 } 3068 3069 
// Note: The Intel manual (Pentium Processor User's Manual, Vol.3, 1994) 3070 // is erroneous for some of the floating-point instructions below. 3071 3072 void Assembler::fdivp(int i) { 3073 emit_farith(0xDE, 0xF8, i); // ST(0) <- ST(0) / ST(1) and pop (Intel manual wrong) 3074 } 3075 3076 void Assembler::fdivr(int i) { 3077 emit_farith(0xD8, 0xF8, i); 3078 } 3079 3080 void Assembler::fdivr_d(Address src) { 3081 InstructionMark im(this); 3082 emit_byte(0xDC); 3083 emit_operand32(rdi, src); 3084 } 3085 3086 void Assembler::fdivr_s(Address src) { 3087 InstructionMark im(this); 3088 emit_byte(0xD8); 3089 emit_operand32(rdi, src); 3090 } 3091 3092 void Assembler::fdivra(int i) { 3093 emit_farith(0xDC, 0xF0, i); 3094 } 3095 3096 void Assembler::fdivrp(int i) { 3097 emit_farith(0xDE, 0xF0, i); // ST(0) <- ST(1) / ST(0) and pop (Intel manual wrong) 3098 } 3099 3100 void Assembler::ffree(int i) { 3101 emit_farith(0xDD, 0xC0, i); 3102 } 3103 3104 void Assembler::fild_d(Address adr) { 3105 InstructionMark im(this); 3106 emit_byte(0xDF); 3107 emit_operand32(rbp, adr); 3108 } 3109 3110 void Assembler::fild_s(Address adr) { 3111 InstructionMark im(this); 3112 emit_byte(0xDB); 3113 emit_operand32(rax, adr); 3114 } 3115 3116 void Assembler::fincstp() { 3117 emit_byte(0xD9); 3118 emit_byte(0xF7); 3119 } 3120 3121 void Assembler::finit() { 3122 emit_byte(0x9B); 3123 emit_byte(0xDB); 3124 emit_byte(0xE3); 3125 } 3126 3127 void Assembler::fist_s(Address adr) { 3128 InstructionMark im(this); 3129 emit_byte(0xDB); 3130 emit_operand32(rdx, adr); 3131 } 3132 3133 void Assembler::fistp_d(Address adr) { 3134 InstructionMark im(this); 3135 emit_byte(0xDF); 3136 emit_operand32(rdi, adr); 3137 } 3138 3139 void Assembler::fistp_s(Address adr) { 3140 InstructionMark im(this); 3141 emit_byte(0xDB); 3142 emit_operand32(rbx, adr); 3143 } 3144 3145 void Assembler::fld1() { 3146 emit_byte(0xD9); 3147 emit_byte(0xE8); 3148 } 3149 3150 void Assembler::fld_d(Address adr) { 3151 InstructionMark 
im(this); 3152 emit_byte(0xDD); 3153 emit_operand32(rax, adr); 3154 } 3155 3156 void Assembler::fld_s(Address adr) { 3157 InstructionMark im(this); 3158 emit_byte(0xD9); 3159 emit_operand32(rax, adr); 3160 } 3161 3162 3163 void Assembler::fld_s(int index) { 3164 emit_farith(0xD9, 0xC0, index); 3165 } 3166 3167 void Assembler::fld_x(Address adr) { 3168 InstructionMark im(this); 3169 emit_byte(0xDB); 3170 emit_operand32(rbp, adr); 3171 } 3172 3173 void Assembler::fldcw(Address src) { 3174 InstructionMark im(this); 3175 emit_byte(0xd9); 3176 emit_operand32(rbp, src); 3177 } 3178 3179 void Assembler::fldenv(Address src) { 3180 InstructionMark im(this); 3181 emit_byte(0xD9); 3182 emit_operand32(rsp, src); 3183 } 3184 3185 void Assembler::fldlg2() { 3186 emit_byte(0xD9); 3187 emit_byte(0xEC); 3188 } 3189 3190 void Assembler::fldln2() { 3191 emit_byte(0xD9); 3192 emit_byte(0xED); 3193 } 3194 3195 void Assembler::fldz() { 3196 emit_byte(0xD9); 3197 emit_byte(0xEE); 3198 } 3199 3200 void Assembler::flog() { 3201 fldln2(); 3202 fxch(); 3203 fyl2x(); 3204 } 3205 3206 void Assembler::flog10() { 3207 fldlg2(); 3208 fxch(); 3209 fyl2x(); 3210 } 3211 3212 void Assembler::fmul(int i) { 3213 emit_farith(0xD8, 0xC8, i); 3214 } 3215 3216 void Assembler::fmul_d(Address src) { 3217 InstructionMark im(this); 3218 emit_byte(0xDC); 3219 emit_operand32(rcx, src); 3220 } 3221 3222 void Assembler::fmul_s(Address src) { 3223 InstructionMark im(this); 3224 emit_byte(0xD8); 3225 emit_operand32(rcx, src); 3226 } 3227 3228 void Assembler::fmula(int i) { 3229 emit_farith(0xDC, 0xC8, i); 3230 } 3231 3232 void Assembler::fmulp(int i) { 3233 emit_farith(0xDE, 0xC8, i); 3234 } 3235 3236 void Assembler::fnsave(Address dst) { 3237 InstructionMark im(this); 3238 emit_byte(0xDD); 3239 emit_operand32(rsi, dst); 3240 } 3241 3242 void Assembler::fnstcw(Address src) { 3243 InstructionMark im(this); 3244 emit_byte(0x9B); 3245 emit_byte(0xD9); 3246 emit_operand32(rdi, src); 3247 } 3248 3249 void 
Assembler::fnstsw_ax() { 3250 emit_byte(0xdF); 3251 emit_byte(0xE0); 3252 } 3253 3254 void Assembler::fprem() { 3255 emit_byte(0xD9); 3256 emit_byte(0xF8); 3257 } 3258 3259 void Assembler::fprem1() { 3260 emit_byte(0xD9); 3261 emit_byte(0xF5); 3262 } 3263 3264 void Assembler::frstor(Address src) { 3265 InstructionMark im(this); 3266 emit_byte(0xDD); 3267 emit_operand32(rsp, src); 3268 } 3269 3270 void Assembler::fsin() { 3271 emit_byte(0xD9); 3272 emit_byte(0xFE); 3273 } 3274 3275 void Assembler::fsqrt() { 3276 emit_byte(0xD9); 3277 emit_byte(0xFA); 3278 } 3279 3280 void Assembler::fst_d(Address adr) { 3281 InstructionMark im(this); 3282 emit_byte(0xDD); 3283 emit_operand32(rdx, adr); 3284 } 3285 3286 void Assembler::fst_s(Address adr) { 3287 InstructionMark im(this); 3288 emit_byte(0xD9); 3289 emit_operand32(rdx, adr); 3290 } 3291 3292 void Assembler::fstp_d(Address adr) { 3293 InstructionMark im(this); 3294 emit_byte(0xDD); 3295 emit_operand32(rbx, adr); 3296 } 3297 3298 void Assembler::fstp_d(int index) { 3299 emit_farith(0xDD, 0xD8, index); 3300 } 3301 3302 void Assembler::fstp_s(Address adr) { 3303 InstructionMark im(this); 3304 emit_byte(0xD9); 3305 emit_operand32(rbx, adr); 3306 } 3307 3308 void Assembler::fstp_x(Address adr) { 3309 InstructionMark im(this); 3310 emit_byte(0xDB); 3311 emit_operand32(rdi, adr); 3312 } 3313 3314 void Assembler::fsub(int i) { 3315 emit_farith(0xD8, 0xE0, i); 3316 } 3317 3318 void Assembler::fsub_d(Address src) { 3319 InstructionMark im(this); 3320 emit_byte(0xDC); 3321 emit_operand32(rsp, src); 3322 } 3323 3324 void Assembler::fsub_s(Address src) { 3325 InstructionMark im(this); 3326 emit_byte(0xD8); 3327 emit_operand32(rsp, src); 3328 } 3329 3330 void Assembler::fsuba(int i) { 3331 emit_farith(0xDC, 0xE8, i); 3332 } 3333 3334 void Assembler::fsubp(int i) { 3335 emit_farith(0xDE, 0xE8, i); // ST(0) <- ST(0) - ST(1) and pop (Intel manual wrong) 3336 } 3337 3338 void Assembler::fsubr(int i) { 3339 emit_farith(0xD8, 0xE8, i); 3340 
} 3341 3342 void Assembler::fsubr_d(Address src) { 3343 InstructionMark im(this); 3344 emit_byte(0xDC); 3345 emit_operand32(rbp, src); 3346 } 3347 3348 void Assembler::fsubr_s(Address src) { 3349 InstructionMark im(this); 3350 emit_byte(0xD8); 3351 emit_operand32(rbp, src); 3352 } 3353 3354 void Assembler::fsubra(int i) { 3355 emit_farith(0xDC, 0xE0, i); 3356 } 3357 3358 void Assembler::fsubrp(int i) { 3359 emit_farith(0xDE, 0xE0, i); // ST(0) <- ST(1) - ST(0) and pop (Intel manual wrong) 3360 } 3361 3362 void Assembler::ftan() { 3363 emit_byte(0xD9); 3364 emit_byte(0xF2); 3365 emit_byte(0xDD); 3366 emit_byte(0xD8); 3367 } 3368 3369 void Assembler::ftst() { 3370 emit_byte(0xD9); 3371 emit_byte(0xE4); 3372 } 3373 3374 void Assembler::fucomi(int i) { 3375 // make sure the instruction is supported (introduced for P6, together with cmov) 3376 guarantee(VM_Version::supports_cmov(), "illegal instruction"); 3377 emit_farith(0xDB, 0xE8, i); 3378 } 3379 3380 void Assembler::fucomip(int i) { 3381 // make sure the instruction is supported (introduced for P6, together with cmov) 3382 guarantee(VM_Version::supports_cmov(), "illegal instruction"); 3383 emit_farith(0xDF, 0xE8, i); 3384 } 3385 3386 void Assembler::fwait() { 3387 emit_byte(0x9B); 3388 } 3389 3390 void Assembler::fxch(int i) { 3391 emit_farith(0xD9, 0xC8, i); 3392 } 3393 3394 void Assembler::fyl2x() { 3395 emit_byte(0xD9); 3396 emit_byte(0xF1); 3397 } 3398 3399 3400 #ifndef _LP64 3401 3402 void Assembler::incl(Register dst) { 3403 // Don't use it directly. Use MacroAssembler::incrementl() instead. 
3404 emit_byte(0x40 | dst->encoding()); 3405 } 3406 3407 void Assembler::lea(Register dst, Address src) { 3408 leal(dst, src); 3409 } 3410 3411 void Assembler::mov_literal32(Address dst, int32_t imm32, RelocationHolder const& rspec) { 3412 InstructionMark im(this); 3413 emit_byte(0xC7); 3414 emit_operand(rax, dst); 3415 emit_data((int)imm32, rspec, 0); 3416 } 3417 3418 void Assembler::mov_literal32(Register dst, int32_t imm32, RelocationHolder const& rspec) { 3419 InstructionMark im(this); 3420 int encode = prefix_and_encode(dst->encoding()); 3421 emit_byte(0xB8 | encode); 3422 emit_data((int)imm32, rspec, 0); 3423 } 3424 3425 void Assembler::popa() { // 32bit 3426 emit_byte(0x61); 3427 } 3428 3429 void Assembler::push_literal32(int32_t imm32, RelocationHolder const& rspec) { 3430 InstructionMark im(this); 3431 emit_byte(0x68); 3432 emit_data(imm32, rspec, 0); 3433 } 3434 3435 void Assembler::pusha() { // 32bit 3436 emit_byte(0x60); 3437 } 3438 3439 void Assembler::set_byte_if_not_zero(Register dst) { 3440 emit_byte(0x0F); 3441 emit_byte(0x95); 3442 emit_byte(0xE0 | dst->encoding()); 3443 } 3444 3445 void Assembler::shldl(Register dst, Register src) { 3446 emit_byte(0x0F); 3447 emit_byte(0xA5); 3448 emit_byte(0xC0 | src->encoding() << 3 | dst->encoding()); 3449 } 3450 3451 void Assembler::shrdl(Register dst, Register src) { 3452 emit_byte(0x0F); 3453 emit_byte(0xAD); 3454 emit_byte(0xC0 | src->encoding() << 3 | dst->encoding()); 3455 } 3456 3457 #else // LP64 3458 3459 void Assembler::set_byte_if_not_zero(Register dst) { 3460 int enc = prefix_and_encode(dst->encoding(), true); 3461 emit_byte(0x0F); 3462 emit_byte(0x95); 3463 emit_byte(0xE0 | enc); 3464 } 3465 3466 // 64bit only pieces of the assembler 3467 // This should only be used by 64bit instructions that can use rip-relative 3468 // it cannot be used by instructions that want an immediate value. 
3469 3470 bool Assembler::reachable(AddressLiteral adr) { 3471 int64_t disp; 3472 // None will force a 64bit literal to the code stream. Likely a placeholder 3473 // for something that will be patched later and we need to certain it will 3474 // always be reachable. 3475 if (adr.reloc() == relocInfo::none) { 3476 return false; 3477 } 3478 if (adr.reloc() == relocInfo::internal_word_type) { 3479 // This should be rip relative and easily reachable. 3480 return true; 3481 } 3482 if (adr.reloc() == relocInfo::virtual_call_type || 3483 adr.reloc() == relocInfo::opt_virtual_call_type || 3484 adr.reloc() == relocInfo::static_call_type || 3485 adr.reloc() == relocInfo::static_stub_type ) { 3486 // This should be rip relative within the code cache and easily 3487 // reachable until we get huge code caches. (At which point 3488 // ic code is going to have issues). 3489 return true; 3490 } 3491 if (adr.reloc() != relocInfo::external_word_type && 3492 adr.reloc() != relocInfo::poll_return_type && // these are really external_word but need special 3493 adr.reloc() != relocInfo::poll_type && // relocs to identify them 3494 adr.reloc() != relocInfo::runtime_call_type ) { 3495 return false; 3496 } 3497 3498 // Stress the correction code 3499 if (ForceUnreachable) { 3500 // Must be runtimecall reloc, see if it is in the codecache 3501 // Flipping stuff in the codecache to be unreachable causes issues 3502 // with things like inline caches where the additional instructions 3503 // are not handled. 3504 if (CodeCache::find_blob(adr._target) == NULL) { 3505 return false; 3506 } 3507 } 3508 // For external_word_type/runtime_call_type if it is reachable from where we 3509 // are now (possibly a temp buffer) and where we might end up 3510 // anywhere in the codeCache then we are always reachable. 3511 // This would have to change if we ever save/restore shared code 3512 // to be more pessimistic. 
3513 disp = (int64_t)adr._target - ((int64_t)CodeCache::low_bound() + sizeof(int)); 3514 if (!is_simm32(disp)) return false; 3515 disp = (int64_t)adr._target - ((int64_t)CodeCache::high_bound() + sizeof(int)); 3516 if (!is_simm32(disp)) return false; 3517 3518 disp = (int64_t)adr._target - ((int64_t)_code_pos + sizeof(int)); 3519 3520 // Because rip relative is a disp + address_of_next_instruction and we 3521 // don't know the value of address_of_next_instruction we apply a fudge factor 3522 // to make sure we will be ok no matter the size of the instruction we get placed into. 3523 // We don't have to fudge the checks above here because they are already worst case. 3524 3525 // 12 == override/rex byte, opcode byte, rm byte, sib byte, a 4-byte disp , 4-byte literal 3526 // + 4 because better safe than sorry. 3527 const int fudge = 12 + 4; 3528 if (disp < 0) { 3529 disp -= fudge; 3530 } else { 3531 disp += fudge; 3532 } 3533 return is_simm32(disp); 3534 } 3535 3536 // Check if the polling page is not reachable from the code cache using rip-relative 3537 // addressing. 3538 bool Assembler::is_polling_page_far() { 3539 intptr_t addr = (intptr_t)os::get_polling_page(); 3540 return !is_simm32(addr - (intptr_t)CodeCache::low_bound()) || 3541 !is_simm32(addr - (intptr_t)CodeCache::high_bound()); 3542 } 3543 3544 void Assembler::emit_data64(jlong data, 3545 relocInfo::relocType rtype, 3546 int format) { 3547 if (rtype == relocInfo::none) { 3548 emit_long64(data); 3549 } else { 3550 emit_data64(data, Relocation::spec_simple(rtype), format); 3551 } 3552 } 3553 3554 void Assembler::emit_data64(jlong data, 3555 RelocationHolder const& rspec, 3556 int format) { 3557 assert(imm_operand == 0, "default format must be immediate in this file"); 3558 assert(imm_operand == format, "must be immediate"); 3559 assert(inst_mark() != NULL, "must be inside InstructionMark"); 3560 // Do not use AbstractAssembler::relocate, which is not intended for 3561 // embedded words. 
Instead, relocate to the enclosing instruction. 3562 code_section()->relocate(inst_mark(), rspec, format); 3563 #ifdef ASSERT 3564 check_relocation(rspec, format); 3565 #endif 3566 emit_long64(data); 3567 } 3568 3569 int Assembler::prefix_and_encode(int reg_enc, bool byteinst) { 3570 if (reg_enc >= 8) { 3571 prefix(REX_B); 3572 reg_enc -= 8; 3573 } else if (byteinst && reg_enc >= 4) { 3574 prefix(REX); 3575 } 3576 return reg_enc; 3577 } 3578 3579 int Assembler::prefixq_and_encode(int reg_enc) { 3580 if (reg_enc < 8) { 3581 prefix(REX_W); 3582 } else { 3583 prefix(REX_WB); 3584 reg_enc -= 8; 3585 } 3586 return reg_enc; 3587 } 3588 3589 int Assembler::prefix_and_encode(int dst_enc, int src_enc, bool byteinst) { 3590 if (dst_enc < 8) { 3591 if (src_enc >= 8) { 3592 prefix(REX_B); 3593 src_enc -= 8; 3594 } else if (byteinst && src_enc >= 4) { 3595 prefix(REX); 3596 } 3597 } else { 3598 if (src_enc < 8) { 3599 prefix(REX_R); 3600 } else { 3601 prefix(REX_RB); 3602 src_enc -= 8; 3603 } 3604 dst_enc -= 8; 3605 } 3606 return dst_enc << 3 | src_enc; 3607 } 3608 3609 int Assembler::prefixq_and_encode(int dst_enc, int src_enc) { 3610 if (dst_enc < 8) { 3611 if (src_enc < 8) { 3612 prefix(REX_W); 3613 } else { 3614 prefix(REX_WB); 3615 src_enc -= 8; 3616 } 3617 } else { 3618 if (src_enc < 8) { 3619 prefix(REX_WR); 3620 } else { 3621 prefix(REX_WRB); 3622 src_enc -= 8; 3623 } 3624 dst_enc -= 8; 3625 } 3626 return dst_enc << 3 | src_enc; 3627 } 3628 3629 void Assembler::prefix(Register reg) { 3630 if (reg->encoding() >= 8) { 3631 prefix(REX_B); 3632 } 3633 } 3634 3635 void Assembler::prefix(Address adr) { 3636 if (adr.base_needs_rex()) { 3637 if (adr.index_needs_rex()) { 3638 prefix(REX_XB); 3639 } else { 3640 prefix(REX_B); 3641 } 3642 } else { 3643 if (adr.index_needs_rex()) { 3644 prefix(REX_X); 3645 } 3646 } 3647 } 3648 3649 void Assembler::prefixq(Address adr) { 3650 if (adr.base_needs_rex()) { 3651 if (adr.index_needs_rex()) { 3652 prefix(REX_WXB); 3653 } else { 3654 
prefix(REX_WB); 3655 } 3656 } else { 3657 if (adr.index_needs_rex()) { 3658 prefix(REX_WX); 3659 } else { 3660 prefix(REX_W); 3661 } 3662 } 3663 } 3664 3665 3666 void Assembler::prefix(Address adr, Register reg, bool byteinst) { 3667 if (reg->encoding() < 8) { 3668 if (adr.base_needs_rex()) { 3669 if (adr.index_needs_rex()) { 3670 prefix(REX_XB); 3671 } else { 3672 prefix(REX_B); 3673 } 3674 } else { 3675 if (adr.index_needs_rex()) { 3676 prefix(REX_X); 3677 } else if (reg->encoding() >= 4 ) { 3678 prefix(REX); 3679 } 3680 } 3681 } else { 3682 if (adr.base_needs_rex()) { 3683 if (adr.index_needs_rex()) { 3684 prefix(REX_RXB); 3685 } else { 3686 prefix(REX_RB); 3687 } 3688 } else { 3689 if (adr.index_needs_rex()) { 3690 prefix(REX_RX); 3691 } else { 3692 prefix(REX_R); 3693 } 3694 } 3695 } 3696 } 3697 3698 void Assembler::prefixq(Address adr, Register src) { 3699 if (src->encoding() < 8) { 3700 if (adr.base_needs_rex()) { 3701 if (adr.index_needs_rex()) { 3702 prefix(REX_WXB); 3703 } else { 3704 prefix(REX_WB); 3705 } 3706 } else { 3707 if (adr.index_needs_rex()) { 3708 prefix(REX_WX); 3709 } else { 3710 prefix(REX_W); 3711 } 3712 } 3713 } else { 3714 if (adr.base_needs_rex()) { 3715 if (adr.index_needs_rex()) { 3716 prefix(REX_WRXB); 3717 } else { 3718 prefix(REX_WRB); 3719 } 3720 } else { 3721 if (adr.index_needs_rex()) { 3722 prefix(REX_WRX); 3723 } else { 3724 prefix(REX_WR); 3725 } 3726 } 3727 } 3728 } 3729 3730 void Assembler::prefix(Address adr, XMMRegister reg) { 3731 if (reg->encoding() < 8) { 3732 if (adr.base_needs_rex()) { 3733 if (adr.index_needs_rex()) { 3734 prefix(REX_XB); 3735 } else { 3736 prefix(REX_B); 3737 } 3738 } else { 3739 if (adr.index_needs_rex()) { 3740 prefix(REX_X); 3741 } 3742 } 3743 } else { 3744 if (adr.base_needs_rex()) { 3745 if (adr.index_needs_rex()) { 3746 prefix(REX_RXB); 3747 } else { 3748 prefix(REX_RB); 3749 } 3750 } else { 3751 if (adr.index_needs_rex()) { 3752 prefix(REX_RX); 3753 } else { 3754 prefix(REX_R); 3755 } 3756 } 
3757 } 3758 } 3759 3760 void Assembler::adcq(Register dst, int32_t imm32) { 3761 (void) prefixq_and_encode(dst->encoding()); 3762 emit_arith(0x81, 0xD0, dst, imm32); 3763 } 3764 3765 void Assembler::adcq(Register dst, Address src) { 3766 InstructionMark im(this); 3767 prefixq(src, dst); 3768 emit_byte(0x13); 3769 emit_operand(dst, src); 3770 } 3771 3772 void Assembler::adcq(Register dst, Register src) { 3773 (int) prefixq_and_encode(dst->encoding(), src->encoding()); 3774 emit_arith(0x13, 0xC0, dst, src); 3775 } 3776 3777 void Assembler::addq(Address dst, int32_t imm32) { 3778 InstructionMark im(this); 3779 prefixq(dst); 3780 emit_arith_operand(0x81, rax, dst,imm32); 3781 } 3782 3783 void Assembler::addq(Address dst, Register src) { 3784 InstructionMark im(this); 3785 prefixq(dst, src); 3786 emit_byte(0x01); 3787 emit_operand(src, dst); 3788 } 3789 3790 void Assembler::addq(Register dst, int32_t imm32) { 3791 (void) prefixq_and_encode(dst->encoding()); 3792 emit_arith(0x81, 0xC0, dst, imm32); 3793 } 3794 3795 void Assembler::addq(Register dst, Address src) { 3796 InstructionMark im(this); 3797 prefixq(src, dst); 3798 emit_byte(0x03); 3799 emit_operand(dst, src); 3800 } 3801 3802 void Assembler::addq(Register dst, Register src) { 3803 (void) prefixq_and_encode(dst->encoding(), src->encoding()); 3804 emit_arith(0x03, 0xC0, dst, src); 3805 } 3806 3807 void Assembler::andq(Register dst, int32_t imm32) { 3808 (void) prefixq_and_encode(dst->encoding()); 3809 emit_arith(0x81, 0xE0, dst, imm32); 3810 } 3811 3812 void Assembler::andq(Register dst, Address src) { 3813 InstructionMark im(this); 3814 prefixq(src, dst); 3815 emit_byte(0x23); 3816 emit_operand(dst, src); 3817 } 3818 3819 void Assembler::andq(Register dst, Register src) { 3820 (int) prefixq_and_encode(dst->encoding(), src->encoding()); 3821 emit_arith(0x23, 0xC0, dst, src); 3822 } 3823 3824 void Assembler::bsfq(Register dst, Register src) { 3825 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 
3826 emit_byte(0x0F); 3827 emit_byte(0xBC); 3828 emit_byte(0xC0 | encode); 3829 } 3830 3831 void Assembler::bsrq(Register dst, Register src) { 3832 assert(!VM_Version::supports_lzcnt(), "encoding is treated as LZCNT"); 3833 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 3834 emit_byte(0x0F); 3835 emit_byte(0xBD); 3836 emit_byte(0xC0 | encode); 3837 } 3838 3839 void Assembler::bswapq(Register reg) { 3840 int encode = prefixq_and_encode(reg->encoding()); 3841 emit_byte(0x0F); 3842 emit_byte(0xC8 | encode); 3843 } 3844 3845 void Assembler::cdqq() { 3846 prefix(REX_W); 3847 emit_byte(0x99); 3848 } 3849 3850 void Assembler::clflush(Address adr) { 3851 prefix(adr); 3852 emit_byte(0x0F); 3853 emit_byte(0xAE); 3854 emit_operand(rdi, adr); 3855 } 3856 3857 void Assembler::cmovq(Condition cc, Register dst, Register src) { 3858 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 3859 emit_byte(0x0F); 3860 emit_byte(0x40 | cc); 3861 emit_byte(0xC0 | encode); 3862 } 3863 3864 void Assembler::cmovq(Condition cc, Register dst, Address src) { 3865 InstructionMark im(this); 3866 prefixq(src, dst); 3867 emit_byte(0x0F); 3868 emit_byte(0x40 | cc); 3869 emit_operand(dst, src); 3870 } 3871 3872 void Assembler::cmpq(Address dst, int32_t imm32) { 3873 InstructionMark im(this); 3874 prefixq(dst); 3875 emit_byte(0x81); 3876 emit_operand(rdi, dst, 4); 3877 emit_long(imm32); 3878 } 3879 3880 void Assembler::cmpq(Register dst, int32_t imm32) { 3881 (void) prefixq_and_encode(dst->encoding()); 3882 emit_arith(0x81, 0xF8, dst, imm32); 3883 } 3884 3885 void Assembler::cmpq(Address dst, Register src) { 3886 InstructionMark im(this); 3887 prefixq(dst, src); 3888 emit_byte(0x3B); 3889 emit_operand(src, dst); 3890 } 3891 3892 void Assembler::cmpq(Register dst, Register src) { 3893 (void) prefixq_and_encode(dst->encoding(), src->encoding()); 3894 emit_arith(0x3B, 0xC0, dst, src); 3895 } 3896 3897 void Assembler::cmpq(Register dst, Address src) { 3898 InstructionMark 
im(this); 3899 prefixq(src, dst); 3900 emit_byte(0x3B); 3901 emit_operand(dst, src); 3902 } 3903 3904 void Assembler::cmpxchgq(Register reg, Address adr) { 3905 InstructionMark im(this); 3906 prefixq(adr, reg); 3907 emit_byte(0x0F); 3908 emit_byte(0xB1); 3909 emit_operand(reg, adr); 3910 } 3911 3912 void Assembler::cvtsi2sdq(XMMRegister dst, Register src) { 3913 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3914 emit_byte(0xF2); 3915 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 3916 emit_byte(0x0F); 3917 emit_byte(0x2A); 3918 emit_byte(0xC0 | encode); 3919 } 3920 3921 void Assembler::cvtsi2ssq(XMMRegister dst, Register src) { 3922 NOT_LP64(assert(VM_Version::supports_sse(), "")); 3923 emit_byte(0xF3); 3924 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 3925 emit_byte(0x0F); 3926 emit_byte(0x2A); 3927 emit_byte(0xC0 | encode); 3928 } 3929 3930 void Assembler::cvttsd2siq(Register dst, XMMRegister src) { 3931 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3932 emit_byte(0xF2); 3933 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 3934 emit_byte(0x0F); 3935 emit_byte(0x2C); 3936 emit_byte(0xC0 | encode); 3937 } 3938 3939 void Assembler::cvttss2siq(Register dst, XMMRegister src) { 3940 NOT_LP64(assert(VM_Version::supports_sse(), "")); 3941 emit_byte(0xF3); 3942 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 3943 emit_byte(0x0F); 3944 emit_byte(0x2C); 3945 emit_byte(0xC0 | encode); 3946 } 3947 3948 void Assembler::decl(Register dst) { 3949 // Don't use it directly. Use MacroAssembler::decrementl() instead. 3950 // Use two-byte form (one-byte form is a REX prefix in 64-bit mode) 3951 int encode = prefix_and_encode(dst->encoding()); 3952 emit_byte(0xFF); 3953 emit_byte(0xC8 | encode); 3954 } 3955 3956 void Assembler::decq(Register dst) { 3957 // Don't use it directly. Use MacroAssembler::decrementq() instead. 
3958 // Use two-byte form (one-byte from is a REX prefix in 64-bit mode) 3959 int encode = prefixq_and_encode(dst->encoding()); 3960 emit_byte(0xFF); 3961 emit_byte(0xC8 | encode); 3962 } 3963 3964 void Assembler::decq(Address dst) { 3965 // Don't use it directly. Use MacroAssembler::decrementq() instead. 3966 InstructionMark im(this); 3967 prefixq(dst); 3968 emit_byte(0xFF); 3969 emit_operand(rcx, dst); 3970 } 3971 3972 void Assembler::fxrstor(Address src) { 3973 prefixq(src); 3974 emit_byte(0x0F); 3975 emit_byte(0xAE); 3976 emit_operand(as_Register(1), src); 3977 } 3978 3979 void Assembler::fxsave(Address dst) { 3980 prefixq(dst); 3981 emit_byte(0x0F); 3982 emit_byte(0xAE); 3983 emit_operand(as_Register(0), dst); 3984 } 3985 3986 void Assembler::idivq(Register src) { 3987 int encode = prefixq_and_encode(src->encoding()); 3988 emit_byte(0xF7); 3989 emit_byte(0xF8 | encode); 3990 } 3991 3992 void Assembler::imulq(Register dst, Register src) { 3993 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 3994 emit_byte(0x0F); 3995 emit_byte(0xAF); 3996 emit_byte(0xC0 | encode); 3997 } 3998 3999 void Assembler::imulq(Register dst, Register src, int value) { 4000 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 4001 if (is8bit(value)) { 4002 emit_byte(0x6B); 4003 emit_byte(0xC0 | encode); 4004 emit_byte(value & 0xFF); 4005 } else { 4006 emit_byte(0x69); 4007 emit_byte(0xC0 | encode); 4008 emit_long(value); 4009 } 4010 } 4011 4012 void Assembler::incl(Register dst) { 4013 // Don't use it directly. Use MacroAssembler::incrementl() instead. 4014 // Use two-byte form (one-byte from is a REX prefix in 64-bit mode) 4015 int encode = prefix_and_encode(dst->encoding()); 4016 emit_byte(0xFF); 4017 emit_byte(0xC0 | encode); 4018 } 4019 4020 void Assembler::incq(Register dst) { 4021 // Don't use it directly. Use MacroAssembler::incrementq() instead. 
4022 // Use two-byte form (one-byte from is a REX prefix in 64-bit mode) 4023 int encode = prefixq_and_encode(dst->encoding()); 4024 emit_byte(0xFF); 4025 emit_byte(0xC0 | encode); 4026 } 4027 4028 void Assembler::incq(Address dst) { 4029 // Don't use it directly. Use MacroAssembler::incrementq() instead. 4030 InstructionMark im(this); 4031 prefixq(dst); 4032 emit_byte(0xFF); 4033 emit_operand(rax, dst); 4034 } 4035 4036 void Assembler::lea(Register dst, Address src) { 4037 leaq(dst, src); 4038 } 4039 4040 void Assembler::leaq(Register dst, Address src) { 4041 InstructionMark im(this); 4042 prefixq(src, dst); 4043 emit_byte(0x8D); 4044 emit_operand(dst, src); 4045 } 4046 4047 void Assembler::mov64(Register dst, int64_t imm64) { 4048 InstructionMark im(this); 4049 int encode = prefixq_and_encode(dst->encoding()); 4050 emit_byte(0xB8 | encode); 4051 emit_long64(imm64); 4052 } 4053 4054 void Assembler::mov_literal64(Register dst, intptr_t imm64, RelocationHolder const& rspec) { 4055 InstructionMark im(this); 4056 int encode = prefixq_and_encode(dst->encoding()); 4057 emit_byte(0xB8 | encode); 4058 emit_data64(imm64, rspec); 4059 } 4060 4061 void Assembler::mov_narrow_oop(Register dst, int32_t imm32, RelocationHolder const& rspec) { 4062 InstructionMark im(this); 4063 int encode = prefix_and_encode(dst->encoding()); 4064 emit_byte(0xB8 | encode); 4065 emit_data((int)imm32, rspec, narrow_oop_operand); 4066 } 4067 4068 void Assembler::mov_narrow_oop(Address dst, int32_t imm32, RelocationHolder const& rspec) { 4069 InstructionMark im(this); 4070 prefix(dst); 4071 emit_byte(0xC7); 4072 emit_operand(rax, dst, 4); 4073 emit_data((int)imm32, rspec, narrow_oop_operand); 4074 } 4075 4076 void Assembler::cmp_narrow_oop(Register src1, int32_t imm32, RelocationHolder const& rspec) { 4077 InstructionMark im(this); 4078 int encode = prefix_and_encode(src1->encoding()); 4079 emit_byte(0x81); 4080 emit_byte(0xF8 | encode); 4081 emit_data((int)imm32, rspec, narrow_oop_operand); 4082 } 
4083 4084 void Assembler::cmp_narrow_oop(Address src1, int32_t imm32, RelocationHolder const& rspec) { 4085 InstructionMark im(this); 4086 prefix(src1); 4087 emit_byte(0x81); 4088 emit_operand(rax, src1, 4); 4089 emit_data((int)imm32, rspec, narrow_oop_operand); 4090 } 4091 4092 void Assembler::lzcntq(Register dst, Register src) { 4093 assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR"); 4094 emit_byte(0xF3); 4095 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 4096 emit_byte(0x0F); 4097 emit_byte(0xBD); 4098 emit_byte(0xC0 | encode); 4099 } 4100 4101 void Assembler::movdq(XMMRegister dst, Register src) { 4102 // table D-1 says MMX/SSE2 4103 NOT_LP64(assert(VM_Version::supports_sse2() || VM_Version::supports_mmx(), "")); 4104 emit_byte(0x66); 4105 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 4106 emit_byte(0x0F); 4107 emit_byte(0x6E); 4108 emit_byte(0xC0 | encode); 4109 } 4110 4111 void Assembler::movdq(Register dst, XMMRegister src) { 4112 // table D-1 says MMX/SSE2 4113 NOT_LP64(assert(VM_Version::supports_sse2() || VM_Version::supports_mmx(), "")); 4114 emit_byte(0x66); 4115 // swap src/dst to get correct prefix 4116 int encode = prefixq_and_encode(src->encoding(), dst->encoding()); 4117 emit_byte(0x0F); 4118 emit_byte(0x7E); 4119 emit_byte(0xC0 | encode); 4120 } 4121 4122 void Assembler::movq(Register dst, Register src) { 4123 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 4124 emit_byte(0x8B); 4125 emit_byte(0xC0 | encode); 4126 } 4127 4128 void Assembler::movq(Register dst, Address src) { 4129 InstructionMark im(this); 4130 prefixq(src, dst); 4131 emit_byte(0x8B); 4132 emit_operand(dst, src); 4133 } 4134 4135 void Assembler::movq(Address dst, Register src) { 4136 InstructionMark im(this); 4137 prefixq(dst, src); 4138 emit_byte(0x89); 4139 emit_operand(src, dst); 4140 } 4141 4142 void Assembler::movsbq(Register dst, Address src) { 4143 InstructionMark im(this); 4144 prefixq(src, dst); 
4145 emit_byte(0x0F); 4146 emit_byte(0xBE); 4147 emit_operand(dst, src); 4148 } 4149 4150 void Assembler::movsbq(Register dst, Register src) { 4151 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 4152 emit_byte(0x0F); 4153 emit_byte(0xBE); 4154 emit_byte(0xC0 | encode); 4155 } 4156 4157 void Assembler::movslq(Register dst, int32_t imm32) { 4158 // dbx shows movslq(rcx, 3) as movq $0x0000000049000000,(%rbx) 4159 // and movslq(r8, 3); as movl $0x0000000048000000,(%rbx) 4160 // as a result we shouldn't use until tested at runtime... 4161 ShouldNotReachHere(); 4162 InstructionMark im(this); 4163 int encode = prefixq_and_encode(dst->encoding()); 4164 emit_byte(0xC7 | encode); 4165 emit_long(imm32); 4166 } 4167 4168 void Assembler::movslq(Address dst, int32_t imm32) { 4169 assert(is_simm32(imm32), "lost bits"); 4170 InstructionMark im(this); 4171 prefixq(dst); 4172 emit_byte(0xC7); 4173 emit_operand(rax, dst, 4); 4174 emit_long(imm32); 4175 } 4176 4177 void Assembler::movslq(Register dst, Address src) { 4178 InstructionMark im(this); 4179 prefixq(src, dst); 4180 emit_byte(0x63); 4181 emit_operand(dst, src); 4182 } 4183 4184 void Assembler::movslq(Register dst, Register src) { 4185 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 4186 emit_byte(0x63); 4187 emit_byte(0xC0 | encode); 4188 } 4189 4190 void Assembler::movswq(Register dst, Address src) { 4191 InstructionMark im(this); 4192 prefixq(src, dst); 4193 emit_byte(0x0F); 4194 emit_byte(0xBF); 4195 emit_operand(dst, src); 4196 } 4197 4198 void Assembler::movswq(Register dst, Register src) { 4199 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 4200 emit_byte(0x0F); 4201 emit_byte(0xBF); 4202 emit_byte(0xC0 | encode); 4203 } 4204 4205 void Assembler::movzbq(Register dst, Address src) { 4206 InstructionMark im(this); 4207 prefixq(src, dst); 4208 emit_byte(0x0F); 4209 emit_byte(0xB6); 4210 emit_operand(dst, src); 4211 } 4212 4213 void Assembler::movzbq(Register dst, 
Register src) { 4214 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 4215 emit_byte(0x0F); 4216 emit_byte(0xB6); 4217 emit_byte(0xC0 | encode); 4218 } 4219 4220 void Assembler::movzwq(Register dst, Address src) { 4221 InstructionMark im(this); 4222 prefixq(src, dst); 4223 emit_byte(0x0F); 4224 emit_byte(0xB7); 4225 emit_operand(dst, src); 4226 } 4227 4228 void Assembler::movzwq(Register dst, Register src) { 4229 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 4230 emit_byte(0x0F); 4231 emit_byte(0xB7); 4232 emit_byte(0xC0 | encode); 4233 } 4234 4235 void Assembler::negq(Register dst) { 4236 int encode = prefixq_and_encode(dst->encoding()); 4237 emit_byte(0xF7); 4238 emit_byte(0xD8 | encode); 4239 } 4240 4241 void Assembler::notq(Register dst) { 4242 int encode = prefixq_and_encode(dst->encoding()); 4243 emit_byte(0xF7); 4244 emit_byte(0xD0 | encode); 4245 } 4246 4247 void Assembler::orq(Address dst, int32_t imm32) { 4248 InstructionMark im(this); 4249 prefixq(dst); 4250 emit_byte(0x81); 4251 emit_operand(rcx, dst, 4); 4252 emit_long(imm32); 4253 } 4254 4255 void Assembler::orq(Register dst, int32_t imm32) { 4256 (void) prefixq_and_encode(dst->encoding()); 4257 emit_arith(0x81, 0xC8, dst, imm32); 4258 } 4259 4260 void Assembler::orq(Register dst, Address src) { 4261 InstructionMark im(this); 4262 prefixq(src, dst); 4263 emit_byte(0x0B); 4264 emit_operand(dst, src); 4265 } 4266 4267 void Assembler::orq(Register dst, Register src) { 4268 (void) prefixq_and_encode(dst->encoding(), src->encoding()); 4269 emit_arith(0x0B, 0xC0, dst, src); 4270 } 4271 4272 void Assembler::popa() { // 64bit 4273 movq(r15, Address(rsp, 0)); 4274 movq(r14, Address(rsp, wordSize)); 4275 movq(r13, Address(rsp, 2 * wordSize)); 4276 movq(r12, Address(rsp, 3 * wordSize)); 4277 movq(r11, Address(rsp, 4 * wordSize)); 4278 movq(r10, Address(rsp, 5 * wordSize)); 4279 movq(r9, Address(rsp, 6 * wordSize)); 4280 movq(r8, Address(rsp, 7 * wordSize)); 4281 movq(rdi, 
Address(rsp, 8 * wordSize)); 4282 movq(rsi, Address(rsp, 9 * wordSize)); 4283 movq(rbp, Address(rsp, 10 * wordSize)); 4284 // skip rsp 4285 movq(rbx, Address(rsp, 12 * wordSize)); 4286 movq(rdx, Address(rsp, 13 * wordSize)); 4287 movq(rcx, Address(rsp, 14 * wordSize)); 4288 movq(rax, Address(rsp, 15 * wordSize)); 4289 4290 addq(rsp, 16 * wordSize); 4291 } 4292 4293 void Assembler::popcntq(Register dst, Address src) { 4294 assert(VM_Version::supports_popcnt(), "must support"); 4295 InstructionMark im(this); 4296 emit_byte(0xF3); 4297 prefixq(src, dst); 4298 emit_byte(0x0F); 4299 emit_byte(0xB8); 4300 emit_operand(dst, src); 4301 } 4302 4303 void Assembler::popcntq(Register dst, Register src) { 4304 assert(VM_Version::supports_popcnt(), "must support"); 4305 emit_byte(0xF3); 4306 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 4307 emit_byte(0x0F); 4308 emit_byte(0xB8); 4309 emit_byte(0xC0 | encode); 4310 } 4311 4312 void Assembler::popq(Address dst) { 4313 InstructionMark im(this); 4314 prefixq(dst); 4315 emit_byte(0x8F); 4316 emit_operand(rax, dst); 4317 } 4318 4319 void Assembler::pusha() { // 64bit 4320 // we have to store original rsp. ABI says that 128 bytes 4321 // below rsp are local scratch. 
4322 movq(Address(rsp, -5 * wordSize), rsp); 4323 4324 subq(rsp, 16 * wordSize); 4325 4326 movq(Address(rsp, 15 * wordSize), rax); 4327 movq(Address(rsp, 14 * wordSize), rcx); 4328 movq(Address(rsp, 13 * wordSize), rdx); 4329 movq(Address(rsp, 12 * wordSize), rbx); 4330 // skip rsp 4331 movq(Address(rsp, 10 * wordSize), rbp); 4332 movq(Address(rsp, 9 * wordSize), rsi); 4333 movq(Address(rsp, 8 * wordSize), rdi); 4334 movq(Address(rsp, 7 * wordSize), r8); 4335 movq(Address(rsp, 6 * wordSize), r9); 4336 movq(Address(rsp, 5 * wordSize), r10); 4337 movq(Address(rsp, 4 * wordSize), r11); 4338 movq(Address(rsp, 3 * wordSize), r12); 4339 movq(Address(rsp, 2 * wordSize), r13); 4340 movq(Address(rsp, wordSize), r14); 4341 movq(Address(rsp, 0), r15); 4342 } 4343 4344 void Assembler::pushq(Address src) { 4345 InstructionMark im(this); 4346 prefixq(src); 4347 emit_byte(0xFF); 4348 emit_operand(rsi, src); 4349 } 4350 4351 void Assembler::rclq(Register dst, int imm8) { 4352 assert(isShiftCount(imm8 >> 1), "illegal shift count"); 4353 int encode = prefixq_and_encode(dst->encoding()); 4354 if (imm8 == 1) { 4355 emit_byte(0xD1); 4356 emit_byte(0xD0 | encode); 4357 } else { 4358 emit_byte(0xC1); 4359 emit_byte(0xD0 | encode); 4360 emit_byte(imm8); 4361 } 4362 } 4363 void Assembler::sarq(Register dst, int imm8) { 4364 assert(isShiftCount(imm8 >> 1), "illegal shift count"); 4365 int encode = prefixq_and_encode(dst->encoding()); 4366 if (imm8 == 1) { 4367 emit_byte(0xD1); 4368 emit_byte(0xF8 | encode); 4369 } else { 4370 emit_byte(0xC1); 4371 emit_byte(0xF8 | encode); 4372 emit_byte(imm8); 4373 } 4374 } 4375 4376 void Assembler::sarq(Register dst) { 4377 int encode = prefixq_and_encode(dst->encoding()); 4378 emit_byte(0xD3); 4379 emit_byte(0xF8 | encode); 4380 } 4381 4382 void Assembler::sbbq(Address dst, int32_t imm32) { 4383 InstructionMark im(this); 4384 prefixq(dst); 4385 emit_arith_operand(0x81, rbx, dst, imm32); 4386 } 4387 4388 void Assembler::sbbq(Register dst, int32_t imm32) { 
// Tail of a definition whose header lies above this chunk (presumably
// sbbq(Register dst, int32_t imm32): opcode 0x81 with ModRM base 0xD8 --
// TODO confirm against the preceding lines).
  (void) prefixq_and_encode(dst->encoding());
  emit_arith(0x81, 0xD8, dst, imm32);
}

// sbbq dst, [src]: opcode 0x1B followed by the memory operand.
void Assembler::sbbq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x1B);
  emit_operand(dst, src);
}

// sbbq dst, src (register form): opcode 0x1B with ModRM base 0xC0.
void Assembler::sbbq(Register dst, Register src) {
  (void) prefixq_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x1B, 0xC0, dst, src);
}

// shlq dst, imm8. A count of exactly 1 uses the 0xD1 form, which carries no
// immediate byte; every other count uses 0xC1 followed by imm8.
// Note that the assert validates imm8 >> 1 (half the requested count).
void Assembler::shlq(Register dst, int imm8) {
  assert(isShiftCount(imm8 >> 1), "illegal shift count");
  int encode = prefixq_and_encode(dst->encoding());
  if (imm8 == 1) {
    emit_byte(0xD1);
    emit_byte(0xE0 | encode);
  } else {
    emit_byte(0xC1);
    emit_byte(0xE0 | encode);
    emit_byte(imm8);
  }
}

// shlq dst (no explicit count): opcode 0xD3, ModRM 0xE0 | dst.
void Assembler::shlq(Register dst) {
  int encode = prefixq_and_encode(dst->encoding());
  emit_byte(0xD3);
  emit_byte(0xE0 | encode);
}

// shrq dst, imm8: always the 0xC1 form with an immediate byte; unlike
// shlq(Register, int) there is no short encoding for a count of 1 here.
void Assembler::shrq(Register dst, int imm8) {
  assert(isShiftCount(imm8 >> 1), "illegal shift count");
  int encode = prefixq_and_encode(dst->encoding());
  emit_byte(0xC1);
  emit_byte(0xE8 | encode);
  emit_byte(imm8);
}

// shrq dst (no explicit count): opcode 0xD3, ModRM 0xE8 | dst.
void Assembler::shrq(Register dst) {
  int encode = prefixq_and_encode(dst->encoding());
  emit_byte(0xD3);
  emit_byte(0xE8 | encode);
}

// subq [dst], imm32: opcode 0x81. rbp is handed to emit_arith_operand as the
// register argument; presumably only its encoding is used, as the opcode
// extension in the ModRM byte -- verify against emit_arith_operand.
void Assembler::subq(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefixq(dst);
  emit_arith_operand(0x81, rbp, dst, imm32);
}

// subq [dst], src: opcode 0x29 followed by the memory operand.
void Assembler::subq(Address dst, Register src) {
  InstructionMark im(this);
  prefixq(dst, src);
  emit_byte(0x29);
  emit_operand(src, dst);
}

// subq dst, imm32: opcode 0x81 with ModRM base 0xE8.
void Assembler::subq(Register dst, int32_t imm32) {
  (void) prefixq_and_encode(dst->encoding());
  emit_arith(0x81, 0xE8, dst, imm32);
}

// subq dst, [src]: opcode 0x2B followed by the memory operand.
void Assembler::subq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x2B);
  emit_operand(dst, src);
}

// subq dst, src (register form): opcode 0x2B with ModRM base 0xC0.
void Assembler::subq(Register dst, Register src) {
  (void) prefixq_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x2B, 0xC0, dst, src);
}

// testq dst, imm32. The immediate is always emitted as a full 32-bit value.
// A register with encoding 0 gets the 0xA9 form, which needs no ModRM byte;
// any other register uses 0xF7 plus a ModRM byte.
void Assembler::testq(Register dst, int32_t imm32) {
  // not using emit_arith because test
  // doesn't support sign-extension of
  // 8bit operands
  int encode = dst->encoding();
  if (encode == 0) {
    prefix(REX_W);
    emit_byte(0xA9);
  } else {
    encode = prefixq_and_encode(encode);
    emit_byte(0xF7);
    emit_byte(0xC0 | encode);
  }
  emit_long(imm32);
}

// testq dst, src (register form): opcode 0x85 with ModRM base 0xC0.
void Assembler::testq(Register dst, Register src) {
  (void) prefixq_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x85, 0xC0, dst, src);
}

// xaddq [dst], src: two-byte opcode 0x0F 0xC1 followed by the memory operand.
// No lock prefix is emitted here.
void Assembler::xaddq(Address dst, Register src) {
  InstructionMark im(this);
  prefixq(dst, src);
  emit_byte(0x0F);
  emit_byte(0xC1);
  emit_operand(src, dst);
}

// xchgq dst, [src]: opcode 0x87 followed by the memory operand.
void Assembler::xchgq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x87);
  emit_operand(dst, src);
}

// xchgq dst, src (register form): opcode 0x87, ModRM 0xc0 | encode.
void Assembler::xchgq(Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x87);
  emit_byte(0xc0 | encode);
}

// xorq dst, src (register form): opcode 0x33 with ModRM base 0xC0.
void Assembler::xorq(Register dst, Register src) {
  (void) prefixq_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x33, 0xC0, dst, src);
}

// xorq dst, [src]: opcode 0x33 followed by the memory operand.
void Assembler::xorq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x33);
  emit_operand(dst, src);
}

#endif // !LP64

// Lookup table of condition codes: entry i holds the condition listed against
// the one named in the trailing comment (overflow/noOverflow, below/aboveEqual,
// zero/notZero, ...). The index is the numeric value shown in each comment.
static Assembler::Condition reverse[] = {
    Assembler::noOverflow     /* overflow      = 0x0 */ ,
    Assembler::overflow       /* noOverflow    = 0x1 */ ,
    Assembler::aboveEqual     /* carrySet      = 0x2, below         = 0x2 */ ,
    Assembler::below          /* aboveEqual    = 0x3, carryClear    = 0x3 */ ,
    Assembler::notZero        /* zero          = 0x4, equal         = 0x4 */ ,
    Assembler::zero           /* notZero       = 0x5, notEqual      = 0x5 */ ,
    Assembler::above          /* belowEqual    = 0x6 */ ,
    Assembler::belowEqual     /* above         = 0x7 */ ,
    Assembler::positive       /* negative      = 0x8 */ ,
    Assembler::negative       /* positive      = 0x9 */ ,
    Assembler::noParity       /* parity        = 0xa */ ,
    Assembler::parity         /* noParity      = 0xb */ ,
    Assembler::greaterEqual   /* less          = 0xc */ ,
    Assembler::less           /* greaterEqual  = 0xd */ ,
    Assembler::greater        /* lessEqual     = 0xe */ ,
    Assembler::lessEqual      /* greater       = 0xf, */

};


// Implementation of MacroAssembler

// First all the versions that have distinct versions depending on 32/64 bit
// Unless the difference is trivial (1 line or so).

#ifndef _LP64

// 32bit versions

// Builds an Address directly from the literal's target and relocation spec.
Address MacroAssembler::as_Address(AddressLiteral adr) {
  return Address(adr.target(), adr.rspec());
}

// Builds an Address for an array access via Address::make_array.
Address MacroAssembler::as_Address(ArrayAddress adr) {
  return Address::make_array(adr);
}

// Emits the biased-locking fast path (32-bit version).
//
//   lock_reg  - base of the slot used to save the mark word (offset 0); also
//               borrowed as the temp register when tmp_reg == noreg
//   obj_reg   - the object being locked
//   swap_reg  - must be rax (asserted); holds the mark word / cmpxchg compare value
//   tmp_reg   - scratch register, or noreg to borrow lock_reg (which is then
//               preserved around each use with push/pop)
//   swap_reg_contains_mark - true if the caller already loaded the mark word
//   done      - jumped to when the bias is held or successfully acquired
//   slow_case - if non-NULL, jumped to when a bias-acquiring CAS fails
//   counters  - statistics counters; if NULL and PrintBiasedLockingStatistics
//               is set, BiasedLocking::counters() is used
//
// Returns the code offset recorded just before the first load through obj_reg
// (the mark load, or the klass load when the mark was supplied by the caller).
// Control falls out of the emitted code at cas_label when the object is not
// biased or after an attempted bias revocation.
int MacroAssembler::biased_locking_enter(Register lock_reg,
                                         Register obj_reg,
                                         Register swap_reg,
                                         Register tmp_reg,
                                         bool swap_reg_contains_mark,
                                         Label& done,
                                         Label* slow_case,
                                         BiasedLockingCounters* counters) {
  assert(UseBiasedLocking, "why call this otherwise?");
  assert(swap_reg == rax, "swap_reg must be rax, for cmpxchg");
  assert_different_registers(lock_reg, obj_reg, swap_reg);

  if (PrintBiasedLockingStatistics && counters == NULL)
    counters = BiasedLocking::counters();

  // With no temp register supplied, lock_reg doubles as the temp and is
  // saved/restored around every use below.
  bool need_tmp_reg = false;
  if (tmp_reg == noreg) {
    need_tmp_reg = true;
    tmp_reg = lock_reg;
  } else {
    assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg);
  }
  assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
  Address mark_addr      (obj_reg, oopDesc::mark_offset_in_bytes());
  Address klass_addr     (obj_reg, oopDesc::klass_offset_in_bytes());
  Address saved_mark_addr(lock_reg, 0);

  // Biased locking
  // See whether the lock is currently biased toward our thread and
  // whether the epoch is still valid
  // Note that the runtime guarantees sufficient alignment of JavaThread
  // pointers to allow age to be placed into low bits
  // First check to see whether biasing is even enabled for this object
  Label cas_label;
  int null_check_offset = -1;
  if (!swap_reg_contains_mark) {
    null_check_offset = offset();
    movl(swap_reg, mark_addr);
  }
  if (need_tmp_reg) {
    push(tmp_reg);
  }
  movl(tmp_reg, swap_reg);
  andl(tmp_reg, markOopDesc::biased_lock_mask_in_place);
  cmpl(tmp_reg, markOopDesc::biased_lock_pattern);
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  jcc(Assembler::notEqual, cas_label);
  // The bias pattern is present in the object's header. Need to check
  // whether the bias owner and the epoch are both still current.
  // Note that because there is no current thread register on x86 we
  // need to store off the mark word we read out of the object to
  // avoid reloading it and needing to recheck invariants below. This
  // store is unfortunate but it makes the overall code shorter and
  // simpler.
  movl(saved_mark_addr, swap_reg);
  if (need_tmp_reg) {
    push(tmp_reg);
  }
  get_thread(tmp_reg);
  xorl(swap_reg, tmp_reg);
  if (swap_reg_contains_mark) {
    null_check_offset = offset();
  }
  movl(tmp_reg, klass_addr);
  xorl(swap_reg, Address(tmp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
  andl(swap_reg, ~((int) markOopDesc::age_mask_in_place));
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  if (counters != NULL) {
    cond_inc32(Assembler::zero,
               ExternalAddress((address)counters->biased_lock_entry_count_addr()));
  }
  jcc(Assembler::equal, done);

  Label try_revoke_bias;
  Label try_rebias;

  // At this point we know that the header has the bias pattern and
  // that we are not the bias owner in the current epoch. We need to
  // figure out more details about the state of the header in order to
  // know what operations can be legally performed on the object's
  // header.

  // If the low three bits in the xor result aren't clear, that means
  // the prototype header is no longer biased and we have to revoke
  // the bias on this object.
  testl(swap_reg, markOopDesc::biased_lock_mask_in_place);
  jcc(Assembler::notZero, try_revoke_bias);

  // Biasing is still enabled for this data type. See whether the
  // epoch of the current bias is still valid, meaning that the epoch
  // bits of the mark word are equal to the epoch bits of the
  // prototype header. (Note that the prototype header's epoch bits
  // only change at a safepoint.) If not, attempt to rebias the object
  // toward the current thread. Note that we must be absolutely sure
  // that the current epoch is invalid in order to do this because
  // otherwise the manipulations it performs on the mark word are
  // illegal.
  testl(swap_reg, markOopDesc::epoch_mask_in_place);
  jcc(Assembler::notZero, try_rebias);

  // The epoch of the current bias is still valid but we know nothing
  // about the owner; it might be set or it might be clear. Try to
  // acquire the bias of the object using an atomic operation. If this
  // fails we will go in to the runtime to revoke the object's bias.
  // Note that we first construct the presumed unbiased header so we
  // don't accidentally blow away another thread's valid bias.
  movl(swap_reg, saved_mark_addr);
  andl(swap_reg,
       markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);
  if (need_tmp_reg) {
    push(tmp_reg);
  }
  get_thread(tmp_reg);
  orl(tmp_reg, swap_reg);
  if (os::is_MP()) {
    lock();
  }
  cmpxchgptr(tmp_reg, Address(obj_reg, 0));
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  // If the biasing toward our thread failed, this means that
  // another thread succeeded in biasing it toward itself and we
  // need to revoke that bias. The revocation will occur in the
  // interpreter runtime in the slow case.
  if (counters != NULL) {
    cond_inc32(Assembler::zero,
               ExternalAddress((address)counters->anonymously_biased_lock_entry_count_addr()));
  }
  if (slow_case != NULL) {
    jcc(Assembler::notZero, *slow_case);
  }
  jmp(done);

  bind(try_rebias);
  // At this point we know the epoch has expired, meaning that the
  // current "bias owner", if any, is actually invalid. Under these
  // circumstances _only_, we are allowed to use the current header's
  // value as the comparison value when doing the cas to acquire the
  // bias in the current epoch. In other words, we allow transfer of
  // the bias from one thread to another directly in this situation.
  //
  // FIXME: due to a lack of registers we currently blow away the age
  // bits in this situation. Should attempt to preserve them.
  if (need_tmp_reg) {
    push(tmp_reg);
  }
  get_thread(tmp_reg);
  movl(swap_reg, klass_addr);
  orl(tmp_reg, Address(swap_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
  movl(swap_reg, saved_mark_addr);
  if (os::is_MP()) {
    lock();
  }
  cmpxchgptr(tmp_reg, Address(obj_reg, 0));
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  // If the biasing toward our thread failed, then another thread
  // succeeded in biasing it toward itself and we need to revoke that
  // bias. The revocation will occur in the runtime in the slow case.
  if (counters != NULL) {
    cond_inc32(Assembler::zero,
               ExternalAddress((address)counters->rebiased_lock_entry_count_addr()));
  }
  if (slow_case != NULL) {
    jcc(Assembler::notZero, *slow_case);
  }
  jmp(done);

  bind(try_revoke_bias);
  // The prototype mark in the klass doesn't have the bias bit set any
  // more, indicating that objects of this data type are not supposed
  // to be biased any more. We are going to try to reset the mark of
  // this object to the prototype value and fall through to the
  // CAS-based locking scheme. Note that if our CAS fails, it means
  // that another thread raced us for the privilege of revoking the
  // bias of this particular object, so it's okay to continue in the
  // normal locking code.
  //
  // FIXME: due to a lack of registers we currently blow away the age
  // bits in this situation. Should attempt to preserve them.
  movl(swap_reg, saved_mark_addr);
  if (need_tmp_reg) {
    push(tmp_reg);
  }
  movl(tmp_reg, klass_addr);
  movl(tmp_reg, Address(tmp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
  if (os::is_MP()) {
    lock();
  }
  cmpxchgptr(tmp_reg, Address(obj_reg, 0));
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  // Fall through to the normal CAS-based lock, because no matter what
  // the result of the above CAS, some thread must have succeeded in
  // removing the bias bit from the object's header.
  if (counters != NULL) {
    cond_inc32(Assembler::zero,
               ExternalAddress((address)counters->revoked_lock_entry_count_addr()));
  }

  bind(cas_label);

  return null_check_offset;
}

// Calls entry_point, then adds number_of_arguments * wordSize to rsp to pop
// the arguments again.
void MacroAssembler::call_VM_leaf_base(address entry_point,
                                       int number_of_arguments) {
  call(RuntimeAddress(entry_point));
  increment(rsp, number_of_arguments * wordSize);
}

// Compares the word at src1 with the oop literal obj (32-bit immediate with
// an oop relocation).
void MacroAssembler::cmpoop(Address src1, jobject obj) {
  cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate());
}

// Compares register src1 with the oop literal obj (32-bit immediate with an
// oop relocation).
void MacroAssembler::cmpoop(Register src1, jobject obj) {
  cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate());
}

// Fills hi with the sign of lo: cdql when the P6 check passes and the pair is
// rdx:rax, otherwise a copy followed by an arithmetic shift right by 31.
void MacroAssembler::extend_sign(Register hi, Register lo) {
  // According to Intel Doc. AP-526, "Integer Divide", p.18.
  if (VM_Version::is_P6() && hi == rdx && lo == rax) {
    cdql();
  } else {
    movl(hi, lo);
    sarl(hi, 31);
  }
}

void MacroAssembler::fat_nop() {
  // A 5 byte nop that is safe for patching (see patch_verified_entry)
  emit_byte(0x26); // es:
  emit_byte(0x2e); // cs:
  emit_byte(0x64); // fs:
  emit_byte(0x65); // gs:
  emit_byte(0x90);
}

// Branches to L if FPU flag C2 is set. rax is saved in tmp around the status
// word transfer and restored before the branch.
void MacroAssembler::jC2(Register tmp, Label& L) {
  // set parity bit if FPU flag C2 is set (via rax)
  save_rax(tmp);
  fwait(); fnstsw_ax();
  sahf();
  restore_rax(tmp);
  // branch
  jcc(Assembler::parity, L);
}

// Branches to L if FPU flag C2 is clear; same sequence as jC2 with the
// opposite condition.
void MacroAssembler::jnC2(Register tmp, Label& L) {
  // set parity bit if FPU flag C2 is set (via rax)
  save_rax(tmp);
  fwait(); fnstsw_ax();
  sahf();
  restore_rax(tmp);
  // branch
  jcc(Assembler::noParity, L);
}

// 32bit can do a case table jump in one instruction but we no longer allow the base
// to be installed in the Address class
void MacroAssembler::jump(ArrayAddress entry) {
  jmp(as_Address(entry));
}

// Note: y_lo will be destroyed
void MacroAssembler::lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo) {
  // Long compare for Java (semantics as described in JVM spec.)
  // The high words are compared signed, the low words unsigned; the result
  // (-1, 0 or 1) is left in x_hi.
  Label high, low, done;

  cmpl(x_hi, y_hi);
  jcc(Assembler::less, low);
  jcc(Assembler::greater, high);
  // x_hi is the return register
  xorl(x_hi, x_hi);
  cmpl(x_lo, y_lo);
  jcc(Assembler::below, low);
  jcc(Assembler::equal, done);

  bind(high);
  xorl(x_hi, x_hi);
  increment(x_hi);
  jmp(done);

  bind(low);
  xorl(x_hi, x_hi);
  decrementl(x_hi);

  bind(done);
}

// Loads the literal's address into dst as a relocated 32-bit immediate.
void MacroAssembler::lea(Register dst, AddressLiteral src) {
  mov_literal32(dst, (int32_t)src.target(), src.rspec());
}

// Stores the literal's address to memory at dst as a relocated 32-bit immediate.
void MacroAssembler::lea(Address dst, AddressLiteral adr) {
  // leal(dst, as_Address(adr));
  // see note in movl as to why we must use a move
  mov_literal32(dst, (int32_t) adr.target(), adr.rspec());
}

// Tears down the frame with an explicit mov rsp, rbp / pop rbp pair.
void MacroAssembler::leave() {
  mov(rsp, rbp);
  pop(rbp);
}

void MacroAssembler::lmul(int x_rsp_offset, int y_rsp_offset) {
  // Multiplication of two Java long values stored on the stack
  // as illustrated below. Result is in rdx:rax.
  //
  // rsp ---> [  ??  ] \               \
  //            ....    | y_rsp_offset  |
  //          [ y_lo ] /  (in bytes)    | x_rsp_offset
  //          [ y_hi ]                  | (in bytes)
  //            ....                    |
  //          [ x_lo ]                 /
  //          [ x_hi ]
  //            ....
  //
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
  //
  // Uses rax, rbx, rcx and rdx as working registers.
  Address x_hi(rsp, x_rsp_offset + wordSize); Address x_lo(rsp, x_rsp_offset);
  Address y_hi(rsp, y_rsp_offset + wordSize); Address y_lo(rsp, y_rsp_offset);
  Label quick;
  // load x_hi, y_hi and check if quick
  // multiplication is possible
  movl(rbx, x_hi);
  movl(rcx, y_hi);
  movl(rax, rbx);
  orl(rbx, rcx);                                 // rbx, = 0 <=> x_hi = 0 and y_hi = 0
  jcc(Assembler::zero, quick);                   // if rbx, = 0 do quick multiply
  // do full multiplication
  // 1st step
  mull(y_lo);                                    // x_hi * y_lo
  movl(rbx, rax);                                // save lo(x_hi * y_lo) in rbx,
  // 2nd step
  movl(rax, x_lo);
  mull(rcx);                                     // x_lo * y_hi
  addl(rbx, rax);                                // add lo(x_lo * y_hi) to rbx,
  // 3rd step
  bind(quick);                                   // note: rbx, = 0 if quick multiply!
  movl(rax, x_lo);
  mull(y_lo);                                    // x_lo * y_lo
  addl(rdx, rbx);                                // correct hi(x_lo * y_lo)
}

// Negates the 64-bit value held in hi:lo.
void MacroAssembler::lneg(Register hi, Register lo) {
  negl(lo);
  adcl(hi, 0);
  negl(hi);
}

void MacroAssembler::lshl(Register hi, Register lo) {
  // Java shift left long support (semantics as described in JVM spec., p.305)
  // (basic idea for shift counts s >= n: x << s == (x << n) << (s - n))
  // shift value is in rcx !
  assert(hi != rcx, "must not use rcx");
  assert(lo != rcx, "must not use rcx");
  const Register s = rcx;                        // shift count
  const int      n = BitsPerWord;
  Label L;
  andl(s, 0x3f);                                 // s := s & 0x3f (s < 0x40)
  cmpl(s, n);                                    // if (s < n)
  jcc(Assembler::less, L);                       // else (s >= n)
  movl(hi, lo);                                  // x := x << n
  xorl(lo, lo);
  // Note: subl(s, n) is not needed since the Intel shift instructions work rcx mod n!
  bind(L);                                       // s (mod n) < n
  shldl(hi, lo);                                 // x := x << s
  shll(lo);
}


void MacroAssembler::lshr(Register hi, Register lo, bool sign_extension) {
  // Java shift right long support (semantics as described in JVM spec., p.306 & p.310)
  // (basic idea for shift counts s >= n: x >> s == (x >> n) >> (s - n))
  // sign_extension selects an arithmetic (sarl) rather than a logical (shrl)
  // shift of the high word. As in lshl, the shift count is taken from rcx.
  assert(hi != rcx, "must not use rcx");
  assert(lo != rcx, "must not use rcx");
  const Register s = rcx;                        // shift count
  const int      n = BitsPerWord;
  Label L;
  andl(s, 0x3f);                                 // s := s & 0x3f (s < 0x40)
  cmpl(s, n);                                    // if (s < n)
  jcc(Assembler::less, L);                       // else (s >= n)
  movl(lo, hi);                                  // x := x >> n
  if (sign_extension) sarl(hi, 31);
  else                xorl(hi, hi);
  // Note: subl(s, n) is not needed since the Intel shift instructions work rcx mod n!
  bind(L);                                       // s (mod n) < n
  shrdl(lo, hi);                                 // x := x >> s
  if (sign_extension) sarl(hi);
  else                shrl(hi);
}

// Loads the oop literal obj into dst (32-bit immediate with an oop relocation).
void MacroAssembler::movoop(Register dst, jobject obj) {
  mov_literal32(dst, (int32_t)obj, oop_Relocation::spec_for_immediate());
}

// Stores the oop literal obj to memory at dst (32-bit immediate with an oop
// relocation).
void MacroAssembler::movoop(Address dst, jobject obj) {
  mov_literal32(dst, (int32_t)obj, oop_Relocation::spec_for_immediate());
}

// For an lval literal, loads the literal's address itself into dst; otherwise
// loads the word stored at that address.
void MacroAssembler::movptr(Register dst, AddressLiteral src) {
  if (src.is_lval()) {
    mov_literal32(dst, (intptr_t)src.target(), src.rspec());
  } else {
    movl(dst, as_Address(src));
  }
}

// Stores src to the array element addressed by dst.
void MacroAssembler::movptr(ArrayAddress dst, Register src) {
  movl(as_Address(dst), src);
}

// Loads the array element addressed by src into dst.
void MacroAssembler::movptr(Register dst, ArrayAddress src) {
  movl(dst, as_Address(src));
}

// src should NEVER be a real pointer.
Use AddressLiteral for true pointers 4986 void MacroAssembler::movptr(Address dst, intptr_t src) { 4987 movl(dst, src); 4988 } 4989 4990 4991 void MacroAssembler::pop_callee_saved_registers() { 4992 pop(rcx); 4993 pop(rdx); 4994 pop(rdi); 4995 pop(rsi); 4996 } 4997 4998 void MacroAssembler::pop_fTOS() { 4999 fld_d(Address(rsp, 0)); 5000 addl(rsp, 2 * wordSize); 5001 } 5002 5003 void MacroAssembler::push_callee_saved_registers() { 5004 push(rsi); 5005 push(rdi); 5006 push(rdx); 5007 push(rcx); 5008 } 5009 5010 void MacroAssembler::push_fTOS() { 5011 subl(rsp, 2 * wordSize); 5012 fstp_d(Address(rsp, 0)); 5013 } 5014 5015 5016 void MacroAssembler::pushoop(jobject obj) { 5017 push_literal32((int32_t)obj, oop_Relocation::spec_for_immediate()); 5018 } 5019 5020 5021 void MacroAssembler::pushptr(AddressLiteral src) { 5022 if (src.is_lval()) { 5023 push_literal32((int32_t)src.target(), src.rspec()); 5024 } else { 5025 pushl(as_Address(src)); 5026 } 5027 } 5028 5029 void MacroAssembler::set_word_if_not_zero(Register dst) { 5030 xorl(dst, dst); 5031 set_byte_if_not_zero(dst); 5032 } 5033 5034 static void pass_arg0(MacroAssembler* masm, Register arg) { 5035 masm->push(arg); 5036 } 5037 5038 static void pass_arg1(MacroAssembler* masm, Register arg) { 5039 masm->push(arg); 5040 } 5041 5042 static void pass_arg2(MacroAssembler* masm, Register arg) { 5043 masm->push(arg); 5044 } 5045 5046 static void pass_arg3(MacroAssembler* masm, Register arg) { 5047 masm->push(arg); 5048 } 5049 5050 #ifndef PRODUCT 5051 extern "C" void findpc(intptr_t x); 5052 #endif 5053 5054 void MacroAssembler::debug32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip, char* msg) { 5055 // In order to get locks to work, we need to fake a in_VM state 5056 JavaThread* thread = JavaThread::current(); 5057 JavaThreadState saved_state = thread->thread_state(); 5058 thread->set_thread_state(_thread_in_vm); 5059 if (ShowMessageBoxOnError) { 5060 JavaThread* thread = 
JavaThread::current(); 5061 JavaThreadState saved_state = thread->thread_state(); 5062 thread->set_thread_state(_thread_in_vm); 5063 if (CountBytecodes || TraceBytecodes || StopInterpreterAt) { 5064 ttyLocker ttyl; 5065 BytecodeCounter::print(); 5066 } 5067 // To see where a verify_oop failed, get $ebx+40/X for this frame. 5068 // This is the value of eip which points to where verify_oop will return. 5069 if (os::message_box(msg, "Execution stopped, print registers?")) { 5070 ttyLocker ttyl; 5071 tty->print_cr("eip = 0x%08x", eip); 5072 #ifndef PRODUCT 5073 if ((WizardMode || Verbose) && PrintMiscellaneous) { 5074 tty->cr(); 5075 findpc(eip); 5076 tty->cr(); 5077 } 5078 #endif 5079 tty->print_cr("rax = 0x%08x", rax); 5080 tty->print_cr("rbx = 0x%08x", rbx); 5081 tty->print_cr("rcx = 0x%08x", rcx); 5082 tty->print_cr("rdx = 0x%08x", rdx); 5083 tty->print_cr("rdi = 0x%08x", rdi); 5084 tty->print_cr("rsi = 0x%08x", rsi); 5085 tty->print_cr("rbp = 0x%08x", rbp); 5086 tty->print_cr("rsp = 0x%08x", rsp); 5087 BREAKPOINT; 5088 assert(false, "start up GDB"); 5089 } 5090 } else { 5091 ttyLocker ttyl; 5092 ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg); 5093 assert(false, "DEBUG MESSAGE"); 5094 } 5095 ThreadStateTransition::transition(thread, _thread_in_vm, saved_state); 5096 } 5097 5098 void MacroAssembler::stop(const char* msg) { 5099 ExternalAddress message((address)msg); 5100 // push address of message 5101 pushptr(message.addr()); 5102 { Label L; call(L, relocInfo::none); bind(L); } // push eip 5103 pusha(); // push registers 5104 call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug32))); 5105 hlt(); 5106 } 5107 5108 void MacroAssembler::warn(const char* msg) { 5109 push_CPU_state(); 5110 5111 ExternalAddress message((address) msg); 5112 // push address of message 5113 pushptr(message.addr()); 5114 5115 call(RuntimeAddress(CAST_FROM_FN_PTR(address, warning))); 5116 addl(rsp, wordSize); // discard argument 5117 
pop_CPU_state(); 5118 } 5119 5120 #else // _LP64 5121 5122 // 64 bit versions 5123 5124 Address MacroAssembler::as_Address(AddressLiteral adr) { 5125 // amd64 always does this as a pc-rel 5126 // we can be absolute or disp based on the instruction type 5127 // jmp/call are displacements others are absolute 5128 assert(!adr.is_lval(), "must be rval"); 5129 assert(reachable(adr), "must be"); 5130 return Address((int32_t)(intptr_t)(adr.target() - pc()), adr.target(), adr.reloc()); 5131 5132 } 5133 5134 Address MacroAssembler::as_Address(ArrayAddress adr) { 5135 AddressLiteral base = adr.base(); 5136 lea(rscratch1, base); 5137 Address index = adr.index(); 5138 assert(index._disp == 0, "must not have disp"); // maybe it can? 5139 Address array(rscratch1, index._index, index._scale, index._disp); 5140 return array; 5141 } 5142 5143 int MacroAssembler::biased_locking_enter(Register lock_reg, 5144 Register obj_reg, 5145 Register swap_reg, 5146 Register tmp_reg, 5147 bool swap_reg_contains_mark, 5148 Label& done, 5149 Label* slow_case, 5150 BiasedLockingCounters* counters) { 5151 assert(UseBiasedLocking, "why call this otherwise?"); 5152 assert(swap_reg == rax, "swap_reg must be rax for cmpxchgq"); 5153 assert(tmp_reg != noreg, "tmp_reg must be supplied"); 5154 assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg); 5155 assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout"); 5156 Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes()); 5157 Address saved_mark_addr(lock_reg, 0); 5158 5159 if (PrintBiasedLockingStatistics && counters == NULL) 5160 counters = BiasedLocking::counters(); 5161 5162 // Biased locking 5163 // See whether the lock is currently biased toward our thread and 5164 // whether the epoch is still valid 5165 // Note that the runtime guarantees sufficient alignment of JavaThread 5166 // pointers to allow age to be placed into low bits 5167 // First check to 
see whether biasing is even enabled for this object 5168 Label cas_label; 5169 int null_check_offset = -1; 5170 if (!swap_reg_contains_mark) { 5171 null_check_offset = offset(); 5172 movq(swap_reg, mark_addr); 5173 } 5174 movq(tmp_reg, swap_reg); 5175 andq(tmp_reg, markOopDesc::biased_lock_mask_in_place); 5176 cmpq(tmp_reg, markOopDesc::biased_lock_pattern); 5177 jcc(Assembler::notEqual, cas_label); 5178 // The bias pattern is present in the object's header. Need to check 5179 // whether the bias owner and the epoch are both still current. 5180 load_prototype_header(tmp_reg, obj_reg); 5181 orq(tmp_reg, r15_thread); 5182 xorq(tmp_reg, swap_reg); 5183 andq(tmp_reg, ~((int) markOopDesc::age_mask_in_place)); 5184 if (counters != NULL) { 5185 cond_inc32(Assembler::zero, 5186 ExternalAddress((address) counters->anonymously_biased_lock_entry_count_addr())); 5187 } 5188 jcc(Assembler::equal, done); 5189 5190 Label try_revoke_bias; 5191 Label try_rebias; 5192 5193 // At this point we know that the header has the bias pattern and 5194 // that we are not the bias owner in the current epoch. We need to 5195 // figure out more details about the state of the header in order to 5196 // know what operations can be legally performed on the object's 5197 // header. 5198 5199 // If the low three bits in the xor result aren't clear, that means 5200 // the prototype header is no longer biased and we have to revoke 5201 // the bias on this object. 5202 testq(tmp_reg, markOopDesc::biased_lock_mask_in_place); 5203 jcc(Assembler::notZero, try_revoke_bias); 5204 5205 // Biasing is still enabled for this data type. See whether the 5206 // epoch of the current bias is still valid, meaning that the epoch 5207 // bits of the mark word are equal to the epoch bits of the 5208 // prototype header. (Note that the prototype header's epoch bits 5209 // only change at a safepoint.) If not, attempt to rebias the object 5210 // toward the current thread. 
Note that we must be absolutely sure 5211 // that the current epoch is invalid in order to do this because 5212 // otherwise the manipulations it performs on the mark word are 5213 // illegal. 5214 testq(tmp_reg, markOopDesc::epoch_mask_in_place); 5215 jcc(Assembler::notZero, try_rebias); 5216 5217 // The epoch of the current bias is still valid but we know nothing 5218 // about the owner; it might be set or it might be clear. Try to 5219 // acquire the bias of the object using an atomic operation. If this 5220 // fails we will go in to the runtime to revoke the object's bias. 5221 // Note that we first construct the presumed unbiased header so we 5222 // don't accidentally blow away another thread's valid bias. 5223 andq(swap_reg, 5224 markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place); 5225 movq(tmp_reg, swap_reg); 5226 orq(tmp_reg, r15_thread); 5227 if (os::is_MP()) { 5228 lock(); 5229 } 5230 cmpxchgq(tmp_reg, Address(obj_reg, 0)); 5231 // If the biasing toward our thread failed, this means that 5232 // another thread succeeded in biasing it toward itself and we 5233 // need to revoke that bias. The revocation will occur in the 5234 // interpreter runtime in the slow case. 5235 if (counters != NULL) { 5236 cond_inc32(Assembler::zero, 5237 ExternalAddress((address) counters->anonymously_biased_lock_entry_count_addr())); 5238 } 5239 if (slow_case != NULL) { 5240 jcc(Assembler::notZero, *slow_case); 5241 } 5242 jmp(done); 5243 5244 bind(try_rebias); 5245 // At this point we know the epoch has expired, meaning that the 5246 // current "bias owner", if any, is actually invalid. Under these 5247 // circumstances _only_, we are allowed to use the current header's 5248 // value as the comparison value when doing the cas to acquire the 5249 // bias in the current epoch. In other words, we allow transfer of 5250 // the bias from one thread to another directly in this situation. 
5251 // 5252 // FIXME: due to a lack of registers we currently blow away the age 5253 // bits in this situation. Should attempt to preserve them. 5254 load_prototype_header(tmp_reg, obj_reg); 5255 orq(tmp_reg, r15_thread); 5256 if (os::is_MP()) { 5257 lock(); 5258 } 5259 cmpxchgq(tmp_reg, Address(obj_reg, 0)); 5260 // If the biasing toward our thread failed, then another thread 5261 // succeeded in biasing it toward itself and we need to revoke that 5262 // bias. The revocation will occur in the runtime in the slow case. 5263 if (counters != NULL) { 5264 cond_inc32(Assembler::zero, 5265 ExternalAddress((address) counters->rebiased_lock_entry_count_addr())); 5266 } 5267 if (slow_case != NULL) { 5268 jcc(Assembler::notZero, *slow_case); 5269 } 5270 jmp(done); 5271 5272 bind(try_revoke_bias); 5273 // The prototype mark in the klass doesn't have the bias bit set any 5274 // more, indicating that objects of this data type are not supposed 5275 // to be biased any more. We are going to try to reset the mark of 5276 // this object to the prototype value and fall through to the 5277 // CAS-based locking scheme. Note that if our CAS fails, it means 5278 // that another thread raced us for the privilege of revoking the 5279 // bias of this particular object, so it's okay to continue in the 5280 // normal locking code. 5281 // 5282 // FIXME: due to a lack of registers we currently blow away the age 5283 // bits in this situation. Should attempt to preserve them. 5284 load_prototype_header(tmp_reg, obj_reg); 5285 if (os::is_MP()) { 5286 lock(); 5287 } 5288 cmpxchgq(tmp_reg, Address(obj_reg, 0)); 5289 // Fall through to the normal CAS-based lock, because no matter what 5290 // the result of the above CAS, some thread must have succeeded in 5291 // removing the bias bit from the object's header. 
5292 if (counters != NULL) { 5293 cond_inc32(Assembler::zero, 5294 ExternalAddress((address) counters->revoked_lock_entry_count_addr())); 5295 } 5296 5297 bind(cas_label); 5298 5299 return null_check_offset; 5300 } 5301 5302 void MacroAssembler::call_VM_leaf_base(address entry_point, int num_args) { 5303 Label L, E; 5304 5305 #ifdef _WIN64 5306 // Windows always allocates space for it's register args 5307 assert(num_args <= 4, "only register arguments supported"); 5308 subq(rsp, frame::arg_reg_save_area_bytes); 5309 #endif 5310 5311 // Align stack if necessary 5312 testl(rsp, 15); 5313 jcc(Assembler::zero, L); 5314 5315 subq(rsp, 8); 5316 { 5317 call(RuntimeAddress(entry_point)); 5318 } 5319 addq(rsp, 8); 5320 jmp(E); 5321 5322 bind(L); 5323 { 5324 call(RuntimeAddress(entry_point)); 5325 } 5326 5327 bind(E); 5328 5329 #ifdef _WIN64 5330 // restore stack pointer 5331 addq(rsp, frame::arg_reg_save_area_bytes); 5332 #endif 5333 5334 } 5335 5336 void MacroAssembler::cmp64(Register src1, AddressLiteral src2) { 5337 assert(!src2.is_lval(), "should use cmpptr"); 5338 5339 if (reachable(src2)) { 5340 cmpq(src1, as_Address(src2)); 5341 } else { 5342 lea(rscratch1, src2); 5343 Assembler::cmpq(src1, Address(rscratch1, 0)); 5344 } 5345 } 5346 5347 int MacroAssembler::corrected_idivq(Register reg) { 5348 // Full implementation of Java ldiv and lrem; checks for special 5349 // case as described in JVM spec., p.243 & p.271. The function 5350 // returns the (pc) offset of the idivl instruction - may be needed 5351 // for implicit exceptions. 
//
  //         normal case                           special case
  //
  // input : rax: dividend                         min_long
  //         reg: divisor   (may not be eax/edx)   -1
  //
  // output: rax: quotient  (= rax idiv reg)       min_long
  //         rdx: remainder (= rax irem reg)       0
  assert(reg != rax && reg != rdx, "reg cannot be rax or rdx register");
  static const int64_t min_long = 0x8000000000000000;
  Label normal_case, special_case;

  // check for special case
  cmp64(rax, ExternalAddress((address) &min_long));
  jcc(Assembler::notEqual, normal_case);
  xorl(rdx, rdx); // prepare rdx for possible special case (where
                  // remainder = 0)
  cmpq(reg, -1);
  jcc(Assembler::equal, special_case);

  // handle normal case
  bind(normal_case);
  cdqq();
  int idivq_offset = offset();
  idivq(reg);

  // normal and special case exit
  bind(special_case);

  return idivq_offset;
}

// Emits code that subtracts the constant 'value' from the 64-bit register reg.
// min_jint is tested first because it cannot be negated within an int, so it
// must not reach the "negate and delegate to incrementq" path below.
// A zero value emits nothing; a value of 1 uses decq when UseIncDec is set.
void MacroAssembler::decrementq(Register reg, int value) {
  if (value == min_jint) { subq(reg, value); return; }
  if (value < 0) { incrementq(reg, -value); return; }
  if (value == 0) { ; return; }
  if (value == 1 && UseIncDec) { decq(reg) ; return; }
  /* else */ { subq(reg, value) ; return; }
}

// Memory-operand variant of decrementq(Register, int): same case analysis,
// applied to the 64-bit value at dst.
void MacroAssembler::decrementq(Address dst, int value) {
  if (value == min_jint) { subq(dst, value); return; }
  if (value < 0) { incrementq(dst, -value); return; }
  if (value == 0) { ; return; }
  if (value == 1 && UseIncDec) { decq(dst) ; return; }
  /* else */ { subq(dst, value) ; return; }
}

void MacroAssembler::fat_nop() {
  // A 5 byte nop that is safe for patching (see patch_verified_entry)
  // Recommended sequence from 'Software Optimization Guide for the AMD
  // Hammer Processor'
  emit_byte(0x66);
  emit_byte(0x66);
  emit_byte(0x90);
  emit_byte(0x66);
  emit_byte(0x90);
}

// Emits code that adds the constant 'value' to the 64-bit register reg.
// Mirror image of decrementq: min_jint is handled first, negative values are
// delegated to decrementq, zero emits nothing, and 1 uses incq when
// UseIncDec is set.
void MacroAssembler::incrementq(Register reg, int value) {
  if (value == min_jint) { addq(reg, value); return; }
  if (value < 0) { decrementq(reg, -value); return; }
  if (value == 0) { ; return; }
  if (value == 1 && UseIncDec) { incq(reg) ; return; }
  /* else */ { addq(reg, value) ; return; }
}

// Memory-operand variant of incrementq(Register, int).
void MacroAssembler::incrementq(Address dst, int value) {
  if (value == min_jint) { addq(dst, value); return; }
  if (value < 0) { decrementq(dst, -value); return; }
  if (value == 0) { ; return; }
  if (value == 1 && UseIncDec) { incq(dst) ; return; }
  /* else */ { addq(dst, value) ; return; }
}

// 32bit can do a case table jump in one instruction but we no longer allow the base
// to be installed in the Address class
//
// Loads the table base into rscratch1, installs it as the base register of
// the index address, and jumps through the resulting memory operand.
// Clobbers rscratch1.
void MacroAssembler::jump(ArrayAddress entry) {
  lea(rscratch1, entry.base());
  Address dispatch = entry.index();
  assert(dispatch._base == noreg, "must be");
  dispatch._base = rscratch1;
  jmp(dispatch);
}

// Two-register long compare; not expected to be called on 64-bit
// (ShouldNotReachHere fires at code-generation time).
void MacroAssembler::lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo) {
  ShouldNotReachHere(); // 64bit doesn't use two regs
  cmpq(x_lo, y_lo);
}

// Materializes the literal's target address in dst as a 64-bit immediate
// carrying the literal's relocation spec.
void MacroAssembler::lea(Register dst, AddressLiteral src) {
  mov_literal64(dst, (intptr_t)src.target(), src.rspec());
}

// Stores the literal's target address into memory at dst.
// Clobbers rscratch1, which holds the address in between.
void MacroAssembler::lea(Address dst, AddressLiteral adr) {
  mov_literal64(rscratch1, (intptr_t)adr.target(), adr.rspec());
  movptr(dst, rscratch1);
}

// Emits the single-byte LEAVE instruction (opcode 0xC9).
void MacroAssembler::leave() {
  // %%% is this really better? Why not on 32bit too?
  emit_byte(0xC9); // LEAVE
}

// Two-register long negate; not expected to be called on 64-bit
// (ShouldNotReachHere fires at code-generation time).
void MacroAssembler::lneg(Register hi, Register lo) {
  ShouldNotReachHere(); // 64bit doesn't use two regs
  negq(lo);
}

// Loads the jobject handle value into dst as a 64-bit immediate tagged with
// an immediate-oop relocation.
void MacroAssembler::movoop(Register dst, jobject obj) {
  mov_literal64(dst, (intptr_t)obj, oop_Relocation::spec_for_immediate());
}

// Stores the jobject handle value into memory at dst.
// Clobbers rscratch1, which holds the value in between.
void MacroAssembler::movoop(Address dst, jobject obj) {
  mov_literal64(rscratch1, (intptr_t)obj, oop_Relocation::spec_for_immediate());
  movq(dst, rscratch1);
}

// Pointer-sized move from an AddressLiteral.
//  - lval literal: dst receives the target address itself.
//  - otherwise:    dst receives the 64-bit word stored at the target; when the
//                  target is not reachable its address is first loaded into
//                  rscratch1 (which is then clobbered).
void MacroAssembler::movptr(Register dst, AddressLiteral src) {
  if (src.is_lval()) {
    mov_literal64(dst, (intptr_t)src.target(), src.rspec());
  } else {
    if (reachable(src)) {
      movq(dst, as_Address(src));
    } else {
      lea(rscratch1, src);
      movq(dst, Address(rscratch1,0));
    }
  }
}

// Stores src into the array element addressed by dst.
void MacroAssembler::movptr(ArrayAddress dst, Register src) {
  movq(as_Address(dst), src);
}

// Loads dst from the array element addressed by src.
void MacroAssembler::movptr(Register dst, ArrayAddress src) {
  movq(dst, as_Address(src));
}

// src should NEVER be a real pointer.
Use AddressLiteral for true pointers 5492 void MacroAssembler::movptr(Address dst, intptr_t src) { 5493 mov64(rscratch1, src); 5494 movq(dst, rscratch1); 5495 } 5496 5497 // These are mostly for initializing NULL 5498 void MacroAssembler::movptr(Address dst, int32_t src) { 5499 movslq(dst, src); 5500 } 5501 5502 void MacroAssembler::movptr(Register dst, int32_t src) { 5503 mov64(dst, (intptr_t)src); 5504 } 5505 5506 void MacroAssembler::pushoop(jobject obj) { 5507 movoop(rscratch1, obj); 5508 push(rscratch1); 5509 } 5510 5511 void MacroAssembler::pushptr(AddressLiteral src) { 5512 lea(rscratch1, src); 5513 if (src.is_lval()) { 5514 push(rscratch1); 5515 } else { 5516 pushq(Address(rscratch1, 0)); 5517 } 5518 } 5519 5520 void MacroAssembler::reset_last_Java_frame(bool clear_fp, 5521 bool clear_pc) { 5522 // we must set sp to zero to clear frame 5523 movptr(Address(r15_thread, JavaThread::last_Java_sp_offset()), NULL_WORD); 5524 // must clear fp, so that compiled frames are not confused; it is 5525 // possible that we need it only for debugging 5526 if (clear_fp) { 5527 movptr(Address(r15_thread, JavaThread::last_Java_fp_offset()), NULL_WORD); 5528 } 5529 5530 if (clear_pc) { 5531 movptr(Address(r15_thread, JavaThread::last_Java_pc_offset()), NULL_WORD); 5532 } 5533 } 5534 5535 void MacroAssembler::set_last_Java_frame(Register last_java_sp, 5536 Register last_java_fp, 5537 address last_java_pc) { 5538 // determine last_java_sp register 5539 if (!last_java_sp->is_valid()) { 5540 last_java_sp = rsp; 5541 } 5542 5543 // last_java_fp is optional 5544 if (last_java_fp->is_valid()) { 5545 movptr(Address(r15_thread, JavaThread::last_Java_fp_offset()), 5546 last_java_fp); 5547 } 5548 5549 // last_java_pc is optional 5550 if (last_java_pc != NULL) { 5551 Address java_pc(r15_thread, 5552 JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()); 5553 lea(rscratch1, InternalAddress(last_java_pc)); 5554 movptr(java_pc, rscratch1); 5555 } 5556 5557 
movptr(Address(r15_thread, JavaThread::last_Java_sp_offset()), last_java_sp); 5558 } 5559 5560 static void pass_arg0(MacroAssembler* masm, Register arg) { 5561 if (c_rarg0 != arg ) { 5562 masm->mov(c_rarg0, arg); 5563 } 5564 } 5565 5566 static void pass_arg1(MacroAssembler* masm, Register arg) { 5567 if (c_rarg1 != arg ) { 5568 masm->mov(c_rarg1, arg); 5569 } 5570 } 5571 5572 static void pass_arg2(MacroAssembler* masm, Register arg) { 5573 if (c_rarg2 != arg ) { 5574 masm->mov(c_rarg2, arg); 5575 } 5576 } 5577 5578 static void pass_arg3(MacroAssembler* masm, Register arg) { 5579 if (c_rarg3 != arg ) { 5580 masm->mov(c_rarg3, arg); 5581 } 5582 } 5583 5584 void MacroAssembler::stop(const char* msg) { 5585 address rip = pc(); 5586 pusha(); // get regs on stack 5587 lea(c_rarg0, ExternalAddress((address) msg)); 5588 lea(c_rarg1, InternalAddress(rip)); 5589 movq(c_rarg2, rsp); // pass pointer to regs array 5590 andq(rsp, -16); // align stack as required by ABI 5591 call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug64))); 5592 hlt(); 5593 } 5594 5595 void MacroAssembler::warn(const char* msg) { 5596 push(rsp); 5597 andq(rsp, -16); // align stack as required by push_CPU_state and call 5598 5599 push_CPU_state(); // keeps alignment at 16 bytes 5600 lea(c_rarg0, ExternalAddress((address) msg)); 5601 call_VM_leaf(CAST_FROM_FN_PTR(address, warning), c_rarg0); 5602 pop_CPU_state(); 5603 pop(rsp); 5604 } 5605 5606 #ifndef PRODUCT 5607 extern "C" void findpc(intptr_t x); 5608 #endif 5609 5610 void MacroAssembler::debug64(char* msg, int64_t pc, int64_t regs[]) { 5611 // In order to get locks to work, we need to fake a in_VM state 5612 if (ShowMessageBoxOnError ) { 5613 JavaThread* thread = JavaThread::current(); 5614 JavaThreadState saved_state = thread->thread_state(); 5615 thread->set_thread_state(_thread_in_vm); 5616 #ifndef PRODUCT 5617 if (CountBytecodes || TraceBytecodes || StopInterpreterAt) { 5618 ttyLocker ttyl; 5619 BytecodeCounter::print(); 5620 } 5621 
#endif 5622 // To see where a verify_oop failed, get $ebx+40/X for this frame. 5623 // XXX correct this offset for amd64 5624 // This is the value of eip which points to where verify_oop will return. 5625 if (os::message_box(msg, "Execution stopped, print registers?")) { 5626 ttyLocker ttyl; 5627 tty->print_cr("rip = 0x%016lx", pc); 5628 #ifndef PRODUCT 5629 tty->cr(); 5630 findpc(pc); 5631 tty->cr(); 5632 #endif 5633 tty->print_cr("rax = 0x%016lx", regs[15]); 5634 tty->print_cr("rbx = 0x%016lx", regs[12]); 5635 tty->print_cr("rcx = 0x%016lx", regs[14]); 5636 tty->print_cr("rdx = 0x%016lx", regs[13]); 5637 tty->print_cr("rdi = 0x%016lx", regs[8]); 5638 tty->print_cr("rsi = 0x%016lx", regs[9]); 5639 tty->print_cr("rbp = 0x%016lx", regs[10]); 5640 tty->print_cr("rsp = 0x%016lx", regs[11]); 5641 tty->print_cr("r8 = 0x%016lx", regs[7]); 5642 tty->print_cr("r9 = 0x%016lx", regs[6]); 5643 tty->print_cr("r10 = 0x%016lx", regs[5]); 5644 tty->print_cr("r11 = 0x%016lx", regs[4]); 5645 tty->print_cr("r12 = 0x%016lx", regs[3]); 5646 tty->print_cr("r13 = 0x%016lx", regs[2]); 5647 tty->print_cr("r14 = 0x%016lx", regs[1]); 5648 tty->print_cr("r15 = 0x%016lx", regs[0]); 5649 BREAKPOINT; 5650 } 5651 ThreadStateTransition::transition(thread, _thread_in_vm, saved_state); 5652 } else { 5653 ttyLocker ttyl; 5654 ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", 5655 msg); 5656 } 5657 } 5658 5659 #endif // _LP64 5660 5661 // Now versions that are common to 32/64 bit 5662 5663 void MacroAssembler::addptr(Register dst, int32_t imm32) { 5664 LP64_ONLY(addq(dst, imm32)) NOT_LP64(addl(dst, imm32)); 5665 } 5666 5667 void MacroAssembler::addptr(Register dst, Register src) { 5668 LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src)); 5669 } 5670 5671 void MacroAssembler::addptr(Address dst, Register src) { 5672 LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src)); 5673 } 5674 5675 void MacroAssembler::align(int modulus) { 5676 if (offset() % modulus != 0) { 5677 nop(modulus - 
(offset() % modulus)); 5678 } 5679 } 5680 5681 void MacroAssembler::andpd(XMMRegister dst, AddressLiteral src) { 5682 if (reachable(src)) { 5683 andpd(dst, as_Address(src)); 5684 } else { 5685 lea(rscratch1, src); 5686 andpd(dst, Address(rscratch1, 0)); 5687 } 5688 } 5689 5690 void MacroAssembler::andptr(Register dst, int32_t imm32) { 5691 LP64_ONLY(andq(dst, imm32)) NOT_LP64(andl(dst, imm32)); 5692 } 5693 5694 void MacroAssembler::atomic_incl(AddressLiteral counter_addr) { 5695 pushf(); 5696 if (os::is_MP()) 5697 lock(); 5698 incrementl(counter_addr); 5699 popf(); 5700 } 5701 5702 // Writes to stack successive pages until offset reached to check for 5703 // stack overflow + shadow pages. This clobbers tmp. 5704 void MacroAssembler::bang_stack_size(Register size, Register tmp) { 5705 movptr(tmp, rsp); 5706 // Bang stack for total size given plus shadow page size. 5707 // Bang one page at a time because large size can bang beyond yellow and 5708 // red zones. 5709 Label loop; 5710 bind(loop); 5711 movl(Address(tmp, (-os::vm_page_size())), size ); 5712 subptr(tmp, os::vm_page_size()); 5713 subl(size, os::vm_page_size()); 5714 jcc(Assembler::greater, loop); 5715 5716 // Bang down shadow pages too. 5717 // The -1 because we already subtracted 1 page. 5718 for (int i = 0; i< StackShadowPages-1; i++) { 5719 // this could be any sized move but this is can be a debugging crumb 5720 // so the bigger the better. 5721 movptr(Address(tmp, (-i*os::vm_page_size())), size ); 5722 } 5723 } 5724 5725 void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) { 5726 assert(UseBiasedLocking, "why call this otherwise?"); 5727 5728 // Check for biased locking unlock case, which is a no-op 5729 // Note: we do not have to check the thread ID for two reasons. 5730 // First, the interpreter checks for IllegalMonitorStateException at 5731 // a higher level. 
Second, if the bias was revoked while we held the 5732 // lock, the object could not be rebiased toward another thread, so 5733 // the bias bit would be clear. 5734 movptr(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); 5735 andptr(temp_reg, markOopDesc::biased_lock_mask_in_place); 5736 cmpptr(temp_reg, markOopDesc::biased_lock_pattern); 5737 jcc(Assembler::equal, done); 5738 } 5739 5740 void MacroAssembler::c2bool(Register x) { 5741 // implements x == 0 ? 0 : 1 5742 // note: must only look at least-significant byte of x 5743 // since C-style booleans are stored in one byte 5744 // only! (was bug) 5745 andl(x, 0xFF); 5746 setb(Assembler::notZero, x); 5747 } 5748 5749 // Wouldn't need if AddressLiteral version had new name 5750 void MacroAssembler::call(Label& L, relocInfo::relocType rtype) { 5751 Assembler::call(L, rtype); 5752 } 5753 5754 void MacroAssembler::call(Register entry) { 5755 Assembler::call(entry); 5756 } 5757 5758 void MacroAssembler::call(AddressLiteral entry) { 5759 if (reachable(entry)) { 5760 Assembler::call_literal(entry.target(), entry.rspec()); 5761 } else { 5762 lea(rscratch1, entry); 5763 Assembler::call(rscratch1); 5764 } 5765 } 5766 5767 // Implementation of call_VM versions 5768 5769 void MacroAssembler::call_VM(Register oop_result, 5770 address entry_point, 5771 bool check_exceptions) { 5772 Label C, E; 5773 call(C, relocInfo::none); 5774 jmp(E); 5775 5776 bind(C); 5777 call_VM_helper(oop_result, entry_point, 0, check_exceptions); 5778 ret(0); 5779 5780 bind(E); 5781 } 5782 5783 void MacroAssembler::call_VM(Register oop_result, 5784 address entry_point, 5785 Register arg_1, 5786 bool check_exceptions) { 5787 Label C, E; 5788 call(C, relocInfo::none); 5789 jmp(E); 5790 5791 bind(C); 5792 pass_arg1(this, arg_1); 5793 call_VM_helper(oop_result, entry_point, 1, check_exceptions); 5794 ret(0); 5795 5796 bind(E); 5797 } 5798 5799 void MacroAssembler::call_VM(Register oop_result, 5800 address entry_point, 5801 Register arg_1, 5802 
Register arg_2, 5803 bool check_exceptions) { 5804 Label C, E; 5805 call(C, relocInfo::none); 5806 jmp(E); 5807 5808 bind(C); 5809 5810 LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg")); 5811 5812 pass_arg2(this, arg_2); 5813 pass_arg1(this, arg_1); 5814 call_VM_helper(oop_result, entry_point, 2, check_exceptions); 5815 ret(0); 5816 5817 bind(E); 5818 } 5819 5820 void MacroAssembler::call_VM(Register oop_result, 5821 address entry_point, 5822 Register arg_1, 5823 Register arg_2, 5824 Register arg_3, 5825 bool check_exceptions) { 5826 Label C, E; 5827 call(C, relocInfo::none); 5828 jmp(E); 5829 5830 bind(C); 5831 5832 LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg")); 5833 LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg")); 5834 pass_arg3(this, arg_3); 5835 5836 LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg")); 5837 pass_arg2(this, arg_2); 5838 5839 pass_arg1(this, arg_1); 5840 call_VM_helper(oop_result, entry_point, 3, check_exceptions); 5841 ret(0); 5842 5843 bind(E); 5844 } 5845 5846 void MacroAssembler::call_VM(Register oop_result, 5847 Register last_java_sp, 5848 address entry_point, 5849 int number_of_arguments, 5850 bool check_exceptions) { 5851 Register thread = LP64_ONLY(r15_thread) NOT_LP64(noreg); 5852 call_VM_base(oop_result, thread, last_java_sp, entry_point, number_of_arguments, check_exceptions); 5853 } 5854 5855 void MacroAssembler::call_VM(Register oop_result, 5856 Register last_java_sp, 5857 address entry_point, 5858 Register arg_1, 5859 bool check_exceptions) { 5860 pass_arg1(this, arg_1); 5861 call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions); 5862 } 5863 5864 void MacroAssembler::call_VM(Register oop_result, 5865 Register last_java_sp, 5866 address entry_point, 5867 Register arg_1, 5868 Register arg_2, 5869 bool check_exceptions) { 5870 5871 LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg")); 5872 pass_arg2(this, arg_2); 5873 pass_arg1(this, arg_1); 5874 call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions); 
5875 } 5876 5877 void MacroAssembler::call_VM(Register oop_result, 5878 Register last_java_sp, 5879 address entry_point, 5880 Register arg_1, 5881 Register arg_2, 5882 Register arg_3, 5883 bool check_exceptions) { 5884 LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg")); 5885 LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg")); 5886 pass_arg3(this, arg_3); 5887 LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg")); 5888 pass_arg2(this, arg_2); 5889 pass_arg1(this, arg_1); 5890 call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions); 5891 } 5892 5893 void MacroAssembler::call_VM_base(Register oop_result, 5894 Register java_thread, 5895 Register last_java_sp, 5896 address entry_point, 5897 int number_of_arguments, 5898 bool check_exceptions) { 5899 // determine java_thread register 5900 if (!java_thread->is_valid()) { 5901 #ifdef _LP64 5902 java_thread = r15_thread; 5903 #else 5904 java_thread = rdi; 5905 get_thread(java_thread); 5906 #endif // LP64 5907 } 5908 // determine last_java_sp register 5909 if (!last_java_sp->is_valid()) { 5910 last_java_sp = rsp; 5911 } 5912 // debugging support 5913 assert(number_of_arguments >= 0 , "cannot have negative number of arguments"); 5914 LP64_ONLY(assert(java_thread == r15_thread, "unexpected register")); 5915 #ifdef ASSERT 5916 LP64_ONLY(if (UseCompressedOops) verify_heapbase("call_VM_base");) 5917 #endif // ASSERT 5918 5919 assert(java_thread != oop_result , "cannot use the same register for java_thread & oop_result"); 5920 assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp"); 5921 5922 // push java thread (becomes first argument of C function) 5923 5924 NOT_LP64(push(java_thread); number_of_arguments++); 5925 LP64_ONLY(mov(c_rarg0, r15_thread)); 5926 5927 // set last Java frame before call 5928 assert(last_java_sp != rbp, "can't use ebp/rbp"); 5929 5930 // Only interpreter should have to set fp 5931 set_last_Java_frame(java_thread, last_java_sp, rbp, NULL); 5932 5933 // do 
the call, remove parameters 5934 MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments); 5935 5936 // restore the thread (cannot use the pushed argument since arguments 5937 // may be overwritten by C code generated by an optimizing compiler); 5938 // however can use the register value directly if it is callee saved. 5939 if (LP64_ONLY(true ||) java_thread == rdi || java_thread == rsi) { 5940 // rdi & rsi (also r15) are callee saved -> nothing to do 5941 #ifdef ASSERT 5942 guarantee(java_thread != rax, "change this code"); 5943 push(rax); 5944 { Label L; 5945 get_thread(rax); 5946 cmpptr(java_thread, rax); 5947 jcc(Assembler::equal, L); 5948 stop("MacroAssembler::call_VM_base: rdi not callee saved?"); 5949 bind(L); 5950 } 5951 pop(rax); 5952 #endif 5953 } else { 5954 get_thread(java_thread); 5955 } 5956 // reset last Java frame 5957 // Only interpreter should have to clear fp 5958 reset_last_Java_frame(java_thread, true, false); 5959 5960 #ifndef CC_INTERP 5961 // C++ interp handles this in the interpreter 5962 check_and_handle_popframe(java_thread); 5963 check_and_handle_earlyret(java_thread); 5964 #endif /* CC_INTERP */ 5965 5966 if (check_exceptions) { 5967 // check for pending exceptions (java_thread is set upon return) 5968 cmpptr(Address(java_thread, Thread::pending_exception_offset()), (int32_t) NULL_WORD); 5969 #ifndef _LP64 5970 jump_cc(Assembler::notEqual, 5971 RuntimeAddress(StubRoutines::forward_exception_entry())); 5972 #else 5973 // This used to conditionally jump to forward_exception however it is 5974 // possible if we relocate that the branch will not reach. 
So we must jump 5975 // around so we can always reach 5976 5977 Label ok; 5978 jcc(Assembler::equal, ok); 5979 jump(RuntimeAddress(StubRoutines::forward_exception_entry())); 5980 bind(ok); 5981 #endif // LP64 5982 } 5983 5984 // get oop result if there is one and reset the value in the thread 5985 if (oop_result->is_valid()) { 5986 movptr(oop_result, Address(java_thread, JavaThread::vm_result_offset())); 5987 movptr(Address(java_thread, JavaThread::vm_result_offset()), NULL_WORD); 5988 verify_oop(oop_result, "broken oop in call_VM_base"); 5989 } 5990 } 5991 5992 void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) { 5993 5994 // Calculate the value for last_Java_sp 5995 // somewhat subtle. call_VM does an intermediate call 5996 // which places a return address on the stack just under the 5997 // stack pointer as the user finsihed with it. This allows 5998 // use to retrieve last_Java_pc from last_Java_sp[-1]. 5999 // On 32bit we then have to push additional args on the stack to accomplish 6000 // the actual requested call. On 64bit call_VM only can use register args 6001 // so the only extra space is the return address that call_VM created. 6002 // This hopefully explains the calculations here. 
6003 6004 #ifdef _LP64 6005 // We've pushed one address, correct last_Java_sp 6006 lea(rax, Address(rsp, wordSize)); 6007 #else 6008 lea(rax, Address(rsp, (1 + number_of_arguments) * wordSize)); 6009 #endif // LP64 6010 6011 call_VM_base(oop_result, noreg, rax, entry_point, number_of_arguments, check_exceptions); 6012 6013 } 6014 6015 void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) { 6016 call_VM_leaf_base(entry_point, number_of_arguments); 6017 } 6018 6019 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) { 6020 pass_arg0(this, arg_0); 6021 call_VM_leaf(entry_point, 1); 6022 } 6023 6024 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) { 6025 6026 LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg")); 6027 pass_arg1(this, arg_1); 6028 pass_arg0(this, arg_0); 6029 call_VM_leaf(entry_point, 2); 6030 } 6031 6032 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) { 6033 LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg")); 6034 LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg")); 6035 pass_arg2(this, arg_2); 6036 LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg")); 6037 pass_arg1(this, arg_1); 6038 pass_arg0(this, arg_0); 6039 call_VM_leaf(entry_point, 3); 6040 } 6041 6042 void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0) { 6043 pass_arg0(this, arg_0); 6044 MacroAssembler::call_VM_leaf_base(entry_point, 1); 6045 } 6046 6047 void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1) { 6048 6049 LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg")); 6050 pass_arg1(this, arg_1); 6051 pass_arg0(this, arg_0); 6052 MacroAssembler::call_VM_leaf_base(entry_point, 2); 6053 } 6054 6055 void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) { 6056 LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg")); 6057 LP64_ONLY(assert(arg_1 != 
c_rarg2, "smashed arg")); 6058 pass_arg2(this, arg_2); 6059 LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg")); 6060 pass_arg1(this, arg_1); 6061 pass_arg0(this, arg_0); 6062 MacroAssembler::call_VM_leaf_base(entry_point, 3); 6063 } 6064 6065 void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3) { 6066 LP64_ONLY(assert(arg_0 != c_rarg3, "smashed arg")); 6067 LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg")); 6068 LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg")); 6069 pass_arg3(this, arg_3); 6070 LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg")); 6071 LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg")); 6072 pass_arg2(this, arg_2); 6073 LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg")); 6074 pass_arg1(this, arg_1); 6075 pass_arg0(this, arg_0); 6076 MacroAssembler::call_VM_leaf_base(entry_point, 4); 6077 } 6078 6079 void MacroAssembler::check_and_handle_earlyret(Register java_thread) { 6080 } 6081 6082 void MacroAssembler::check_and_handle_popframe(Register java_thread) { 6083 } 6084 6085 void MacroAssembler::cmp32(AddressLiteral src1, int32_t imm) { 6086 if (reachable(src1)) { 6087 cmpl(as_Address(src1), imm); 6088 } else { 6089 lea(rscratch1, src1); 6090 cmpl(Address(rscratch1, 0), imm); 6091 } 6092 } 6093 6094 void MacroAssembler::cmp32(Register src1, AddressLiteral src2) { 6095 assert(!src2.is_lval(), "use cmpptr"); 6096 if (reachable(src2)) { 6097 cmpl(src1, as_Address(src2)); 6098 } else { 6099 lea(rscratch1, src2); 6100 cmpl(src1, Address(rscratch1, 0)); 6101 } 6102 } 6103 6104 void MacroAssembler::cmp32(Register src1, int32_t imm) { 6105 Assembler::cmpl(src1, imm); 6106 } 6107 6108 void MacroAssembler::cmp32(Register src1, Address src2) { 6109 Assembler::cmpl(src1, src2); 6110 } 6111 6112 void MacroAssembler::cmpsd2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less) { 6113 ucomisd(opr1, opr2); 6114 6115 Label L; 6116 if (unordered_is_less) { 6117 movl(dst, 
-1); 6118 jcc(Assembler::parity, L); 6119 jcc(Assembler::below , L); 6120 movl(dst, 0); 6121 jcc(Assembler::equal , L); 6122 increment(dst); 6123 } else { // unordered is greater 6124 movl(dst, 1); 6125 jcc(Assembler::parity, L); 6126 jcc(Assembler::above , L); 6127 movl(dst, 0); 6128 jcc(Assembler::equal , L); 6129 decrementl(dst); 6130 } 6131 bind(L); 6132 } 6133 6134 void MacroAssembler::cmpss2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less) { 6135 ucomiss(opr1, opr2); 6136 6137 Label L; 6138 if (unordered_is_less) { 6139 movl(dst, -1); 6140 jcc(Assembler::parity, L); 6141 jcc(Assembler::below , L); 6142 movl(dst, 0); 6143 jcc(Assembler::equal , L); 6144 increment(dst); 6145 } else { // unordered is greater 6146 movl(dst, 1); 6147 jcc(Assembler::parity, L); 6148 jcc(Assembler::above , L); 6149 movl(dst, 0); 6150 jcc(Assembler::equal , L); 6151 decrementl(dst); 6152 } 6153 bind(L); 6154 } 6155 6156 6157 void MacroAssembler::cmp8(AddressLiteral src1, int imm) { 6158 if (reachable(src1)) { 6159 cmpb(as_Address(src1), imm); 6160 } else { 6161 lea(rscratch1, src1); 6162 cmpb(Address(rscratch1, 0), imm); 6163 } 6164 } 6165 6166 void MacroAssembler::cmpptr(Register src1, AddressLiteral src2) { 6167 #ifdef _LP64 6168 if (src2.is_lval()) { 6169 movptr(rscratch1, src2); 6170 Assembler::cmpq(src1, rscratch1); 6171 } else if (reachable(src2)) { 6172 cmpq(src1, as_Address(src2)); 6173 } else { 6174 lea(rscratch1, src2); 6175 Assembler::cmpq(src1, Address(rscratch1, 0)); 6176 } 6177 #else 6178 if (src2.is_lval()) { 6179 cmp_literal32(src1, (int32_t) src2.target(), src2.rspec()); 6180 } else { 6181 cmpl(src1, as_Address(src2)); 6182 } 6183 #endif // _LP64 6184 } 6185 6186 void MacroAssembler::cmpptr(Address src1, AddressLiteral src2) { 6187 assert(src2.is_lval(), "not a mem-mem compare"); 6188 #ifdef _LP64 6189 // moves src2's literal address 6190 movptr(rscratch1, src2); 6191 Assembler::cmpq(src1, rscratch1); 6192 #else 6193 cmp_literal32(src1, 
(int32_t) src2.target(), src2.rspec()); 6194 #endif // _LP64 6195 } 6196 6197 void MacroAssembler::locked_cmpxchgptr(Register reg, AddressLiteral adr) { 6198 if (reachable(adr)) { 6199 if (os::is_MP()) 6200 lock(); 6201 cmpxchgptr(reg, as_Address(adr)); 6202 } else { 6203 lea(rscratch1, adr); 6204 if (os::is_MP()) 6205 lock(); 6206 cmpxchgptr(reg, Address(rscratch1, 0)); 6207 } 6208 } 6209 6210 void MacroAssembler::cmpxchgptr(Register reg, Address adr) { 6211 LP64_ONLY(cmpxchgq(reg, adr)) NOT_LP64(cmpxchgl(reg, adr)); 6212 } 6213 6214 void MacroAssembler::comisd(XMMRegister dst, AddressLiteral src) { 6215 if (reachable(src)) { 6216 comisd(dst, as_Address(src)); 6217 } else { 6218 lea(rscratch1, src); 6219 comisd(dst, Address(rscratch1, 0)); 6220 } 6221 } 6222 6223 void MacroAssembler::comiss(XMMRegister dst, AddressLiteral src) { 6224 if (reachable(src)) { 6225 comiss(dst, as_Address(src)); 6226 } else { 6227 lea(rscratch1, src); 6228 comiss(dst, Address(rscratch1, 0)); 6229 } 6230 } 6231 6232 6233 void MacroAssembler::cond_inc32(Condition cond, AddressLiteral counter_addr) { 6234 Condition negated_cond = negate_condition(cond); 6235 Label L; 6236 jcc(negated_cond, L); 6237 atomic_incl(counter_addr); 6238 bind(L); 6239 } 6240 6241 int MacroAssembler::corrected_idivl(Register reg) { 6242 // Full implementation of Java idiv and irem; checks for 6243 // special case as described in JVM spec., p.243 & p.271. 6244 // The function returns the (pc) offset of the idivl 6245 // instruction - may be needed for implicit exceptions. 
6246 // 6247 // normal case special case 6248 // 6249 // input : rax,: dividend min_int 6250 // reg: divisor (may not be rax,/rdx) -1 6251 // 6252 // output: rax,: quotient (= rax, idiv reg) min_int 6253 // rdx: remainder (= rax, irem reg) 0 6254 assert(reg != rax && reg != rdx, "reg cannot be rax, or rdx register"); 6255 const int min_int = 0x80000000; 6256 Label normal_case, special_case; 6257 6258 // check for special case 6259 cmpl(rax, min_int); 6260 jcc(Assembler::notEqual, normal_case); 6261 xorl(rdx, rdx); // prepare rdx for possible special case (where remainder = 0) 6262 cmpl(reg, -1); 6263 jcc(Assembler::equal, special_case); 6264 6265 // handle normal case 6266 bind(normal_case); 6267 cdql(); 6268 int idivl_offset = offset(); 6269 idivl(reg); 6270 6271 // normal and special case exit 6272 bind(special_case); 6273 6274 return idivl_offset; 6275 } 6276 6277 6278 6279 void MacroAssembler::decrementl(Register reg, int value) { 6280 if (value == min_jint) {subl(reg, value) ; return; } 6281 if (value < 0) { incrementl(reg, -value); return; } 6282 if (value == 0) { ; return; } 6283 if (value == 1 && UseIncDec) { decl(reg) ; return; } 6284 /* else */ { subl(reg, value) ; return; } 6285 } 6286 6287 void MacroAssembler::decrementl(Address dst, int value) { 6288 if (value == min_jint) {subl(dst, value) ; return; } 6289 if (value < 0) { incrementl(dst, -value); return; } 6290 if (value == 0) { ; return; } 6291 if (value == 1 && UseIncDec) { decl(dst) ; return; } 6292 /* else */ { subl(dst, value) ; return; } 6293 } 6294 6295 void MacroAssembler::division_with_shift (Register reg, int shift_value) { 6296 assert (shift_value > 0, "illegal shift value"); 6297 Label _is_positive; 6298 testl (reg, reg); 6299 jcc (Assembler::positive, _is_positive); 6300 int offset = (1 << shift_value) - 1 ; 6301 6302 if (offset == 1) { 6303 incrementl(reg); 6304 } else { 6305 addl(reg, offset); 6306 } 6307 6308 bind (_is_positive); 6309 sarl(reg, shift_value); 6310 } 6311 6312 // 
// !defined(COMPILER2) is because of stupid core builds
#if !defined(_LP64) || defined(COMPILER1) || !defined(COMPILER2)
// Empties the x87 register stack: a single emms when MMX is supported,
// otherwise an ffree of each of the 8 stack slots.
void MacroAssembler::empty_FPU_stack() {
  if (VM_Version::supports_mmx()) {
    emms();
  } else {
    for (int i = 8; i-- > 0; ) ffree(i);
  }
}
#endif // !LP64 || C1 || !C2


// Defines obj, preserves var_size_in_bytes
//
// Inline bump-pointer allocation against the shared heap top.
//   obj                must be rax (it is the implicit cmpxchg operand)
//   var_size_in_bytes  size register, or noreg to use con_size_in_bytes
//   t1                 temp; holds the proposed new top
//   slow_case          taken when inline allocation is not available, when
//                      the new top wraps around, or when it exceeds heap end
// The cmpxchg is retried until the top pointer is updated successfully.
void MacroAssembler::eden_allocate(Register obj,
                                   Register var_size_in_bytes,
                                   int con_size_in_bytes,
                                   Register t1,
                                   Label& slow_case) {
  assert(obj == rax, "obj must be in rax, for cmpxchg");
  assert_different_registers(obj, var_size_in_bytes, t1);
  if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) {
    jmp(slow_case);
  } else {
    Register end = t1;
    Label retry;
    bind(retry);
    ExternalAddress heap_top((address) Universe::heap()->top_addr());
    movptr(obj, heap_top);
    if (var_size_in_bytes == noreg) {
      lea(end, Address(obj, con_size_in_bytes));
    } else {
      lea(end, Address(obj, var_size_in_bytes, Address::times_1));
    }
    // if end < obj then we wrapped around => object too long => slow case
    cmpptr(end, obj);
    jcc(Assembler::below, slow_case);
    cmpptr(end, ExternalAddress((address) Universe::heap()->end_addr()));
    jcc(Assembler::above, slow_case);
    // Compare obj with the top addr, and if still equal, store the new top addr in
    // end at the address of the top addr pointer. Sets ZF if was equal, and clears
    // it otherwise. Use lock prefix for atomicity on MPs.
    locked_cmpxchgptr(end, heap_top);
    jcc(Assembler::notEqual, retry);
  }
}

// Standard frame set-up: save the caller's rbp and make rbp the frame base.
void MacroAssembler::enter() {
  push(rbp);
  mov(rbp, rsp);
}

// Compares ST0 with ST1 and pops both operands.
void MacroAssembler::fcmp(Register tmp) {
  fcmp(tmp, 1, true, true);
}

// x87 compare of ST0 against ST(index), leaving the result in eflags.
// With cmov support the fucomi/fucomip forms are used and tmp must be noreg;
// otherwise the FPU status word is transferred through rax (saved in and
// restored from tmp) with fnstsw/sahf.
// pop_right may only be requested together with pop_left.
void MacroAssembler::fcmp(Register tmp, int index, bool pop_left, bool pop_right) {
  assert(!pop_right || pop_left, "usage error");
  if (VM_Version::supports_cmov()) {
    assert(tmp == noreg, "unneeded temp");
    if (pop_left) {
      fucomip(index);
    } else {
      fucomi(index);
    }
    if (pop_right) {
      fpop();
    }
  } else {
    assert(tmp != noreg, "need temp");
    if (pop_left) {
      if (pop_right) {
        fcompp();
      } else {
        fcomp(index);
      }
    } else {
      fcom(index);
    }
    // convert FPU condition into eflags condition via rax,
    save_rax(tmp);
    fwait(); fnstsw_ax();
    sahf();
    restore_rax(tmp);
  }
  // condition codes set as follows:
  //
  // CF (corresponds to C0) if x < y
  // PF (corresponds to C2) if unordered
  // ZF (corresponds to C3) if x = y
}

// Compares ST0 with ST1, pops both, and materializes -1 / 0 / +1 in dst.
void MacroAssembler::fcmp2int(Register dst, bool unordered_is_less) {
  fcmp2int(dst, unordered_is_less, 1, true, true);
}

// General form of fcmp2int: runs fcmp (using dst as its temp when cmov is
// not supported) and then converts the flags to -1 / 0 / +1 in dst.
// An unordered result yields -1 when unordered_is_less, +1 otherwise.
void MacroAssembler::fcmp2int(Register dst, bool unordered_is_less, int index, bool pop_left, bool pop_right) {
  fcmp(VM_Version::supports_cmov() ? noreg : dst, index, pop_left, pop_right);
  Label L;
  if (unordered_is_less) {
    movl(dst, -1);
    jcc(Assembler::parity, L);
    jcc(Assembler::below , L);
    movl(dst, 0);
    jcc(Assembler::equal , L);
    increment(dst);
  } else { // unordered is greater
    movl(dst, 1);
    jcc(Assembler::parity, L);
    jcc(Assembler::above , L);
    movl(dst, 0);
    jcc(Assembler::equal , L);
    decrementl(dst);
  }
  bind(L);
}

// x87 loads / control-word load from a literal address. These convert the
// literal with as_Address directly; unlike the SSE helpers in this file they
// have no rscratch1 fallback for unreachable targets.
void MacroAssembler::fld_d(AddressLiteral src) {
  fld_d(as_Address(src));
}

void MacroAssembler::fld_s(AddressLiteral src) {
  fld_s(as_Address(src));
}

void MacroAssembler::fld_x(AddressLiteral src) {
  Assembler::fld_x(as_Address(src));
}

void MacroAssembler::fldcw(AddressLiteral src) {
  Assembler::fldcw(as_Address(src));
}

// Pops the x87 stack: ffree followed by fincstp.
void MacroAssembler::fpop() {
  ffree();
  fincstp();
}

// Floating-point remainder: loops on fprem, re-reading the FPU status word
// after each step, until the exit test below no longer branches back.
// rax is preserved through tmp. On exit the result is in ST0 and the former
// ST1 has been removed.
void MacroAssembler::fremr(Register tmp) {
  save_rax(tmp);
  { Label L;
    bind(L);
    fprem();
    fwait(); fnstsw_ax();
#ifdef _LP64
    testl(rax, 0x400);
    jcc(Assembler::notEqual, L);
#else
    sahf();
    jcc(Assembler::parity, L);
#endif // _LP64
  }
  restore_rax(tmp);
  // Result is in ST0.
  // Note: fxch & fpop to get rid of ST1
  // (otherwise FPU stack could overflow eventually)
  fxch(1);
  fpop();
}


// Increments the 32-bit value at a literal address. When the literal is not
// reachable an lea into rscratch1 is emitted first (rscratch1 clobbered).
void MacroAssembler::incrementl(AddressLiteral dst) {
  if (reachable(dst)) {
    incrementl(as_Address(dst));
  } else {
    lea(rscratch1, dst);
    incrementl(Address(rscratch1, 0));
  }
}

// Increments the 32-bit array element addressed by dst.
void MacroAssembler::incrementl(ArrayAddress dst) {
  incrementl(as_Address(dst));
}

// 32-bit add of the constant 'value': min_jint is handled first, negative
// values are delegated to decrementl, zero emits nothing, and 1 uses incl
// when UseIncDec is set.
void MacroAssembler::incrementl(Register reg, int value) {
  if (value == min_jint) {addl(reg, value) ; return; }
  if (value < 0) { decrementl(reg, -value); return; }
  if (value == 0) { ; return; }
  if (value == 1 && UseIncDec) { incl(reg) ; return; }
  /* else */ { addl(reg, value) ; return; }
}

// Memory-operand variant of incrementl(Register, int).
void MacroAssembler::incrementl(Address dst, int value) {
  if (value == min_jint) {addl(dst, value) ; return; }
  if (value < 0) { decrementl(dst, -value); return; }
  if (value == 0) { ; return; }
  if (value == 1 && UseIncDec) { incl(dst) ; return; }
  /* else */ { addl(dst, value) ; return; }
}

// Direct jump when the target is reachable; otherwise an indirect jump
// through rscratch1 (clobbered).
void MacroAssembler::jump(AddressLiteral dst) {
  if (reachable(dst)) {
    jmp_literal(dst.target(), dst.rspec());
  } else {
    lea(rscratch1, dst);
    jmp(rscratch1);
  }
}

// Conditional jump to a literal address.
//  - reachable: hand-encodes a short jcc (8-bit displacement, only when the
//    literal carries no relocation and the displacement fits) or a long jcc
//    (32-bit displacement). Displacements are relative to the end of the
//    instruction, hence the short_size / long_size corrections.
//  - not reachable: branches around an indirect jump through rscratch1
//    using the reversed condition.
void MacroAssembler::jump_cc(Condition cc, AddressLiteral dst) {
  if (reachable(dst)) {
    InstructionMark im(this);
    relocate(dst.reloc());
    const int short_size = 2;
    const int long_size = 6;
    int offs = (intptr_t)dst.target() - ((intptr_t)_code_pos);
    if (dst.reloc() == relocInfo::none && is8bit(offs - short_size)) {
      // 0111 tttn #8-bit disp
      emit_byte(0x70 | cc);
      emit_byte((offs - short_size) & 0xFF);
    } else {
      // 0000 1111 1000 tttn #32-bit disp
      emit_byte(0x0F);
      emit_byte(0x80 | cc);
      emit_long(offs - long_size);
    }
  } else {
#ifdef ASSERT
    warning("reversing conditional branch");
#endif /* ASSERT */
    Label skip;
    jccb(reverse[cc], skip);
    lea(rscratch1, dst);
    Assembler::jmp(rscratch1);
    bind(skip);
  }
}

// ldmxcsr from a literal address; goes through rscratch1 (clobbered) when
// the literal is not reachable.
void MacroAssembler::ldmxcsr(AddressLiteral src) {
  if (reachable(src)) {
    Assembler::ldmxcsr(as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::ldmxcsr(Address(rscratch1, 0));
  }
}

// Loads a sign-extended byte into dst and returns the code offset of the
// instruction that performs the memory access. On LP64 (or P6) a single
// movsbl is used; otherwise an unsigned load followed by a shift pair.
int MacroAssembler::load_signed_byte(Register dst, Address src) {
  int off;
  if (LP64_ONLY(true ||) VM_Version::is_P6()) {
    off = offset();
    movsbl(dst, src); // movsxb
  } else {
    off = load_unsigned_byte(dst, src);
    shll(dst, 24);
    sarl(dst, 24);
  }
  return off;
}

// Note: load_signed_short used to be called load_signed_word.
// Although the 'w' in x86 opcodes refers to the term "word" in the assembler
// manual, which means 16 bits, that usage is found nowhere in HotSpot code.
// The term "word" in HotSpot means a 32- or 64-bit machine word.
//
// Loads a sign-extended 16-bit value into dst and returns the code offset of
// the instruction that performs the memory access.
int MacroAssembler::load_signed_short(Register dst, Address src) {
  int off;
  if (LP64_ONLY(true ||) VM_Version::is_P6()) {
    // This is dubious to me since it seems safe to do a signed 16 => 64 bit
    // version but this is what 64bit has always done. This seems to imply
    // that users are only using 32bits worth.
    off = offset();
    movswl(dst, src); // movsxw
  } else {
    off = load_unsigned_short(dst, src);
    shll(dst, 16);
    sarl(dst, 16);
  }
  return off;
}

// Loads a zero-extended byte into dst and returns the code offset of the
// instruction that performs the memory access. The xorl/movb form is only
// used when src does not depend on dst, since the xorl clears dst first.
int MacroAssembler::load_unsigned_byte(Register dst, Address src) {
  // According to Intel Doc. AP-526, "Zero-Extension of Short", p.16,
  // and "3.9 Partial Register Penalties", p. 22).
  int off;
  if (LP64_ONLY(true || ) VM_Version::is_P6() || src.uses(dst)) {
    off = offset();
    movzbl(dst, src); // movzxb
  } else {
    xorl(dst, dst);
    off = offset();
    movb(dst, src);
  }
  return off;
}

// Note: load_unsigned_short used to be called load_unsigned_word.
//
// 16-bit counterpart of load_unsigned_byte.
int MacroAssembler::load_unsigned_short(Register dst, Address src) {
  // According to Intel Doc. AP-526, "Zero-Extension of Short", p.16,
  // and "3.9 Partial Register Penalties", p. 22).
  int off;
  if (LP64_ONLY(true ||) VM_Version::is_P6() || src.uses(dst)) {
    off = offset();
    movzwl(dst, src); // movzxw
  } else {
    xorl(dst, dst);
    off = offset();
    movw(dst, src);
  }
  return off;
}

void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2) {
  switch (size_in_bytes) {
#ifndef _LP64
  case 8:
    assert(dst2 != noreg, "second dest register required");
    movl(dst, src);
    movl(dst2, src.plus_disp(BytesPerInt));
    break;
#else
  case 8: movq(dst, src); break;
#endif
  case 4: movl(dst, src); break;
  case 2: is_signed ? load_signed_short(dst, src) : load_unsigned_short(dst, src); break;
  case 1: is_signed ?
load_signed_byte( dst, src) : load_unsigned_byte( dst, src); break; 6626 default: ShouldNotReachHere(); 6627 } 6628 } 6629 6630 void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2) { 6631 switch (size_in_bytes) { 6632 #ifndef _LP64 6633 case 8: 6634 assert(src2 != noreg, "second source register required"); 6635 movl(dst, src); 6636 movl(dst.plus_disp(BytesPerInt), src2); 6637 break; 6638 #else 6639 case 8: movq(dst, src); break; 6640 #endif 6641 case 4: movl(dst, src); break; 6642 case 2: movw(dst, src); break; 6643 case 1: movb(dst, src); break; 6644 default: ShouldNotReachHere(); 6645 } 6646 } 6647 6648 void MacroAssembler::mov32(AddressLiteral dst, Register src) { 6649 if (reachable(dst)) { 6650 movl(as_Address(dst), src); 6651 } else { 6652 lea(rscratch1, dst); 6653 movl(Address(rscratch1, 0), src); 6654 } 6655 } 6656 6657 void MacroAssembler::mov32(Register dst, AddressLiteral src) { 6658 if (reachable(src)) { 6659 movl(dst, as_Address(src)); 6660 } else { 6661 lea(rscratch1, src); 6662 movl(dst, Address(rscratch1, 0)); 6663 } 6664 } 6665 6666 // C++ bool manipulation 6667 6668 void MacroAssembler::movbool(Register dst, Address src) { 6669 if(sizeof(bool) == 1) 6670 movb(dst, src); 6671 else if(sizeof(bool) == 2) 6672 movw(dst, src); 6673 else if(sizeof(bool) == 4) 6674 movl(dst, src); 6675 else 6676 // unsupported 6677 ShouldNotReachHere(); 6678 } 6679 6680 void MacroAssembler::movbool(Address dst, bool boolconst) { 6681 if(sizeof(bool) == 1) 6682 movb(dst, (int) boolconst); 6683 else if(sizeof(bool) == 2) 6684 movw(dst, (int) boolconst); 6685 else if(sizeof(bool) == 4) 6686 movl(dst, (int) boolconst); 6687 else 6688 // unsupported 6689 ShouldNotReachHere(); 6690 } 6691 6692 void MacroAssembler::movbool(Address dst, Register src) { 6693 if(sizeof(bool) == 1) 6694 movb(dst, src); 6695 else if(sizeof(bool) == 2) 6696 movw(dst, src); 6697 else if(sizeof(bool) == 4) 6698 movl(dst, src); 6699 else 6700 // 
unsupported 6701 ShouldNotReachHere(); 6702 } 6703 6704 void MacroAssembler::movbyte(ArrayAddress dst, int src) { 6705 movb(as_Address(dst), src); 6706 } 6707 6708 void MacroAssembler::movdbl(XMMRegister dst, AddressLiteral src) { 6709 if (reachable(src)) { 6710 if (UseXmmLoadAndClearUpper) { 6711 movsd (dst, as_Address(src)); 6712 } else { 6713 movlpd(dst, as_Address(src)); 6714 } 6715 } else { 6716 lea(rscratch1, src); 6717 if (UseXmmLoadAndClearUpper) { 6718 movsd (dst, Address(rscratch1, 0)); 6719 } else { 6720 movlpd(dst, Address(rscratch1, 0)); 6721 } 6722 } 6723 } 6724 6725 void MacroAssembler::movflt(XMMRegister dst, AddressLiteral src) { 6726 if (reachable(src)) { 6727 movss(dst, as_Address(src)); 6728 } else { 6729 lea(rscratch1, src); 6730 movss(dst, Address(rscratch1, 0)); 6731 } 6732 } 6733 6734 void MacroAssembler::movptr(Register dst, Register src) { 6735 LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src)); 6736 } 6737 6738 void MacroAssembler::movptr(Register dst, Address src) { 6739 LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src)); 6740 } 6741 6742 // src should NEVER be a real pointer. 
Use AddressLiteral for true pointers 6743 void MacroAssembler::movptr(Register dst, intptr_t src) { 6744 LP64_ONLY(mov64(dst, src)) NOT_LP64(movl(dst, src)); 6745 } 6746 6747 void MacroAssembler::movptr(Address dst, Register src) { 6748 LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src)); 6749 } 6750 6751 void MacroAssembler::movss(XMMRegister dst, AddressLiteral src) { 6752 if (reachable(src)) { 6753 movss(dst, as_Address(src)); 6754 } else { 6755 lea(rscratch1, src); 6756 movss(dst, Address(rscratch1, 0)); 6757 } 6758 } 6759 6760 void MacroAssembler::null_check(Register reg, int offset) { 6761 if (needs_explicit_null_check(offset)) { 6762 // provoke OS NULL exception if reg = NULL by 6763 // accessing M[reg] w/o changing any (non-CC) registers 6764 // NOTE: cmpl is plenty here to provoke a segv 6765 cmpptr(rax, Address(reg, 0)); 6766 // Note: should probably use testl(rax, Address(reg, 0)); 6767 // may be shorter code (however, this version of 6768 // testl needs to be implemented first) 6769 } else { 6770 // nothing to do, (later) access of M[reg + offset] 6771 // will provoke OS NULL exception if reg = NULL 6772 } 6773 } 6774 6775 void MacroAssembler::os_breakpoint() { 6776 // instead of directly emitting a breakpoint, call os:breakpoint for better debugability 6777 // (e.g., MSVC can't call ps() otherwise) 6778 call(RuntimeAddress(CAST_FROM_FN_PTR(address, os::breakpoint))); 6779 } 6780 6781 void MacroAssembler::pop_CPU_state() { 6782 pop_FPU_state(); 6783 pop_IU_state(); 6784 } 6785 6786 void MacroAssembler::pop_FPU_state() { 6787 NOT_LP64(frstor(Address(rsp, 0));) 6788 LP64_ONLY(fxrstor(Address(rsp, 0));) 6789 addptr(rsp, FPUStateSizeInWords * wordSize); 6790 } 6791 6792 void MacroAssembler::pop_IU_state() { 6793 popa(); 6794 LP64_ONLY(addq(rsp, 8)); 6795 popf(); 6796 } 6797 6798 // Save Integer and Float state 6799 // Warning: Stack must be 16 byte aligned (64bit) 6800 void MacroAssembler::push_CPU_state() { 6801 push_IU_state(); 6802 push_FPU_state(); 6803 
} 6804 6805 void MacroAssembler::push_FPU_state() { 6806 subptr(rsp, FPUStateSizeInWords * wordSize); 6807 #ifndef _LP64 6808 fnsave(Address(rsp, 0)); 6809 fwait(); 6810 #else 6811 fxsave(Address(rsp, 0)); 6812 #endif // LP64 6813 } 6814 6815 void MacroAssembler::push_IU_state() { 6816 // Push flags first because pusha kills them 6817 pushf(); 6818 // Make sure rsp stays 16-byte aligned 6819 LP64_ONLY(subq(rsp, 8)); 6820 pusha(); 6821 } 6822 6823 void MacroAssembler::reset_last_Java_frame(Register java_thread, bool clear_fp, bool clear_pc) { 6824 // determine java_thread register 6825 if (!java_thread->is_valid()) { 6826 java_thread = rdi; 6827 get_thread(java_thread); 6828 } 6829 // we must set sp to zero to clear frame 6830 movptr(Address(java_thread, JavaThread::last_Java_sp_offset()), NULL_WORD); 6831 if (clear_fp) { 6832 movptr(Address(java_thread, JavaThread::last_Java_fp_offset()), NULL_WORD); 6833 } 6834 6835 if (clear_pc) 6836 movptr(Address(java_thread, JavaThread::last_Java_pc_offset()), NULL_WORD); 6837 6838 } 6839 6840 void MacroAssembler::restore_rax(Register tmp) { 6841 if (tmp == noreg) pop(rax); 6842 else if (tmp != rax) mov(rax, tmp); 6843 } 6844 6845 void MacroAssembler::round_to(Register reg, int modulus) { 6846 addptr(reg, modulus - 1); 6847 andptr(reg, -modulus); 6848 } 6849 6850 void MacroAssembler::save_rax(Register tmp) { 6851 if (tmp == noreg) push(rax); 6852 else if (tmp != rax) mov(tmp, rax); 6853 } 6854 6855 // Write serialization page so VM thread can do a pseudo remote membar. 6856 // We use the current thread pointer to calculate a thread specific 6857 // offset to write to within the page. This minimizes bus traffic 6858 // due to cache line collision. 
6859 void MacroAssembler::serialize_memory(Register thread, Register tmp) { 6860 movl(tmp, thread); 6861 shrl(tmp, os::get_serialize_page_shift_count()); 6862 andl(tmp, (os::vm_page_size() - sizeof(int))); 6863 6864 Address index(noreg, tmp, Address::times_1); 6865 ExternalAddress page(os::get_memory_serialize_page()); 6866 6867 // Size of store must match masking code above 6868 movl(as_Address(ArrayAddress(page, index)), tmp); 6869 } 6870 6871 // Calls to C land 6872 // 6873 // When entering C land, the rbp, & rsp of the last Java frame have to be recorded 6874 // in the (thread-local) JavaThread object. When leaving C land, the last Java fp 6875 // has to be reset to 0. This is required to allow proper stack traversal. 6876 void MacroAssembler::set_last_Java_frame(Register java_thread, 6877 Register last_java_sp, 6878 Register last_java_fp, 6879 address last_java_pc) { 6880 // determine java_thread register 6881 if (!java_thread->is_valid()) { 6882 java_thread = rdi; 6883 get_thread(java_thread); 6884 } 6885 // determine last_java_sp register 6886 if (!last_java_sp->is_valid()) { 6887 last_java_sp = rsp; 6888 } 6889 6890 // last_java_fp is optional 6891 6892 if (last_java_fp->is_valid()) { 6893 movptr(Address(java_thread, JavaThread::last_Java_fp_offset()), last_java_fp); 6894 } 6895 6896 // last_java_pc is optional 6897 6898 if (last_java_pc != NULL) { 6899 lea(Address(java_thread, 6900 JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()), 6901 InternalAddress(last_java_pc)); 6902 6903 } 6904 movptr(Address(java_thread, JavaThread::last_Java_sp_offset()), last_java_sp); 6905 } 6906 6907 void MacroAssembler::shlptr(Register dst, int imm8) { 6908 LP64_ONLY(shlq(dst, imm8)) NOT_LP64(shll(dst, imm8)); 6909 } 6910 6911 void MacroAssembler::shrptr(Register dst, int imm8) { 6912 LP64_ONLY(shrq(dst, imm8)) NOT_LP64(shrl(dst, imm8)); 6913 } 6914 6915 void MacroAssembler::sign_extend_byte(Register reg) { 6916 if (LP64_ONLY(true ||) 
(VM_Version::is_P6() && reg->has_byte_register())) { 6917 movsbl(reg, reg); // movsxb 6918 } else { 6919 shll(reg, 24); 6920 sarl(reg, 24); 6921 } 6922 } 6923 6924 void MacroAssembler::sign_extend_short(Register reg) { 6925 if (LP64_ONLY(true ||) VM_Version::is_P6()) { 6926 movswl(reg, reg); // movsxw 6927 } else { 6928 shll(reg, 16); 6929 sarl(reg, 16); 6930 } 6931 } 6932 6933 void MacroAssembler::testl(Register dst, AddressLiteral src) { 6934 assert(reachable(src), "Address should be reachable"); 6935 testl(dst, as_Address(src)); 6936 } 6937 6938 ////////////////////////////////////////////////////////////////////////////////// 6939 #ifndef SERIALGC 6940 6941 void MacroAssembler::g1_write_barrier_pre(Register obj, 6942 #ifndef _LP64 6943 Register thread, 6944 #endif 6945 Register tmp, 6946 Register tmp2, 6947 bool tosca_live) { 6948 LP64_ONLY(Register thread = r15_thread;) 6949 Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() + 6950 PtrQueue::byte_offset_of_active())); 6951 6952 Address index(thread, in_bytes(JavaThread::satb_mark_queue_offset() + 6953 PtrQueue::byte_offset_of_index())); 6954 Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() + 6955 PtrQueue::byte_offset_of_buf())); 6956 6957 6958 Label done; 6959 Label runtime; 6960 6961 // if (!marking_in_progress) goto done; 6962 if (in_bytes(PtrQueue::byte_width_of_active()) == 4) { 6963 cmpl(in_progress, 0); 6964 } else { 6965 assert(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption"); 6966 cmpb(in_progress, 0); 6967 } 6968 jcc(Assembler::equal, done); 6969 6970 // if (x.f == NULL) goto done; 6971 #ifdef _LP64 6972 load_heap_oop(tmp2, Address(obj, 0)); 6973 #else 6974 movptr(tmp2, Address(obj, 0)); 6975 #endif 6976 cmpptr(tmp2, (int32_t) NULL_WORD); 6977 jcc(Assembler::equal, done); 6978 6979 // Can we store original value in the thread's buffer? 
6980 6981 #ifdef _LP64 6982 movslq(tmp, index); 6983 cmpq(tmp, 0); 6984 #else 6985 cmpl(index, 0); 6986 #endif 6987 jcc(Assembler::equal, runtime); 6988 #ifdef _LP64 6989 subq(tmp, wordSize); 6990 movl(index, tmp); 6991 addq(tmp, buffer); 6992 #else 6993 subl(index, wordSize); 6994 movl(tmp, buffer); 6995 addl(tmp, index); 6996 #endif 6997 movptr(Address(tmp, 0), tmp2); 6998 jmp(done); 6999 bind(runtime); 7000 // save the live input values 7001 if(tosca_live) push(rax); 7002 push(obj); 7003 #ifdef _LP64 7004 call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), tmp2, r15_thread); 7005 #else 7006 push(thread); 7007 call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), tmp2, thread); 7008 pop(thread); 7009 #endif 7010 pop(obj); 7011 if(tosca_live) pop(rax); 7012 bind(done); 7013 7014 } 7015 7016 void MacroAssembler::g1_write_barrier_post(Register store_addr, 7017 Register new_val, 7018 #ifndef _LP64 7019 Register thread, 7020 #endif 7021 Register tmp, 7022 Register tmp2) { 7023 7024 LP64_ONLY(Register thread = r15_thread;) 7025 Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() + 7026 PtrQueue::byte_offset_of_index())); 7027 Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() + 7028 PtrQueue::byte_offset_of_buf())); 7029 BarrierSet* bs = Universe::heap()->barrier_set(); 7030 CardTableModRefBS* ct = (CardTableModRefBS*)bs; 7031 Label done; 7032 Label runtime; 7033 7034 // Does store cross heap regions? 7035 7036 movptr(tmp, store_addr); 7037 xorptr(tmp, new_val); 7038 shrptr(tmp, HeapRegion::LogOfHRGrainBytes); 7039 jcc(Assembler::equal, done); 7040 7041 // crosses regions, storing NULL? 7042 7043 cmpptr(new_val, (int32_t) NULL_WORD); 7044 jcc(Assembler::equal, done); 7045 7046 // storing region crossing non-NULL, is card already dirty? 
7047 7048 ExternalAddress cardtable((address) ct->byte_map_base); 7049 assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code"); 7050 #ifdef _LP64 7051 const Register card_addr = tmp; 7052 7053 movq(card_addr, store_addr); 7054 shrq(card_addr, CardTableModRefBS::card_shift); 7055 7056 lea(tmp2, cardtable); 7057 7058 // get the address of the card 7059 addq(card_addr, tmp2); 7060 #else 7061 const Register card_index = tmp; 7062 7063 movl(card_index, store_addr); 7064 shrl(card_index, CardTableModRefBS::card_shift); 7065 7066 Address index(noreg, card_index, Address::times_1); 7067 const Register card_addr = tmp; 7068 lea(card_addr, as_Address(ArrayAddress(cardtable, index))); 7069 #endif 7070 cmpb(Address(card_addr, 0), 0); 7071 jcc(Assembler::equal, done); 7072 7073 // storing a region crossing, non-NULL oop, card is clean. 7074 // dirty card and log. 7075 7076 movb(Address(card_addr, 0), 0); 7077 7078 cmpl(queue_index, 0); 7079 jcc(Assembler::equal, runtime); 7080 subl(queue_index, wordSize); 7081 movptr(tmp2, buffer); 7082 #ifdef _LP64 7083 movslq(rscratch1, queue_index); 7084 addq(tmp2, rscratch1); 7085 movq(Address(tmp2, 0), card_addr); 7086 #else 7087 addl(tmp2, queue_index); 7088 movl(Address(tmp2, 0), card_index); 7089 #endif 7090 jmp(done); 7091 7092 bind(runtime); 7093 // save the live input values 7094 push(store_addr); 7095 push(new_val); 7096 #ifdef _LP64 7097 call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, r15_thread); 7098 #else 7099 push(thread); 7100 call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, thread); 7101 pop(thread); 7102 #endif 7103 pop(new_val); 7104 pop(store_addr); 7105 7106 bind(done); 7107 7108 } 7109 7110 #endif // SERIALGC 7111 ////////////////////////////////////////////////////////////////////////////////// 7112 7113 7114 void MacroAssembler::store_check(Register obj) { 7115 // Does a store check for the oop in register obj. 
The content of 7116 // register obj is destroyed afterwards. 7117 store_check_part_1(obj); 7118 store_check_part_2(obj); 7119 } 7120 7121 void MacroAssembler::store_check(Register obj, Address dst) { 7122 store_check(obj); 7123 } 7124 7125 7126 // split the store check operation so that other instructions can be scheduled inbetween 7127 void MacroAssembler::store_check_part_1(Register obj) { 7128 BarrierSet* bs = Universe::heap()->barrier_set(); 7129 assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind"); 7130 shrptr(obj, CardTableModRefBS::card_shift); 7131 } 7132 7133 void MacroAssembler::store_check_part_2(Register obj) { 7134 BarrierSet* bs = Universe::heap()->barrier_set(); 7135 assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind"); 7136 CardTableModRefBS* ct = (CardTableModRefBS*)bs; 7137 assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code"); 7138 7139 // The calculation for byte_map_base is as follows: 7140 // byte_map_base = _byte_map - (uintptr_t(low_bound) >> card_shift); 7141 // So this essentially converts an address to a displacement and 7142 // it will never need to be relocated. On 64bit however the value may be too 7143 // large for a 32bit displacement 7144 7145 intptr_t disp = (intptr_t) ct->byte_map_base; 7146 if (is_simm32(disp)) { 7147 Address cardtable(noreg, obj, Address::times_1, disp); 7148 movb(cardtable, 0); 7149 } else { 7150 // By doing it as an ExternalAddress disp could be converted to a rip-relative 7151 // displacement and done in a single instruction given favorable mapping and 7152 // a smarter version of as_Address. Worst case it is two instructions which 7153 // is no worse off then loading disp into a register and doing as a simple 7154 // Address() as above. 7155 // We can't do as ExternalAddress as the only style since if disp == 0 we'll 7156 // assert since NULL isn't acceptable in a reloci (see 6644928). 
In any case 7157 // in some cases we'll get a single instruction version. 7158 7159 ExternalAddress cardtable((address)disp); 7160 Address index(noreg, obj, Address::times_1); 7161 movb(as_Address(ArrayAddress(cardtable, index)), 0); 7162 } 7163 } 7164 7165 void MacroAssembler::subptr(Register dst, int32_t imm32) { 7166 LP64_ONLY(subq(dst, imm32)) NOT_LP64(subl(dst, imm32)); 7167 } 7168 7169 void MacroAssembler::subptr(Register dst, Register src) { 7170 LP64_ONLY(subq(dst, src)) NOT_LP64(subl(dst, src)); 7171 } 7172 7173 // C++ bool manipulation 7174 void MacroAssembler::testbool(Register dst) { 7175 if(sizeof(bool) == 1) 7176 testb(dst, 0xff); 7177 else if(sizeof(bool) == 2) { 7178 // testw implementation needed for two byte bools 7179 ShouldNotReachHere(); 7180 } else if(sizeof(bool) == 4) 7181 testl(dst, dst); 7182 else 7183 // unsupported 7184 ShouldNotReachHere(); 7185 } 7186 7187 void MacroAssembler::testptr(Register dst, Register src) { 7188 LP64_ONLY(testq(dst, src)) NOT_LP64(testl(dst, src)); 7189 } 7190 7191 // Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes. 
7192 void MacroAssembler::tlab_allocate(Register obj, 7193 Register var_size_in_bytes, 7194 int con_size_in_bytes, 7195 Register t1, 7196 Register t2, 7197 Label& slow_case) { 7198 assert_different_registers(obj, t1, t2); 7199 assert_different_registers(obj, var_size_in_bytes, t1); 7200 Register end = t2; 7201 Register thread = NOT_LP64(t1) LP64_ONLY(r15_thread); 7202 7203 verify_tlab(); 7204 7205 NOT_LP64(get_thread(thread)); 7206 7207 movptr(obj, Address(thread, JavaThread::tlab_top_offset())); 7208 if (var_size_in_bytes == noreg) { 7209 lea(end, Address(obj, con_size_in_bytes)); 7210 } else { 7211 lea(end, Address(obj, var_size_in_bytes, Address::times_1)); 7212 } 7213 cmpptr(end, Address(thread, JavaThread::tlab_end_offset())); 7214 jcc(Assembler::above, slow_case); 7215 7216 // update the tlab top pointer 7217 movptr(Address(thread, JavaThread::tlab_top_offset()), end); 7218 7219 // recover var_size_in_bytes if necessary 7220 if (var_size_in_bytes == end) { 7221 subptr(var_size_in_bytes, obj); 7222 } 7223 verify_tlab(); 7224 } 7225 7226 // Preserves rbx, and rdx. 7227 Register MacroAssembler::tlab_refill(Label& retry, 7228 Label& try_eden, 7229 Label& slow_case) { 7230 Register top = rax; 7231 Register t1 = rcx; 7232 Register t2 = rsi; 7233 Register thread_reg = NOT_LP64(rdi) LP64_ONLY(r15_thread); 7234 assert_different_registers(top, thread_reg, t1, t2, /* preserve: */ rbx, rdx); 7235 Label do_refill, discard_tlab; 7236 7237 if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) { 7238 // No allocation in the shared eden. 
7239 jmp(slow_case); 7240 } 7241 7242 NOT_LP64(get_thread(thread_reg)); 7243 7244 movptr(top, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset()))); 7245 movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_end_offset()))); 7246 7247 // calculate amount of free space 7248 subptr(t1, top); 7249 shrptr(t1, LogHeapWordSize); 7250 7251 // Retain tlab and allocate object in shared space if 7252 // the amount free in the tlab is too large to discard. 7253 cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset()))); 7254 jcc(Assembler::lessEqual, discard_tlab); 7255 7256 // Retain 7257 // %%% yuck as movptr... 7258 movptr(t2, (int32_t) ThreadLocalAllocBuffer::refill_waste_limit_increment()); 7259 addptr(Address(thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset())), t2); 7260 if (TLABStats) { 7261 // increment number of slow_allocations 7262 addl(Address(thread_reg, in_bytes(JavaThread::tlab_slow_allocations_offset())), 1); 7263 } 7264 jmp(try_eden); 7265 7266 bind(discard_tlab); 7267 if (TLABStats) { 7268 // increment number of refills 7269 addl(Address(thread_reg, in_bytes(JavaThread::tlab_number_of_refills_offset())), 1); 7270 // accumulate wastage -- t1 is amount free in tlab 7271 addl(Address(thread_reg, in_bytes(JavaThread::tlab_fast_refill_waste_offset())), t1); 7272 } 7273 7274 // if tlab is currently allocated (top or end != null) then 7275 // fill [top, end + alignment_reserve) with array object 7276 testptr(top, top); 7277 jcc(Assembler::zero, do_refill); 7278 7279 // set up the mark word 7280 movptr(Address(top, oopDesc::mark_offset_in_bytes()), (intptr_t)markOopDesc::prototype()->copy_set_hash(0x2)); 7281 // set the length to the remaining space 7282 subptr(t1, typeArrayOopDesc::header_size(T_INT)); 7283 addptr(t1, (int32_t)ThreadLocalAllocBuffer::alignment_reserve()); 7284 shlptr(t1, log2_intptr(HeapWordSize/sizeof(jint))); 7285 movl(Address(top, arrayOopDesc::length_offset_in_bytes()), t1); 7286 // set 
klass to intArrayKlass 7287 // dubious reloc why not an oop reloc? 7288 movptr(t1, ExternalAddress((address)Universe::intArrayKlassObj_addr())); 7289 // store klass last. concurrent gcs assumes klass length is valid if 7290 // klass field is not null. 7291 store_klass(top, t1); 7292 7293 movptr(t1, top); 7294 subptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_start_offset()))); 7295 incr_allocated_bytes(thread_reg, t1, 0); 7296 7297 // refill the tlab with an eden allocation 7298 bind(do_refill); 7299 movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_size_offset()))); 7300 shlptr(t1, LogHeapWordSize); 7301 // allocate new tlab, address returned in top 7302 eden_allocate(top, t1, 0, t2, slow_case); 7303 7304 // Check that t1 was preserved in eden_allocate. 7305 #ifdef ASSERT 7306 if (UseTLAB) { 7307 Label ok; 7308 Register tsize = rsi; 7309 assert_different_registers(tsize, thread_reg, t1); 7310 push(tsize); 7311 movptr(tsize, Address(thread_reg, in_bytes(JavaThread::tlab_size_offset()))); 7312 shlptr(tsize, LogHeapWordSize); 7313 cmpptr(t1, tsize); 7314 jcc(Assembler::equal, ok); 7315 stop("assert(t1 != tlab size)"); 7316 should_not_reach_here(); 7317 7318 bind(ok); 7319 pop(tsize); 7320 } 7321 #endif 7322 movptr(Address(thread_reg, in_bytes(JavaThread::tlab_start_offset())), top); 7323 movptr(Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())), top); 7324 addptr(top, t1); 7325 subptr(top, (int32_t)ThreadLocalAllocBuffer::alignment_reserve_in_bytes()); 7326 movptr(Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())), top); 7327 verify_tlab(); 7328 jmp(retry); 7329 7330 return thread_reg; // for use by caller 7331 } 7332 7333 void MacroAssembler::incr_allocated_bytes(Register thread, 7334 Register var_size_in_bytes, 7335 int con_size_in_bytes, 7336 Register t1) { 7337 #ifdef _LP64 7338 if (var_size_in_bytes->is_valid()) { 7339 addq(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), var_size_in_bytes); 7340 } else { 7341 
addq(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), con_size_in_bytes); 7342 } 7343 #else 7344 if (!thread->is_valid()) { 7345 assert(t1->is_valid(), "need temp reg"); 7346 thread = t1; 7347 get_thread(thread); 7348 } 7349 7350 if (var_size_in_bytes->is_valid()) { 7351 addl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), var_size_in_bytes); 7352 } else { 7353 addl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), con_size_in_bytes); 7354 } 7355 adcl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())+4), 0); 7356 #endif 7357 } 7358 7359 static const double pi_4 = 0.7853981633974483; 7360 7361 void MacroAssembler::trigfunc(char trig, int num_fpu_regs_in_use) { 7362 // A hand-coded argument reduction for values in fabs(pi/4, pi/2) 7363 // was attempted in this code; unfortunately it appears that the 7364 // switch to 80-bit precision and back causes this to be 7365 // unprofitable compared with simply performing a runtime call if 7366 // the argument is out of the (-pi/4, pi/4) range. 
7367 7368 Register tmp = noreg; 7369 if (!VM_Version::supports_cmov()) { 7370 // fcmp needs a temporary so preserve rbx, 7371 tmp = rbx; 7372 push(tmp); 7373 } 7374 7375 Label slow_case, done; 7376 7377 ExternalAddress pi4_adr = (address)&pi_4; 7378 if (reachable(pi4_adr)) { 7379 // x ?<= pi/4 7380 fld_d(pi4_adr); 7381 fld_s(1); // Stack: X PI/4 X 7382 fabs(); // Stack: |X| PI/4 X 7383 fcmp(tmp); 7384 jcc(Assembler::above, slow_case); 7385 7386 // fastest case: -pi/4 <= x <= pi/4 7387 switch(trig) { 7388 case 's': 7389 fsin(); 7390 break; 7391 case 'c': 7392 fcos(); 7393 break; 7394 case 't': 7395 ftan(); 7396 break; 7397 default: 7398 assert(false, "bad intrinsic"); 7399 break; 7400 } 7401 jmp(done); 7402 } 7403 7404 // slow case: runtime call 7405 bind(slow_case); 7406 // Preserve registers across runtime call 7407 pusha(); 7408 int incoming_argument_and_return_value_offset = -1; 7409 if (num_fpu_regs_in_use > 1) { 7410 // Must preserve all other FPU regs (could alternatively convert 7411 // SharedRuntime::dsin and dcos into assembly routines known not to trash 7412 // FPU state, but can not trust C compiler) 7413 NEEDS_CLEANUP; 7414 // NOTE that in this case we also push the incoming argument to 7415 // the stack and restore it later; we also use this stack slot to 7416 // hold the return value from dsin or dcos. 
7417 for (int i = 0; i < num_fpu_regs_in_use; i++) { 7418 subptr(rsp, sizeof(jdouble)); 7419 fstp_d(Address(rsp, 0)); 7420 } 7421 incoming_argument_and_return_value_offset = sizeof(jdouble)*(num_fpu_regs_in_use-1); 7422 fld_d(Address(rsp, incoming_argument_and_return_value_offset)); 7423 } 7424 subptr(rsp, sizeof(jdouble)); 7425 fstp_d(Address(rsp, 0)); 7426 #ifdef _LP64 7427 movdbl(xmm0, Address(rsp, 0)); 7428 #endif // _LP64 7429 7430 // NOTE: we must not use call_VM_leaf here because that requires a 7431 // complete interpreter frame in debug mode -- same bug as 4387334 7432 // MacroAssembler::call_VM_leaf_base is perfectly safe and will 7433 // do proper 64bit abi 7434 7435 NEEDS_CLEANUP; 7436 // Need to add stack banging before this runtime call if it needs to 7437 // be taken; however, there is no generic stack banging routine at 7438 // the MacroAssembler level 7439 switch(trig) { 7440 case 's': 7441 { 7442 MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::dsin), 0); 7443 } 7444 break; 7445 case 'c': 7446 { 7447 MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::dcos), 0); 7448 } 7449 break; 7450 case 't': 7451 { 7452 MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::dtan), 0); 7453 } 7454 break; 7455 default: 7456 assert(false, "bad intrinsic"); 7457 break; 7458 } 7459 #ifdef _LP64 7460 movsd(Address(rsp, 0), xmm0); 7461 fld_d(Address(rsp, 0)); 7462 #endif // _LP64 7463 addptr(rsp, sizeof(jdouble)); 7464 if (num_fpu_regs_in_use > 1) { 7465 // Must save return value to stack and then restore entire FPU stack 7466 fstp_d(Address(rsp, incoming_argument_and_return_value_offset)); 7467 for (int i = 0; i < num_fpu_regs_in_use; i++) { 7468 fld_d(Address(rsp, 0)); 7469 addptr(rsp, sizeof(jdouble)); 7470 } 7471 } 7472 popa(); 7473 7474 // Come here with result in F-TOS 7475 bind(done); 7476 7477 if (tmp != noreg) { 7478 pop(tmp); 7479 } 7480 } 7481 7482 7483 // Look up the method for a 
megamorphic invokeinterface call. 7484 // The target method is determined by <intf_klass, itable_index>. 7485 // The receiver klass is in recv_klass. 7486 // On success, the result will be in method_result, and execution falls through. 7487 // On failure, execution transfers to the given label. 7488 void MacroAssembler::lookup_interface_method(Register recv_klass, 7489 Register intf_klass, 7490 RegisterOrConstant itable_index, 7491 Register method_result, 7492 Register scan_temp, 7493 Label& L_no_such_interface) { 7494 assert_different_registers(recv_klass, intf_klass, method_result, scan_temp); 7495 assert(itable_index.is_constant() || itable_index.as_register() == method_result, 7496 "caller must use same register for non-constant itable index as for method"); 7497 7498 // Compute start of first itableOffsetEntry (which is at the end of the vtable) 7499 int vtable_base = instanceKlass::vtable_start_offset() * wordSize; 7500 int itentry_off = itableMethodEntry::method_offset_in_bytes(); 7501 int scan_step = itableOffsetEntry::size() * wordSize; 7502 int vte_size = vtableEntry::size() * wordSize; 7503 Address::ScaleFactor times_vte_scale = Address::times_ptr; 7504 assert(vte_size == wordSize, "else adjust times_vte_scale"); 7505 7506 movl(scan_temp, Address(recv_klass, instanceKlass::vtable_length_offset() * wordSize)); 7507 7508 // %%% Could store the aligned, prescaled offset in the klassoop. 7509 lea(scan_temp, Address(recv_klass, scan_temp, times_vte_scale, vtable_base)); 7510 if (HeapWordsPerLong > 1) { 7511 // Round up to align_object_offset boundary 7512 // see code for instanceKlass::start_of_itable! 7513 round_to(scan_temp, BytesPerLong); 7514 } 7515 7516 // Adjust recv_klass by scaled itable_index, so we can free itable_index. 
7517 assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below"); 7518 lea(recv_klass, Address(recv_klass, itable_index, Address::times_ptr, itentry_off)); 7519 7520 // for (scan = klass->itable(); scan->interface() != NULL; scan += scan_step) { 7521 // if (scan->interface() == intf) { 7522 // result = (klass + scan->offset() + itable_index); 7523 // } 7524 // } 7525 Label search, found_method; 7526 7527 for (int peel = 1; peel >= 0; peel--) { 7528 movptr(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes())); 7529 cmpptr(intf_klass, method_result); 7530 7531 if (peel) { 7532 jccb(Assembler::equal, found_method); 7533 } else { 7534 jccb(Assembler::notEqual, search); 7535 // (invert the test to fall through to found_method...) 7536 } 7537 7538 if (!peel) break; 7539 7540 bind(search); 7541 7542 // Check that the previous entry is non-null. A null entry means that 7543 // the receiver class doesn't implement the interface, and wasn't the 7544 // same as when the caller was compiled. 7545 testptr(method_result, method_result); 7546 jcc(Assembler::zero, L_no_such_interface); 7547 addptr(scan_temp, scan_step); 7548 } 7549 7550 bind(found_method); 7551 7552 // Got a hit. 
7553 movl(scan_temp, Address(scan_temp, itableOffsetEntry::offset_offset_in_bytes())); 7554 movptr(method_result, Address(recv_klass, scan_temp, Address::times_1)); 7555 } 7556 7557 7558 void MacroAssembler::check_klass_subtype(Register sub_klass, 7559 Register super_klass, 7560 Register temp_reg, 7561 Label& L_success) { 7562 Label L_failure; 7563 check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg, &L_success, &L_failure, NULL); 7564 check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, noreg, &L_success, NULL); 7565 bind(L_failure); 7566 } 7567 7568 7569 void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass, 7570 Register super_klass, 7571 Register temp_reg, 7572 Label* L_success, 7573 Label* L_failure, 7574 Label* L_slow_path, 7575 RegisterOrConstant super_check_offset) { 7576 assert_different_registers(sub_klass, super_klass, temp_reg); 7577 bool must_load_sco = (super_check_offset.constant_or_zero() == -1); 7578 if (super_check_offset.is_register()) { 7579 assert_different_registers(sub_klass, super_klass, 7580 super_check_offset.as_register()); 7581 } else if (must_load_sco) { 7582 assert(temp_reg != noreg, "supply either a temp or a register offset"); 7583 } 7584 7585 Label L_fallthrough; 7586 int label_nulls = 0; 7587 if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } 7588 if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } 7589 if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; } 7590 assert(label_nulls <= 1, "at most one NULL in the batch"); 7591 7592 int sc_offset = (klassOopDesc::header_size() * HeapWordSize + 7593 Klass::secondary_super_cache_offset_in_bytes()); 7594 int sco_offset = (klassOopDesc::header_size() * HeapWordSize + 7595 Klass::super_check_offset_offset_in_bytes()); 7596 Address super_check_offset_addr(super_klass, sco_offset); 7597 7598 // Hacked jcc, which "knows" that L_fallthrough, at least, is in 7599 // range of a jccb. 
If this routine grows larger, reconsider at 7600 // least some of these. 7601 #define local_jcc(assembler_cond, label) \ 7602 if (&(label) == &L_fallthrough) jccb(assembler_cond, label); \ 7603 else jcc( assembler_cond, label) /*omit semi*/ 7604 7605 // Hacked jmp, which may only be used just before L_fallthrough. 7606 #define final_jmp(label) \ 7607 if (&(label) == &L_fallthrough) { /*do nothing*/ } \ 7608 else jmp(label) /*omit semi*/ 7609 7610 // If the pointers are equal, we are done (e.g., String[] elements). 7611 // This self-check enables sharing of secondary supertype arrays among 7612 // non-primary types such as array-of-interface. Otherwise, each such 7613 // type would need its own customized SSA. 7614 // We move this check to the front of the fast path because many 7615 // type checks are in fact trivially successful in this manner, 7616 // so we get a nicely predicted branch right at the start of the check. 7617 cmpptr(sub_klass, super_klass); 7618 local_jcc(Assembler::equal, *L_success); 7619 7620 // Check the supertype display: 7621 if (must_load_sco) { 7622 // Positive movl does right thing on LP64. 7623 movl(temp_reg, super_check_offset_addr); 7624 super_check_offset = RegisterOrConstant(temp_reg); 7625 } 7626 Address super_check_addr(sub_klass, super_check_offset, Address::times_1, 0); 7627 cmpptr(super_klass, super_check_addr); // load displayed supertype 7628 7629 // This check has worked decisively for primary supers. 7630 // Secondary supers are sought in the super_cache ('super_cache_addr'). 7631 // (Secondary supers are interfaces and very deeply nested subtypes.) 7632 // This works in the same check above because of a tricky aliasing 7633 // between the super_cache and the primary super display elements. 7634 // (The 'super_check_addr' can address either, as the case requires.) 7635 // Note that the cache is updated below if it does not help us find 7636 // what we need immediately. 
7637 // So if it was a primary super, we can just fail immediately. 7638 // Otherwise, it's the slow path for us (no success at this point). 7639 7640 if (super_check_offset.is_register()) { 7641 local_jcc(Assembler::equal, *L_success); 7642 cmpl(super_check_offset.as_register(), sc_offset); 7643 if (L_failure == &L_fallthrough) { 7644 local_jcc(Assembler::equal, *L_slow_path); 7645 } else { 7646 local_jcc(Assembler::notEqual, *L_failure); 7647 final_jmp(*L_slow_path); 7648 } 7649 } else if (super_check_offset.as_constant() == sc_offset) { 7650 // Need a slow path; fast failure is impossible. 7651 if (L_slow_path == &L_fallthrough) { 7652 local_jcc(Assembler::equal, *L_success); 7653 } else { 7654 local_jcc(Assembler::notEqual, *L_slow_path); 7655 final_jmp(*L_success); 7656 } 7657 } else { 7658 // No slow path; it's a fast decision. 7659 if (L_failure == &L_fallthrough) { 7660 local_jcc(Assembler::equal, *L_success); 7661 } else { 7662 local_jcc(Assembler::notEqual, *L_failure); 7663 final_jmp(*L_success); 7664 } 7665 } 7666 7667 bind(L_fallthrough); 7668 7669 #undef local_jcc 7670 #undef final_jmp 7671 } 7672 7673 7674 void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass, 7675 Register super_klass, 7676 Register temp_reg, 7677 Register temp2_reg, 7678 Label* L_success, 7679 Label* L_failure, 7680 bool set_cond_codes) { 7681 assert_different_registers(sub_klass, super_klass, temp_reg); 7682 if (temp2_reg != noreg) 7683 assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg); 7684 #define IS_A_TEMP(reg) ((reg) == temp_reg || (reg) == temp2_reg) 7685 7686 Label L_fallthrough; 7687 int label_nulls = 0; 7688 if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } 7689 if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } 7690 assert(label_nulls <= 1, "at most one NULL in the batch"); 7691 7692 // a couple of useful fields in sub_klass: 7693 int ss_offset = (klassOopDesc::header_size() * HeapWordSize + 7694 
Klass::secondary_supers_offset_in_bytes()); 7695 int sc_offset = (klassOopDesc::header_size() * HeapWordSize + 7696 Klass::secondary_super_cache_offset_in_bytes()); 7697 Address secondary_supers_addr(sub_klass, ss_offset); 7698 Address super_cache_addr( sub_klass, sc_offset); 7699 7700 // Do a linear scan of the secondary super-klass chain. 7701 // This code is rarely used, so simplicity is a virtue here. 7702 // The repne_scan instruction uses fixed registers, which we must spill. 7703 // Don't worry too much about pre-existing connections with the input regs. 7704 7705 assert(sub_klass != rax, "killed reg"); // killed by mov(rax, super) 7706 assert(sub_klass != rcx, "killed reg"); // killed by lea(rcx, &pst_counter) 7707 7708 // Get super_klass value into rax (even if it was in rdi or rcx). 7709 bool pushed_rax = false, pushed_rcx = false, pushed_rdi = false; 7710 if (super_klass != rax || UseCompressedOops) { 7711 if (!IS_A_TEMP(rax)) { push(rax); pushed_rax = true; } 7712 mov(rax, super_klass); 7713 } 7714 if (!IS_A_TEMP(rcx)) { push(rcx); pushed_rcx = true; } 7715 if (!IS_A_TEMP(rdi)) { push(rdi); pushed_rdi = true; } 7716 7717 #ifndef PRODUCT 7718 int* pst_counter = &SharedRuntime::_partial_subtype_ctr; 7719 ExternalAddress pst_counter_addr((address) pst_counter); 7720 NOT_LP64( incrementl(pst_counter_addr) ); 7721 LP64_ONLY( lea(rcx, pst_counter_addr) ); 7722 LP64_ONLY( incrementl(Address(rcx, 0)) ); 7723 #endif //PRODUCT 7724 7725 // We will consult the secondary-super array. 7726 movptr(rdi, secondary_supers_addr); 7727 // Load the array length. (Positive movl does right thing on LP64.) 7728 movl(rcx, Address(rdi, arrayOopDesc::length_offset_in_bytes())); 7729 // Skip to start of data. 7730 addptr(rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); 7731 7732 // Scan RCX words at [RDI] for an occurrence of RAX. 7733 // Set NZ/Z based on last compare. 
7734 // Z flag value will not be set by 'repne' if RCX == 0 since 'repne' does 7735 // not change flags (only scas instruction which is repeated sets flags). 7736 // Set Z = 0 (not equal) before 'repne' to indicate that class was not found. 7737 #ifdef _LP64 7738 // This part is tricky, as values in supers array could be 32 or 64 bit wide 7739 // and we store values in objArrays always encoded, thus we need to encode 7740 // the value of rax before repne. Note that rax is dead after the repne. 7741 if (UseCompressedOops) { 7742 encode_heap_oop_not_null(rax); // Changes flags. 7743 // The superclass is never null; it would be a basic system error if a null 7744 // pointer were to sneak in here. Note that we have already loaded the 7745 // Klass::super_check_offset from the super_klass in the fast path, 7746 // so if there is a null in that register, we are already in the afterlife. 7747 testl(rax,rax); // Set Z = 0 7748 repne_scanl(); 7749 } else 7750 #endif // _LP64 7751 { 7752 testptr(rax,rax); // Set Z = 0 7753 repne_scan(); 7754 } 7755 // Unspill the temp. registers: 7756 if (pushed_rdi) pop(rdi); 7757 if (pushed_rcx) pop(rcx); 7758 if (pushed_rax) pop(rax); 7759 7760 if (set_cond_codes) { 7761 // Special hack for the AD files: rdi is guaranteed non-zero. 7762 assert(!pushed_rdi, "rdi must be left non-NULL"); 7763 // Also, the condition codes are properly set Z/NZ on succeed/failure. 7764 } 7765 7766 if (L_failure == &L_fallthrough) 7767 jccb(Assembler::notEqual, *L_failure); 7768 else jcc(Assembler::notEqual, *L_failure); 7769 7770 // Success. Cache the super we found and proceed in triumph. 
7771 movptr(super_cache_addr, super_klass); 7772 7773 if (L_success != &L_fallthrough) { 7774 jmp(*L_success); 7775 } 7776 7777 #undef IS_A_TEMP 7778 7779 bind(L_fallthrough); 7780 } 7781 7782 7783 void MacroAssembler::ucomisd(XMMRegister dst, AddressLiteral src) { 7784 ucomisd(dst, as_Address(src)); 7785 } 7786 7787 void MacroAssembler::ucomiss(XMMRegister dst, AddressLiteral src) { 7788 ucomiss(dst, as_Address(src)); 7789 } 7790 7791 void MacroAssembler::xorpd(XMMRegister dst, AddressLiteral src) { 7792 if (reachable(src)) { 7793 xorpd(dst, as_Address(src)); 7794 } else { 7795 lea(rscratch1, src); 7796 xorpd(dst, Address(rscratch1, 0)); 7797 } 7798 } 7799 7800 void MacroAssembler::xorps(XMMRegister dst, AddressLiteral src) { 7801 if (reachable(src)) { 7802 xorps(dst, as_Address(src)); 7803 } else { 7804 lea(rscratch1, src); 7805 xorps(dst, Address(rscratch1, 0)); 7806 } 7807 } 7808 7809 void MacroAssembler::cmov32(Condition cc, Register dst, Address src) { 7810 if (VM_Version::supports_cmov()) { 7811 cmovl(cc, dst, src); 7812 } else { 7813 Label L; 7814 jccb(negate_condition(cc), L); 7815 movl(dst, src); 7816 bind(L); 7817 } 7818 } 7819 7820 void MacroAssembler::cmov32(Condition cc, Register dst, Register src) { 7821 if (VM_Version::supports_cmov()) { 7822 cmovl(cc, dst, src); 7823 } else { 7824 Label L; 7825 jccb(negate_condition(cc), L); 7826 movl(dst, src); 7827 bind(L); 7828 } 7829 } 7830 7831 void MacroAssembler::verify_oop(Register reg, const char* s) { 7832 if (!VerifyOops) return; 7833 7834 // Pass register number to verify_oop_subroutine 7835 char* b = new char[strlen(s) + 50]; 7836 sprintf(b, "verify_oop: %s: %s", reg->name(), s); 7837 #ifdef _LP64 7838 push(rscratch1); // save r10, trashed by movptr() 7839 #endif 7840 push(rax); // save rax, 7841 push(reg); // pass register argument 7842 ExternalAddress buffer((address) b); 7843 // avoid using pushptr, as it modifies scratch registers 7844 // and our contract is not to modify anything 7845 movptr(rax, 
buffer.addr()); 7846 push(rax); 7847 // call indirectly to solve generation ordering problem 7848 movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address())); 7849 call(rax); 7850 // Caller pops the arguments (oop, message) and restores rax, r10 7851 } 7852 7853 7854 RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr, 7855 Register tmp, 7856 int offset) { 7857 intptr_t value = *delayed_value_addr; 7858 if (value != 0) 7859 return RegisterOrConstant(value + offset); 7860 7861 // load indirectly to solve generation ordering problem 7862 movptr(tmp, ExternalAddress((address) delayed_value_addr)); 7863 7864 #ifdef ASSERT 7865 { Label L; 7866 testptr(tmp, tmp); 7867 if (WizardMode) { 7868 jcc(Assembler::notZero, L); 7869 char* buf = new char[40]; 7870 sprintf(buf, "DelayedValue="INTPTR_FORMAT, delayed_value_addr[1]); 7871 stop(buf); 7872 } else { 7873 jccb(Assembler::notZero, L); 7874 hlt(); 7875 } 7876 bind(L); 7877 } 7878 #endif 7879 7880 if (offset != 0) 7881 addptr(tmp, offset); 7882 7883 return RegisterOrConstant(tmp); 7884 } 7885 7886 7887 // registers on entry: 7888 // - rax ('check' register): required MethodType 7889 // - rcx: method handle 7890 // - rdx, rsi, or ?: killable temp 7891 void MacroAssembler::check_method_handle_type(Register mtype_reg, Register mh_reg, 7892 Register temp_reg, 7893 Label& wrong_method_type) { 7894 Address type_addr(mh_reg, delayed_value(java_lang_invoke_MethodHandle::type_offset_in_bytes, temp_reg)); 7895 // compare method type against that of the receiver 7896 if (UseCompressedOops) { 7897 load_heap_oop(temp_reg, type_addr); 7898 cmpptr(mtype_reg, temp_reg); 7899 } else { 7900 cmpptr(mtype_reg, type_addr); 7901 } 7902 jcc(Assembler::notEqual, wrong_method_type); 7903 } 7904 7905 7906 // A method handle has a "vmslots" field which gives the size of its 7907 // argument list in JVM stack slots. 
This field is either located directly 7908 // in every method handle, or else is indirectly accessed through the 7909 // method handle's MethodType. This macro hides the distinction. 7910 void MacroAssembler::load_method_handle_vmslots(Register vmslots_reg, Register mh_reg, 7911 Register temp_reg) { 7912 assert_different_registers(vmslots_reg, mh_reg, temp_reg); 7913 // load mh.type.form.vmslots 7914 if (java_lang_invoke_MethodHandle::vmslots_offset_in_bytes() != 0) { 7915 // hoist vmslots into every mh to avoid dependent load chain 7916 movl(vmslots_reg, Address(mh_reg, delayed_value(java_lang_invoke_MethodHandle::vmslots_offset_in_bytes, temp_reg))); 7917 } else { 7918 Register temp2_reg = vmslots_reg; 7919 load_heap_oop(temp2_reg, Address(mh_reg, delayed_value(java_lang_invoke_MethodHandle::type_offset_in_bytes, temp_reg))); 7920 load_heap_oop(temp2_reg, Address(temp2_reg, delayed_value(java_lang_invoke_MethodType::form_offset_in_bytes, temp_reg))); 7921 movl(vmslots_reg, Address(temp2_reg, delayed_value(java_lang_invoke_MethodTypeForm::vmslots_offset_in_bytes, temp_reg))); 7922 } 7923 } 7924 7925 7926 // registers on entry: 7927 // - rcx: method handle 7928 // - rdx: killable temp (interpreted only) 7929 // - rax: killable temp (compiled only) 7930 void MacroAssembler::jump_to_method_handle_entry(Register mh_reg, Register temp_reg) { 7931 assert(mh_reg == rcx, "caller must put MH object in rcx"); 7932 assert_different_registers(mh_reg, temp_reg); 7933 7934 // pick out the interpreted side of the handler 7935 // NOTE: vmentry is not an oop! 7936 movptr(temp_reg, Address(mh_reg, delayed_value(java_lang_invoke_MethodHandle::vmentry_offset_in_bytes, temp_reg))); 7937 7938 // off we go... 
7939 jmp(Address(temp_reg, MethodHandleEntry::from_interpreted_entry_offset_in_bytes())); 7940 7941 // for the various stubs which take control at this point, 7942 // see MethodHandles::generate_method_handle_stub 7943 } 7944 7945 7946 Address MacroAssembler::argument_address(RegisterOrConstant arg_slot, 7947 int extra_slot_offset) { 7948 // cf. TemplateTable::prepare_invoke(), if (load_receiver). 7949 int stackElementSize = Interpreter::stackElementSize; 7950 int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0); 7951 #ifdef ASSERT 7952 int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1); 7953 assert(offset1 - offset == stackElementSize, "correct arithmetic"); 7954 #endif 7955 Register scale_reg = noreg; 7956 Address::ScaleFactor scale_factor = Address::no_scale; 7957 if (arg_slot.is_constant()) { 7958 offset += arg_slot.as_constant() * stackElementSize; 7959 } else { 7960 scale_reg = arg_slot.as_register(); 7961 scale_factor = Address::times(stackElementSize); 7962 } 7963 offset += wordSize; // return PC is on stack 7964 return Address(rsp, scale_reg, scale_factor, offset); 7965 } 7966 7967 7968 void MacroAssembler::verify_oop_addr(Address addr, const char* s) { 7969 if (!VerifyOops) return; 7970 7971 // Address adjust(addr.base(), addr.index(), addr.scale(), addr.disp() + BytesPerWord); 7972 // Pass register number to verify_oop_subroutine 7973 char* b = new char[strlen(s) + 50]; 7974 sprintf(b, "verify_oop_addr: %s", s); 7975 7976 #ifdef _LP64 7977 push(rscratch1); // save r10, trashed by movptr() 7978 #endif 7979 push(rax); // save rax, 7980 // addr may contain rsp so we will have to adjust it based on the push 7981 // we just did (and on 64 bit we do two pushes) 7982 // NOTE: 64bit seemed to have had a bug in that it did movq(addr, rax); which 7983 // stores rax into addr which is backwards of what was intended. 
7984 if (addr.uses(rsp)) { 7985 lea(rax, addr); 7986 pushptr(Address(rax, LP64_ONLY(2 *) BytesPerWord)); 7987 } else { 7988 pushptr(addr); 7989 } 7990 7991 ExternalAddress buffer((address) b); 7992 // pass msg argument 7993 // avoid using pushptr, as it modifies scratch registers 7994 // and our contract is not to modify anything 7995 movptr(rax, buffer.addr()); 7996 push(rax); 7997 7998 // call indirectly to solve generation ordering problem 7999 movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address())); 8000 call(rax); 8001 // Caller pops the arguments (addr, message) and restores rax, r10. 8002 } 8003 8004 void MacroAssembler::verify_tlab() { 8005 #ifdef ASSERT 8006 if (UseTLAB && VerifyOops) { 8007 Label next, ok; 8008 Register t1 = rsi; 8009 Register thread_reg = NOT_LP64(rbx) LP64_ONLY(r15_thread); 8010 8011 push(t1); 8012 NOT_LP64(push(thread_reg)); 8013 NOT_LP64(get_thread(thread_reg)); 8014 8015 movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset()))); 8016 cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_start_offset()))); 8017 jcc(Assembler::aboveEqual, next); 8018 stop("assert(top >= start)"); 8019 should_not_reach_here(); 8020 8021 bind(next); 8022 movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_end_offset()))); 8023 cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset()))); 8024 jcc(Assembler::aboveEqual, ok); 8025 stop("assert(top <= end)"); 8026 should_not_reach_here(); 8027 8028 bind(ok); 8029 NOT_LP64(pop(thread_reg)); 8030 pop(t1); 8031 } 8032 #endif 8033 } 8034 8035 class ControlWord { 8036 public: 8037 int32_t _value; 8038 8039 int rounding_control() const { return (_value >> 10) & 3 ; } 8040 int precision_control() const { return (_value >> 8) & 3 ; } 8041 bool precision() const { return ((_value >> 5) & 1) != 0; } 8042 bool underflow() const { return ((_value >> 4) & 1) != 0; } 8043 bool overflow() const { return ((_value >> 3) & 1) != 0; } 8044 bool zero_divide() const { 
return ((_value >> 2) & 1) != 0; } 8045 bool denormalized() const { return ((_value >> 1) & 1) != 0; } 8046 bool invalid() const { return ((_value >> 0) & 1) != 0; } 8047 8048 void print() const { 8049 // rounding control 8050 const char* rc; 8051 switch (rounding_control()) { 8052 case 0: rc = "round near"; break; 8053 case 1: rc = "round down"; break; 8054 case 2: rc = "round up "; break; 8055 case 3: rc = "chop "; break; 8056 }; 8057 // precision control 8058 const char* pc; 8059 switch (precision_control()) { 8060 case 0: pc = "24 bits "; break; 8061 case 1: pc = "reserved"; break; 8062 case 2: pc = "53 bits "; break; 8063 case 3: pc = "64 bits "; break; 8064 }; 8065 // flags 8066 char f[9]; 8067 f[0] = ' '; 8068 f[1] = ' '; 8069 f[2] = (precision ()) ? 'P' : 'p'; 8070 f[3] = (underflow ()) ? 'U' : 'u'; 8071 f[4] = (overflow ()) ? 'O' : 'o'; 8072 f[5] = (zero_divide ()) ? 'Z' : 'z'; 8073 f[6] = (denormalized()) ? 'D' : 'd'; 8074 f[7] = (invalid ()) ? 'I' : 'i'; 8075 f[8] = '\x0'; 8076 // output 8077 printf("%04x masks = %s, %s, %s", _value & 0xFFFF, f, rc, pc); 8078 } 8079 8080 }; 8081 8082 class StatusWord { 8083 public: 8084 int32_t _value; 8085 8086 bool busy() const { return ((_value >> 15) & 1) != 0; } 8087 bool C3() const { return ((_value >> 14) & 1) != 0; } 8088 bool C2() const { return ((_value >> 10) & 1) != 0; } 8089 bool C1() const { return ((_value >> 9) & 1) != 0; } 8090 bool C0() const { return ((_value >> 8) & 1) != 0; } 8091 int top() const { return (_value >> 11) & 7 ; } 8092 bool error_status() const { return ((_value >> 7) & 1) != 0; } 8093 bool stack_fault() const { return ((_value >> 6) & 1) != 0; } 8094 bool precision() const { return ((_value >> 5) & 1) != 0; } 8095 bool underflow() const { return ((_value >> 4) & 1) != 0; } 8096 bool overflow() const { return ((_value >> 3) & 1) != 0; } 8097 bool zero_divide() const { return ((_value >> 2) & 1) != 0; } 8098 bool denormalized() const { return ((_value >> 1) & 1) != 0; } 8099 bool 
invalid() const { return ((_value >> 0) & 1) != 0; } 8100 8101 void print() const { 8102 // condition codes 8103 char c[5]; 8104 c[0] = (C3()) ? '3' : '-'; 8105 c[1] = (C2()) ? '2' : '-'; 8106 c[2] = (C1()) ? '1' : '-'; 8107 c[3] = (C0()) ? '0' : '-'; 8108 c[4] = '\x0'; 8109 // flags 8110 char f[9]; 8111 f[0] = (error_status()) ? 'E' : '-'; 8112 f[1] = (stack_fault ()) ? 'S' : '-'; 8113 f[2] = (precision ()) ? 'P' : '-'; 8114 f[3] = (underflow ()) ? 'U' : '-'; 8115 f[4] = (overflow ()) ? 'O' : '-'; 8116 f[5] = (zero_divide ()) ? 'Z' : '-'; 8117 f[6] = (denormalized()) ? 'D' : '-'; 8118 f[7] = (invalid ()) ? 'I' : '-'; 8119 f[8] = '\x0'; 8120 // output 8121 printf("%04x flags = %s, cc = %s, top = %d", _value & 0xFFFF, f, c, top()); 8122 } 8123 8124 }; 8125 8126 class TagWord { 8127 public: 8128 int32_t _value; 8129 8130 int tag_at(int i) const { return (_value >> (i*2)) & 3; } 8131 8132 void print() const { 8133 printf("%04x", _value & 0xFFFF); 8134 } 8135 8136 }; 8137 8138 class FPU_Register { 8139 public: 8140 int32_t _m0; 8141 int32_t _m1; 8142 int16_t _ex; 8143 8144 bool is_indefinite() const { 8145 return _ex == -1 && _m1 == (int32_t)0xC0000000 && _m0 == 0; 8146 } 8147 8148 void print() const { 8149 char sign = (_ex < 0) ? '-' : '+'; 8150 const char* kind = (_ex == 0x7FFF || _ex == (int16_t)-1) ? 
"NaN" : " "; 8151 printf("%c%04hx.%08x%08x %s", sign, _ex, _m1, _m0, kind); 8152 }; 8153 8154 }; 8155 8156 class FPU_State { 8157 public: 8158 enum { 8159 register_size = 10, 8160 number_of_registers = 8, 8161 register_mask = 7 8162 }; 8163 8164 ControlWord _control_word; 8165 StatusWord _status_word; 8166 TagWord _tag_word; 8167 int32_t _error_offset; 8168 int32_t _error_selector; 8169 int32_t _data_offset; 8170 int32_t _data_selector; 8171 int8_t _register[register_size * number_of_registers]; 8172 8173 int tag_for_st(int i) const { return _tag_word.tag_at((_status_word.top() + i) & register_mask); } 8174 FPU_Register* st(int i) const { return (FPU_Register*)&_register[register_size * i]; } 8175 8176 const char* tag_as_string(int tag) const { 8177 switch (tag) { 8178 case 0: return "valid"; 8179 case 1: return "zero"; 8180 case 2: return "special"; 8181 case 3: return "empty"; 8182 } 8183 ShouldNotReachHere(); 8184 return NULL; 8185 } 8186 8187 void print() const { 8188 // print computation registers 8189 { int t = _status_word.top(); 8190 for (int i = 0; i < number_of_registers; i++) { 8191 int j = (i - t) & register_mask; 8192 printf("%c r%d = ST%d = ", (j == 0 ? 
'*' : ' '), i, j); 8193 st(j)->print(); 8194 printf(" %s\n", tag_as_string(_tag_word.tag_at(i))); 8195 } 8196 } 8197 printf("\n"); 8198 // print control registers 8199 printf("ctrl = "); _control_word.print(); printf("\n"); 8200 printf("stat = "); _status_word .print(); printf("\n"); 8201 printf("tags = "); _tag_word .print(); printf("\n"); 8202 } 8203 8204 }; 8205 8206 class Flag_Register { 8207 public: 8208 int32_t _value; 8209 8210 bool overflow() const { return ((_value >> 11) & 1) != 0; } 8211 bool direction() const { return ((_value >> 10) & 1) != 0; } 8212 bool sign() const { return ((_value >> 7) & 1) != 0; } 8213 bool zero() const { return ((_value >> 6) & 1) != 0; } 8214 bool auxiliary_carry() const { return ((_value >> 4) & 1) != 0; } 8215 bool parity() const { return ((_value >> 2) & 1) != 0; } 8216 bool carry() const { return ((_value >> 0) & 1) != 0; } 8217 8218 void print() const { 8219 // flags 8220 char f[8]; 8221 f[0] = (overflow ()) ? 'O' : '-'; 8222 f[1] = (direction ()) ? 'D' : '-'; 8223 f[2] = (sign ()) ? 'S' : '-'; 8224 f[3] = (zero ()) ? 'Z' : '-'; 8225 f[4] = (auxiliary_carry()) ? 'A' : '-'; 8226 f[5] = (parity ()) ? 'P' : '-'; 8227 f[6] = (carry ()) ? 
'C' : '-'; 8228 f[7] = '\x0'; 8229 // output 8230 printf("%08x flags = %s", _value, f); 8231 } 8232 8233 }; 8234 8235 class IU_Register { 8236 public: 8237 int32_t _value; 8238 8239 void print() const { 8240 printf("%08x %11d", _value, _value); 8241 } 8242 8243 }; 8244 8245 class IU_State { 8246 public: 8247 Flag_Register _eflags; 8248 IU_Register _rdi; 8249 IU_Register _rsi; 8250 IU_Register _rbp; 8251 IU_Register _rsp; 8252 IU_Register _rbx; 8253 IU_Register _rdx; 8254 IU_Register _rcx; 8255 IU_Register _rax; 8256 8257 void print() const { 8258 // computation registers 8259 printf("rax, = "); _rax.print(); printf("\n"); 8260 printf("rbx, = "); _rbx.print(); printf("\n"); 8261 printf("rcx = "); _rcx.print(); printf("\n"); 8262 printf("rdx = "); _rdx.print(); printf("\n"); 8263 printf("rdi = "); _rdi.print(); printf("\n"); 8264 printf("rsi = "); _rsi.print(); printf("\n"); 8265 printf("rbp, = "); _rbp.print(); printf("\n"); 8266 printf("rsp = "); _rsp.print(); printf("\n"); 8267 printf("\n"); 8268 // control registers 8269 printf("flgs = "); _eflags.print(); printf("\n"); 8270 } 8271 }; 8272 8273 8274 class CPU_State { 8275 public: 8276 FPU_State _fpu_state; 8277 IU_State _iu_state; 8278 8279 void print() const { 8280 printf("--------------------------------------------------\n"); 8281 _iu_state .print(); 8282 printf("\n"); 8283 _fpu_state.print(); 8284 printf("--------------------------------------------------\n"); 8285 } 8286 8287 }; 8288 8289 8290 static void _print_CPU_state(CPU_State* state) { 8291 state->print(); 8292 }; 8293 8294 8295 void MacroAssembler::print_CPU_state() { 8296 push_CPU_state(); 8297 push(rsp); // pass CPU state 8298 call(RuntimeAddress(CAST_FROM_FN_PTR(address, _print_CPU_state))); 8299 addptr(rsp, wordSize); // discard argument 8300 pop_CPU_state(); 8301 } 8302 8303 8304 static bool _verify_FPU(int stack_depth, char* s, CPU_State* state) { 8305 static int counter = 0; 8306 FPU_State* fs = &state->_fpu_state; 8307 counter++; 8308 // For 
leaf calls, only verify that the top few elements remain empty. 8309 // We only need 1 empty at the top for C2 code. 8310 if( stack_depth < 0 ) { 8311 if( fs->tag_for_st(7) != 3 ) { 8312 printf("FPR7 not empty\n"); 8313 state->print(); 8314 assert(false, "error"); 8315 return false; 8316 } 8317 return true; // All other stack states do not matter 8318 } 8319 8320 assert((fs->_control_word._value & 0xffff) == StubRoutines::_fpu_cntrl_wrd_std, 8321 "bad FPU control word"); 8322 8323 // compute stack depth 8324 int i = 0; 8325 while (i < FPU_State::number_of_registers && fs->tag_for_st(i) < 3) i++; 8326 int d = i; 8327 while (i < FPU_State::number_of_registers && fs->tag_for_st(i) == 3) i++; 8328 // verify findings 8329 if (i != FPU_State::number_of_registers) { 8330 // stack not contiguous 8331 printf("%s: stack not contiguous at ST%d\n", s, i); 8332 state->print(); 8333 assert(false, "error"); 8334 return false; 8335 } 8336 // check if computed stack depth corresponds to expected stack depth 8337 if (stack_depth < 0) { 8338 // expected stack depth is -stack_depth or less 8339 if (d > -stack_depth) { 8340 // too many elements on the stack 8341 printf("%s: <= %d stack elements expected but found %d\n", s, -stack_depth, d); 8342 state->print(); 8343 assert(false, "error"); 8344 return false; 8345 } 8346 } else { 8347 // expected stack depth is stack_depth 8348 if (d != stack_depth) { 8349 // wrong stack depth 8350 printf("%s: %d stack elements expected but found %d\n", s, stack_depth, d); 8351 state->print(); 8352 assert(false, "error"); 8353 return false; 8354 } 8355 } 8356 // everything is cool 8357 return true; 8358 } 8359 8360 8361 void MacroAssembler::verify_FPU(int stack_depth, const char* s) { 8362 if (!VerifyFPU) return; 8363 push_CPU_state(); 8364 push(rsp); // pass CPU state 8365 ExternalAddress msg((address) s); 8366 // pass message string s 8367 pushptr(msg.addr()); 8368 push(stack_depth); // pass stack depth 8369 
call(RuntimeAddress(CAST_FROM_FN_PTR(address, _verify_FPU))); 8370 addptr(rsp, 3 * wordSize); // discard arguments 8371 // check for error 8372 { Label L; 8373 testl(rax, rax); 8374 jcc(Assembler::notZero, L); 8375 int3(); // break if error condition 8376 bind(L); 8377 } 8378 pop_CPU_state(); 8379 } 8380 8381 void MacroAssembler::load_klass(Register dst, Register src) { 8382 #ifdef _LP64 8383 if (UseCompressedOops) { 8384 movl(dst, Address(src, oopDesc::klass_offset_in_bytes())); 8385 decode_heap_oop_not_null(dst); 8386 } else 8387 #endif 8388 movptr(dst, Address(src, oopDesc::klass_offset_in_bytes())); 8389 } 8390 8391 void MacroAssembler::load_prototype_header(Register dst, Register src) { 8392 #ifdef _LP64 8393 if (UseCompressedOops) { 8394 assert (Universe::heap() != NULL, "java heap should be initialized"); 8395 movl(dst, Address(src, oopDesc::klass_offset_in_bytes())); 8396 if (Universe::narrow_oop_shift() != 0) { 8397 assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); 8398 if (LogMinObjAlignmentInBytes == Address::times_8) { 8399 movq(dst, Address(r12_heapbase, dst, Address::times_8, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes())); 8400 } else { 8401 // OK to use shift since we don't need to preserve flags. 
8402 shlq(dst, LogMinObjAlignmentInBytes); 8403 movq(dst, Address(r12_heapbase, dst, Address::times_1, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes())); 8404 } 8405 } else { 8406 movq(dst, Address(dst, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes())); 8407 } 8408 } else 8409 #endif 8410 { 8411 movptr(dst, Address(src, oopDesc::klass_offset_in_bytes())); 8412 movptr(dst, Address(dst, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes())); 8413 } 8414 } 8415 8416 void MacroAssembler::store_klass(Register dst, Register src) { 8417 #ifdef _LP64 8418 if (UseCompressedOops) { 8419 encode_heap_oop_not_null(src); 8420 movl(Address(dst, oopDesc::klass_offset_in_bytes()), src); 8421 } else 8422 #endif 8423 movptr(Address(dst, oopDesc::klass_offset_in_bytes()), src); 8424 } 8425 8426 void MacroAssembler::load_heap_oop(Register dst, Address src) { 8427 #ifdef _LP64 8428 if (UseCompressedOops) { 8429 movl(dst, src); 8430 decode_heap_oop(dst); 8431 } else 8432 #endif 8433 movptr(dst, src); 8434 } 8435 8436 // Doesn't do verfication, generates fixed size code 8437 void MacroAssembler::load_heap_oop_not_null(Register dst, Address src) { 8438 #ifdef _LP64 8439 if (UseCompressedOops) { 8440 movl(dst, src); 8441 decode_heap_oop_not_null(dst); 8442 } else 8443 #endif 8444 movptr(dst, src); 8445 } 8446 8447 void MacroAssembler::store_heap_oop(Address dst, Register src) { 8448 #ifdef _LP64 8449 if (UseCompressedOops) { 8450 assert(!dst.uses(src), "not enough registers"); 8451 encode_heap_oop(src); 8452 movl(dst, src); 8453 } else 8454 #endif 8455 movptr(dst, src); 8456 } 8457 8458 // Used for storing NULLs. 
8459 void MacroAssembler::store_heap_oop_null(Address dst) { 8460 #ifdef _LP64 8461 if (UseCompressedOops) { 8462 movl(dst, (int32_t)NULL_WORD); 8463 } else { 8464 movslq(dst, (int32_t)NULL_WORD); 8465 } 8466 #else 8467 movl(dst, (int32_t)NULL_WORD); 8468 #endif 8469 } 8470 8471 #ifdef _LP64 8472 void MacroAssembler::store_klass_gap(Register dst, Register src) { 8473 if (UseCompressedOops) { 8474 // Store to klass gap in destination 8475 movl(Address(dst, oopDesc::klass_gap_offset_in_bytes()), src); 8476 } 8477 } 8478 8479 #ifdef ASSERT 8480 void MacroAssembler::verify_heapbase(const char* msg) { 8481 assert (UseCompressedOops, "should be compressed"); 8482 assert (Universe::heap() != NULL, "java heap should be initialized"); 8483 if (CheckCompressedOops) { 8484 Label ok; 8485 push(rscratch1); // cmpptr trashes rscratch1 8486 cmpptr(r12_heapbase, ExternalAddress((address)Universe::narrow_oop_base_addr())); 8487 jcc(Assembler::equal, ok); 8488 stop(msg); 8489 bind(ok); 8490 pop(rscratch1); 8491 } 8492 } 8493 #endif 8494 8495 // Algorithm must match oop.inline.hpp encode_heap_oop. 
8496 void MacroAssembler::encode_heap_oop(Register r) { 8497 #ifdef ASSERT 8498 verify_heapbase("MacroAssembler::encode_heap_oop: heap base corrupted?"); 8499 #endif 8500 verify_oop(r, "broken oop in encode_heap_oop"); 8501 if (Universe::narrow_oop_base() == NULL) { 8502 if (Universe::narrow_oop_shift() != 0) { 8503 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); 8504 shrq(r, LogMinObjAlignmentInBytes); 8505 } 8506 return; 8507 } 8508 testq(r, r); 8509 cmovq(Assembler::equal, r, r12_heapbase); 8510 subq(r, r12_heapbase); 8511 shrq(r, LogMinObjAlignmentInBytes); 8512 } 8513 8514 void MacroAssembler::encode_heap_oop_not_null(Register r) { 8515 #ifdef ASSERT 8516 verify_heapbase("MacroAssembler::encode_heap_oop_not_null: heap base corrupted?"); 8517 if (CheckCompressedOops) { 8518 Label ok; 8519 testq(r, r); 8520 jcc(Assembler::notEqual, ok); 8521 stop("null oop passed to encode_heap_oop_not_null"); 8522 bind(ok); 8523 } 8524 #endif 8525 verify_oop(r, "broken oop in encode_heap_oop_not_null"); 8526 if (Universe::narrow_oop_base() != NULL) { 8527 subq(r, r12_heapbase); 8528 } 8529 if (Universe::narrow_oop_shift() != 0) { 8530 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); 8531 shrq(r, LogMinObjAlignmentInBytes); 8532 } 8533 } 8534 8535 void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) { 8536 #ifdef ASSERT 8537 verify_heapbase("MacroAssembler::encode_heap_oop_not_null2: heap base corrupted?"); 8538 if (CheckCompressedOops) { 8539 Label ok; 8540 testq(src, src); 8541 jcc(Assembler::notEqual, ok); 8542 stop("null oop passed to encode_heap_oop_not_null2"); 8543 bind(ok); 8544 } 8545 #endif 8546 verify_oop(src, "broken oop in encode_heap_oop_not_null2"); 8547 if (dst != src) { 8548 movq(dst, src); 8549 } 8550 if (Universe::narrow_oop_base() != NULL) { 8551 subq(dst, r12_heapbase); 8552 } 8553 if (Universe::narrow_oop_shift() != 0) { 8554 assert (LogMinObjAlignmentInBytes 
== Universe::narrow_oop_shift(), "decode alg wrong"); 8555 shrq(dst, LogMinObjAlignmentInBytes); 8556 } 8557 } 8558 8559 void MacroAssembler::decode_heap_oop(Register r) { 8560 #ifdef ASSERT 8561 verify_heapbase("MacroAssembler::decode_heap_oop: heap base corrupted?"); 8562 #endif 8563 if (Universe::narrow_oop_base() == NULL) { 8564 if (Universe::narrow_oop_shift() != 0) { 8565 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); 8566 shlq(r, LogMinObjAlignmentInBytes); 8567 } 8568 } else { 8569 Label done; 8570 shlq(r, LogMinObjAlignmentInBytes); 8571 jccb(Assembler::equal, done); 8572 addq(r, r12_heapbase); 8573 bind(done); 8574 } 8575 verify_oop(r, "broken oop in decode_heap_oop"); 8576 } 8577 8578 void MacroAssembler::decode_heap_oop_not_null(Register r) { 8579 // Note: it will change flags 8580 assert (UseCompressedOops, "should only be used for compressed headers"); 8581 assert (Universe::heap() != NULL, "java heap should be initialized"); 8582 // Cannot assert, unverified entry point counts instructions (see .ad file) 8583 // vtableStubs also counts instructions in pd_code_size_limit. 8584 // Also do not verify_oop as this is called by verify_oop. 8585 if (Universe::narrow_oop_shift() != 0) { 8586 assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); 8587 shlq(r, LogMinObjAlignmentInBytes); 8588 if (Universe::narrow_oop_base() != NULL) { 8589 addq(r, r12_heapbase); 8590 } 8591 } else { 8592 assert (Universe::narrow_oop_base() == NULL, "sanity"); 8593 } 8594 } 8595 8596 void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) { 8597 // Note: it will change flags 8598 assert (UseCompressedOops, "should only be used for compressed headers"); 8599 assert (Universe::heap() != NULL, "java heap should be initialized"); 8600 // Cannot assert, unverified entry point counts instructions (see .ad file) 8601 // vtableStubs also counts instructions in pd_code_size_limit. 
8602 // Also do not verify_oop as this is called by verify_oop. 8603 if (Universe::narrow_oop_shift() != 0) { 8604 assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); 8605 if (LogMinObjAlignmentInBytes == Address::times_8) { 8606 leaq(dst, Address(r12_heapbase, src, Address::times_8, 0)); 8607 } else { 8608 if (dst != src) { 8609 movq(dst, src); 8610 } 8611 shlq(dst, LogMinObjAlignmentInBytes); 8612 if (Universe::narrow_oop_base() != NULL) { 8613 addq(dst, r12_heapbase); 8614 } 8615 } 8616 } else { 8617 assert (Universe::narrow_oop_base() == NULL, "sanity"); 8618 if (dst != src) { 8619 movq(dst, src); 8620 } 8621 } 8622 } 8623 8624 void MacroAssembler::set_narrow_oop(Register dst, jobject obj) { 8625 assert (UseCompressedOops, "should only be used for compressed headers"); 8626 assert (Universe::heap() != NULL, "java heap should be initialized"); 8627 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); 8628 int oop_index = oop_recorder()->find_index(obj); 8629 RelocationHolder rspec = oop_Relocation::spec(oop_index); 8630 mov_narrow_oop(dst, oop_index, rspec); 8631 } 8632 8633 void MacroAssembler::set_narrow_oop(Address dst, jobject obj) { 8634 assert (UseCompressedOops, "should only be used for compressed headers"); 8635 assert (Universe::heap() != NULL, "java heap should be initialized"); 8636 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); 8637 int oop_index = oop_recorder()->find_index(obj); 8638 RelocationHolder rspec = oop_Relocation::spec(oop_index); 8639 mov_narrow_oop(dst, oop_index, rspec); 8640 } 8641 8642 void MacroAssembler::cmp_narrow_oop(Register dst, jobject obj) { 8643 assert (UseCompressedOops, "should only be used for compressed headers"); 8644 assert (Universe::heap() != NULL, "java heap should be initialized"); 8645 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); 8646 int oop_index = oop_recorder()->find_index(obj); 8647 RelocationHolder rspec 
= oop_Relocation::spec(oop_index); 8648 Assembler::cmp_narrow_oop(dst, oop_index, rspec); 8649 } 8650 8651 void MacroAssembler::cmp_narrow_oop(Address dst, jobject obj) { 8652 assert (UseCompressedOops, "should only be used for compressed headers"); 8653 assert (Universe::heap() != NULL, "java heap should be initialized"); 8654 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); 8655 int oop_index = oop_recorder()->find_index(obj); 8656 RelocationHolder rspec = oop_Relocation::spec(oop_index); 8657 Assembler::cmp_narrow_oop(dst, oop_index, rspec); 8658 } 8659 8660 void MacroAssembler::reinit_heapbase() { 8661 if (UseCompressedOops) { 8662 movptr(r12_heapbase, ExternalAddress((address)Universe::narrow_oop_base_addr())); 8663 } 8664 } 8665 #endif // _LP64 8666 8667 // IndexOf for constant substrings with size >= 8 chars 8668 // which don't need to be loaded through stack. 8669 void MacroAssembler::string_indexofC8(Register str1, Register str2, 8670 Register cnt1, Register cnt2, 8671 int int_cnt2, Register result, 8672 XMMRegister vec, Register tmp) { 8673 assert(UseSSE42Intrinsics, "SSE4.2 is required"); 8674 8675 // This method uses pcmpestri inxtruction with bound registers 8676 // inputs: 8677 // xmm - substring 8678 // rax - substring length (elements count) 8679 // mem - scanned string 8680 // rdx - string length (elements count) 8681 // 0xd - mode: 1100 (substring search) + 01 (unsigned shorts) 8682 // outputs: 8683 // rcx - matched index in string 8684 assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri"); 8685 8686 Label RELOAD_SUBSTR, SCAN_TO_SUBSTR, SCAN_SUBSTR, 8687 RET_FOUND, RET_NOT_FOUND, EXIT, FOUND_SUBSTR, 8688 MATCH_SUBSTR_HEAD, RELOAD_STR, FOUND_CANDIDATE; 8689 8690 // Note, inline_string_indexOf() generates checks: 8691 // if (substr.count > string.count) return -1; 8692 // if (substr.count == 0) return 0; 8693 assert(int_cnt2 >= 8, "this code isused only for cnt2 >= 8 chars"); 8694 8695 // Load substring. 
8696 movdqu(vec, Address(str2, 0)); 8697 movl(cnt2, int_cnt2); 8698 movptr(result, str1); // string addr 8699 8700 if (int_cnt2 > 8) { 8701 jmpb(SCAN_TO_SUBSTR); 8702 8703 // Reload substr for rescan, this code 8704 // is executed only for large substrings (> 8 chars) 8705 bind(RELOAD_SUBSTR); 8706 movdqu(vec, Address(str2, 0)); 8707 negptr(cnt2); // Jumped here with negative cnt2, convert to positive 8708 8709 bind(RELOAD_STR); 8710 // We came here after the beginning of the substring was 8711 // matched but the rest of it was not so we need to search 8712 // again. Start from the next element after the previous match. 8713 8714 // cnt2 is number of substring reminding elements and 8715 // cnt1 is number of string reminding elements when cmp failed. 8716 // Restored cnt1 = cnt1 - cnt2 + int_cnt2 8717 subl(cnt1, cnt2); 8718 addl(cnt1, int_cnt2); 8719 movl(cnt2, int_cnt2); // Now restore cnt2 8720 8721 decrementl(cnt1); // Shift to next element 8722 cmpl(cnt1, cnt2); 8723 jccb(Assembler::negative, RET_NOT_FOUND); // Left less then substring 8724 8725 addptr(result, 2); 8726 8727 } // (int_cnt2 > 8) 8728 8729 // Scan string for start of substr in 16-byte vectors 8730 bind(SCAN_TO_SUBSTR); 8731 pcmpestri(vec, Address(result, 0), 0x0d); 8732 jccb(Assembler::below, FOUND_CANDIDATE); // CF == 1 8733 subl(cnt1, 8); 8734 jccb(Assembler::lessEqual, RET_NOT_FOUND); // Scanned full string 8735 cmpl(cnt1, cnt2); 8736 jccb(Assembler::negative, RET_NOT_FOUND); // Left less then substring 8737 addptr(result, 16); 8738 jmpb(SCAN_TO_SUBSTR); 8739 8740 // Found a potential substr 8741 bind(FOUND_CANDIDATE); 8742 // Matched whole vector if first element matched (tmp(rcx) == 0). 
8743 if (int_cnt2 == 8) { 8744 jccb(Assembler::overflow, RET_FOUND); // OF == 1 8745 } else { // int_cnt2 > 8 8746 jccb(Assembler::overflow, FOUND_SUBSTR); 8747 } 8748 // After pcmpestri tmp(rcx) contains matched element index 8749 // Compute start addr of substr 8750 lea(result, Address(result, tmp, Address::times_2)); 8751 8752 // Make sure string is still long enough 8753 subl(cnt1, tmp); 8754 cmpl(cnt1, cnt2); 8755 if (int_cnt2 == 8) { 8756 jccb(Assembler::greaterEqual, SCAN_TO_SUBSTR); 8757 } else { // int_cnt2 > 8 8758 jccb(Assembler::greaterEqual, MATCH_SUBSTR_HEAD); 8759 } 8760 // Left less then substring. 8761 8762 bind(RET_NOT_FOUND); 8763 movl(result, -1); 8764 jmpb(EXIT); 8765 8766 if (int_cnt2 > 8) { 8767 // This code is optimized for the case when whole substring 8768 // is matched if its head is matched. 8769 bind(MATCH_SUBSTR_HEAD); 8770 pcmpestri(vec, Address(result, 0), 0x0d); 8771 // Reload only string if does not match 8772 jccb(Assembler::noOverflow, RELOAD_STR); // OF == 0 8773 8774 Label CONT_SCAN_SUBSTR; 8775 // Compare the rest of substring (> 8 chars). 8776 bind(FOUND_SUBSTR); 8777 // First 8 chars are already matched. 
8778 negptr(cnt2); 8779 addptr(cnt2, 8); 8780 8781 bind(SCAN_SUBSTR); 8782 subl(cnt1, 8); 8783 cmpl(cnt2, -8); // Do not read beyond substring 8784 jccb(Assembler::lessEqual, CONT_SCAN_SUBSTR); 8785 // Back-up strings to avoid reading beyond substring: 8786 // cnt1 = cnt1 - cnt2 + 8 8787 addl(cnt1, cnt2); // cnt2 is negative 8788 addl(cnt1, 8); 8789 movl(cnt2, 8); negptr(cnt2); 8790 bind(CONT_SCAN_SUBSTR); 8791 if (int_cnt2 < (int)G) { 8792 movdqu(vec, Address(str2, cnt2, Address::times_2, int_cnt2*2)); 8793 pcmpestri(vec, Address(result, cnt2, Address::times_2, int_cnt2*2), 0x0d); 8794 } else { 8795 // calculate index in register to avoid integer overflow (int_cnt2*2) 8796 movl(tmp, int_cnt2); 8797 addptr(tmp, cnt2); 8798 movdqu(vec, Address(str2, tmp, Address::times_2, 0)); 8799 pcmpestri(vec, Address(result, tmp, Address::times_2, 0), 0x0d); 8800 } 8801 // Need to reload strings pointers if not matched whole vector 8802 jccb(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0 8803 addptr(cnt2, 8); 8804 jccb(Assembler::negative, SCAN_SUBSTR); 8805 // Fall through if found full substring 8806 8807 } // (int_cnt2 > 8) 8808 8809 bind(RET_FOUND); 8810 // Found result if we matched full small substring. 8811 // Compute substr offset 8812 subptr(result, str1); 8813 shrl(result, 1); // index 8814 bind(EXIT); 8815 8816 } // string_indexofC8 8817 8818 // Small strings are loaded through stack if they cross page boundary. 8819 void MacroAssembler::string_indexof(Register str1, Register str2, 8820 Register cnt1, Register cnt2, 8821 int int_cnt2, Register result, 8822 XMMRegister vec, Register tmp) { 8823 assert(UseSSE42Intrinsics, "SSE4.2 is required"); 8824 // 8825 // int_cnt2 is length of small (< 8 chars) constant substring 8826 // or (-1) for non constant substring in which case its length 8827 // is in cnt2 register. 
8828 // 8829 // Note, inline_string_indexOf() generates checks: 8830 // if (substr.count > string.count) return -1; 8831 // if (substr.count == 0) return 0; 8832 // 8833 assert(int_cnt2 == -1 || (0 < int_cnt2 && int_cnt2 < 8), "should be != 0"); 8834 8835 // This method uses pcmpestri inxtruction with bound registers 8836 // inputs: 8837 // xmm - substring 8838 // rax - substring length (elements count) 8839 // mem - scanned string 8840 // rdx - string length (elements count) 8841 // 0xd - mode: 1100 (substring search) + 01 (unsigned shorts) 8842 // outputs: 8843 // rcx - matched index in string 8844 assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri"); 8845 8846 Label RELOAD_SUBSTR, SCAN_TO_SUBSTR, SCAN_SUBSTR, ADJUST_STR, 8847 RET_FOUND, RET_NOT_FOUND, CLEANUP, FOUND_SUBSTR, 8848 FOUND_CANDIDATE; 8849 8850 { //======================================================== 8851 // We don't know where these strings are located 8852 // and we can't read beyond them. Load them through stack. 8853 Label BIG_STRINGS, CHECK_STR, COPY_SUBSTR, COPY_STR; 8854 8855 movptr(tmp, rsp); // save old SP 8856 8857 if (int_cnt2 > 0) { // small (< 8 chars) constant substring 8858 if (int_cnt2 == 1) { // One char 8859 load_unsigned_short(result, Address(str2, 0)); 8860 movdl(vec, result); // move 32 bits 8861 } else if (int_cnt2 == 2) { // Two chars 8862 movdl(vec, Address(str2, 0)); // move 32 bits 8863 } else if (int_cnt2 == 4) { // Four chars 8864 movq(vec, Address(str2, 0)); // move 64 bits 8865 } else { // cnt2 = { 3, 5, 6, 7 } 8866 // Array header size is 12 bytes in 32-bit VM 8867 // + 6 bytes for 3 chars == 18 bytes, 8868 // enough space to load vec and shift. 
8869 assert(HeapWordSize*typeArrayKlass::header_size() >= 12,"sanity"); 8870 movdqu(vec, Address(str2, (int_cnt2*2)-16)); 8871 psrldq(vec, 16-(int_cnt2*2)); 8872 } 8873 } else { // not constant substring 8874 cmpl(cnt2, 8); 8875 jccb(Assembler::aboveEqual, BIG_STRINGS); // Both strings are big enough 8876 8877 // We can read beyond string if srt+16 does not cross page boundary 8878 // since heaps are aligned and mapped by pages. 8879 assert(os::vm_page_size() < (int)G, "default page should be small"); 8880 movl(result, str2); // We need only low 32 bits 8881 andl(result, (os::vm_page_size()-1)); 8882 cmpl(result, (os::vm_page_size()-16)); 8883 jccb(Assembler::belowEqual, CHECK_STR); 8884 8885 // Move small strings to stack to allow load 16 bytes into vec. 8886 subptr(rsp, 16); 8887 int stk_offset = wordSize-2; 8888 push(cnt2); 8889 8890 bind(COPY_SUBSTR); 8891 load_unsigned_short(result, Address(str2, cnt2, Address::times_2, -2)); 8892 movw(Address(rsp, cnt2, Address::times_2, stk_offset), result); 8893 decrement(cnt2); 8894 jccb(Assembler::notZero, COPY_SUBSTR); 8895 8896 pop(cnt2); 8897 movptr(str2, rsp); // New substring address 8898 } // non constant 8899 8900 bind(CHECK_STR); 8901 cmpl(cnt1, 8); 8902 jccb(Assembler::aboveEqual, BIG_STRINGS); 8903 8904 // Check cross page boundary. 
8905 movl(result, str1); // We need only low 32 bits 8906 andl(result, (os::vm_page_size()-1)); 8907 cmpl(result, (os::vm_page_size()-16)); 8908 jccb(Assembler::belowEqual, BIG_STRINGS); 8909 8910 subptr(rsp, 16); 8911 int stk_offset = -2; 8912 if (int_cnt2 < 0) { // not constant 8913 push(cnt2); 8914 stk_offset += wordSize; 8915 } 8916 movl(cnt2, cnt1); 8917 8918 bind(COPY_STR); 8919 load_unsigned_short(result, Address(str1, cnt2, Address::times_2, -2)); 8920 movw(Address(rsp, cnt2, Address::times_2, stk_offset), result); 8921 decrement(cnt2); 8922 jccb(Assembler::notZero, COPY_STR); 8923 8924 if (int_cnt2 < 0) { // not constant 8925 pop(cnt2); 8926 } 8927 movptr(str1, rsp); // New string address 8928 8929 bind(BIG_STRINGS); 8930 // Load substring. 8931 if (int_cnt2 < 0) { // -1 8932 movdqu(vec, Address(str2, 0)); 8933 push(cnt2); // substr count 8934 push(str2); // substr addr 8935 push(str1); // string addr 8936 } else { 8937 // Small (< 8 chars) constant substrings are loaded already. 8938 movl(cnt2, int_cnt2); 8939 } 8940 push(tmp); // original SP 8941 8942 } // Finished loading 8943 8944 //======================================================== 8945 // Start search 8946 // 8947 8948 movptr(result, str1); // string addr 8949 8950 if (int_cnt2 < 0) { // Only for non constant substring 8951 jmpb(SCAN_TO_SUBSTR); 8952 8953 // SP saved at sp+0 8954 // String saved at sp+1*wordSize 8955 // Substr saved at sp+2*wordSize 8956 // Substr count saved at sp+3*wordSize 8957 8958 // Reload substr for rescan, this code 8959 // is executed only for large substrings (> 8 chars) 8960 bind(RELOAD_SUBSTR); 8961 movptr(str2, Address(rsp, 2*wordSize)); 8962 movl(cnt2, Address(rsp, 3*wordSize)); 8963 movdqu(vec, Address(str2, 0)); 8964 // We came here after the beginning of the substring was 8965 // matched but the rest of it was not so we need to search 8966 // again. Start from the next element after the previous match. 
8967 subptr(str1, result); // Restore counter 8968 shrl(str1, 1); 8969 addl(cnt1, str1); 8970 decrementl(cnt1); // Shift to next element 8971 cmpl(cnt1, cnt2); 8972 jccb(Assembler::negative, RET_NOT_FOUND); // Left less then substring 8973 8974 addptr(result, 2); 8975 } // non constant 8976 8977 // Scan string for start of substr in 16-byte vectors 8978 bind(SCAN_TO_SUBSTR); 8979 assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri"); 8980 pcmpestri(vec, Address(result, 0), 0x0d); 8981 jccb(Assembler::below, FOUND_CANDIDATE); // CF == 1 8982 subl(cnt1, 8); 8983 jccb(Assembler::lessEqual, RET_NOT_FOUND); // Scanned full string 8984 cmpl(cnt1, cnt2); 8985 jccb(Assembler::negative, RET_NOT_FOUND); // Left less then substring 8986 addptr(result, 16); 8987 8988 bind(ADJUST_STR); 8989 cmpl(cnt1, 8); // Do not read beyond string 8990 jccb(Assembler::greaterEqual, SCAN_TO_SUBSTR); 8991 // Back-up string to avoid reading beyond string. 8992 lea(result, Address(result, cnt1, Address::times_2, -16)); 8993 movl(cnt1, 8); 8994 jmpb(SCAN_TO_SUBSTR); 8995 8996 // Found a potential substr 8997 bind(FOUND_CANDIDATE); 8998 // After pcmpestri tmp(rcx) contains matched element index 8999 9000 // Make sure string is still long enough 9001 subl(cnt1, tmp); 9002 cmpl(cnt1, cnt2); 9003 jccb(Assembler::greaterEqual, FOUND_SUBSTR); 9004 // Left less then substring. 9005 9006 bind(RET_NOT_FOUND); 9007 movl(result, -1); 9008 jmpb(CLEANUP); 9009 9010 bind(FOUND_SUBSTR); 9011 // Compute start addr of substr 9012 lea(result, Address(result, tmp, Address::times_2)); 9013 9014 if (int_cnt2 > 0) { // Constant substring 9015 // Repeat search for small substring (< 8 chars) 9016 // from new point without reloading substring. 9017 // Have to check that we don't read beyond string. 9018 cmpl(tmp, 8-int_cnt2); 9019 jccb(Assembler::greater, ADJUST_STR); 9020 // Fall through if matched whole substring. 
9021 } else { // non constant 9022 assert(int_cnt2 == -1, "should be != 0"); 9023 9024 addl(tmp, cnt2); 9025 // Found result if we matched whole substring. 9026 cmpl(tmp, 8); 9027 jccb(Assembler::lessEqual, RET_FOUND); 9028 9029 // Repeat search for small substring (<= 8 chars) 9030 // from new point 'str1' without reloading substring. 9031 cmpl(cnt2, 8); 9032 // Have to check that we don't read beyond string. 9033 jccb(Assembler::lessEqual, ADJUST_STR); 9034 9035 Label CHECK_NEXT, CONT_SCAN_SUBSTR, RET_FOUND_LONG; 9036 // Compare the rest of substring (> 8 chars). 9037 movptr(str1, result); 9038 9039 cmpl(tmp, cnt2); 9040 // First 8 chars are already matched. 9041 jccb(Assembler::equal, CHECK_NEXT); 9042 9043 bind(SCAN_SUBSTR); 9044 pcmpestri(vec, Address(str1, 0), 0x0d); 9045 // Need to reload strings pointers if not matched whole vector 9046 jcc(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0 9047 9048 bind(CHECK_NEXT); 9049 subl(cnt2, 8); 9050 jccb(Assembler::lessEqual, RET_FOUND_LONG); // Found full substring 9051 addptr(str1, 16); 9052 addptr(str2, 16); 9053 subl(cnt1, 8); 9054 cmpl(cnt2, 8); // Do not read beyond substring 9055 jccb(Assembler::greaterEqual, CONT_SCAN_SUBSTR); 9056 // Back-up strings to avoid reading beyond substring. 9057 lea(str2, Address(str2, cnt2, Address::times_2, -16)); 9058 lea(str1, Address(str1, cnt2, Address::times_2, -16)); 9059 subl(cnt1, cnt2); 9060 movl(cnt2, 8); 9061 addl(cnt1, 8); 9062 bind(CONT_SCAN_SUBSTR); 9063 movdqu(vec, Address(str2, 0)); 9064 jmpb(SCAN_SUBSTR); 9065 9066 bind(RET_FOUND_LONG); 9067 movptr(str1, Address(rsp, wordSize)); 9068 } // non constant 9069 9070 bind(RET_FOUND); 9071 // Compute substr offset 9072 subptr(result, str1); 9073 shrl(result, 1); // index 9074 9075 bind(CLEANUP); 9076 pop(rsp); // restore SP 9077 9078 } // string_indexof 9079 9080 // Compare strings. 
9081 void MacroAssembler::string_compare(Register str1, Register str2, 9082 Register cnt1, Register cnt2, Register result, 9083 XMMRegister vec1) { 9084 Label LENGTH_DIFF_LABEL, POP_LABEL, DONE_LABEL, WHILE_HEAD_LABEL; 9085 9086 // Compute the minimum of the string lengths and the 9087 // difference of the string lengths (stack). 9088 // Do the conditional move stuff 9089 movl(result, cnt1); 9090 subl(cnt1, cnt2); 9091 push(cnt1); 9092 cmov32(Assembler::lessEqual, cnt2, result); 9093 9094 // Is the minimum length zero? 9095 testl(cnt2, cnt2); 9096 jcc(Assembler::zero, LENGTH_DIFF_LABEL); 9097 9098 // Load first characters 9099 load_unsigned_short(result, Address(str1, 0)); 9100 load_unsigned_short(cnt1, Address(str2, 0)); 9101 9102 // Compare first characters 9103 subl(result, cnt1); 9104 jcc(Assembler::notZero, POP_LABEL); 9105 decrementl(cnt2); 9106 jcc(Assembler::zero, LENGTH_DIFF_LABEL); 9107 9108 { 9109 // Check after comparing first character to see if strings are equivalent 9110 Label LSkip2; 9111 // Check if the strings start at same location 9112 cmpptr(str1, str2); 9113 jccb(Assembler::notEqual, LSkip2); 9114 9115 // Check if the length difference is zero (from stack) 9116 cmpl(Address(rsp, 0), 0x0); 9117 jcc(Assembler::equal, LENGTH_DIFF_LABEL); 9118 9119 // Strings might not be equivalent 9120 bind(LSkip2); 9121 } 9122 9123 Address::ScaleFactor scale = Address::times_2; 9124 int stride = 8; 9125 9126 // Advance to next element 9127 addptr(str1, 16/stride); 9128 addptr(str2, 16/stride); 9129 9130 if (UseSSE42Intrinsics) { 9131 Label COMPARE_WIDE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_TAIL; 9132 int pcmpmask = 0x19; 9133 // Setup to compare 16-byte vectors 9134 movl(result, cnt2); 9135 andl(cnt2, ~(stride - 1)); // cnt2 holds the vector count 9136 jccb(Assembler::zero, COMPARE_TAIL); 9137 9138 lea(str1, Address(str1, result, scale)); 9139 lea(str2, Address(str2, result, scale)); 9140 negptr(result); 9141 9142 // pcmpestri 9143 // inputs: 9144 // vec1- 
substring 9145 // rax - negative string length (elements count) 9146 // mem - scaned string 9147 // rdx - string length (elements count) 9148 // pcmpmask - cmp mode: 11000 (string compare with negated result) 9149 // + 00 (unsigned bytes) or + 01 (unsigned shorts) 9150 // outputs: 9151 // rcx - first mismatched element index 9152 assert(result == rax && cnt2 == rdx && cnt1 == rcx, "pcmpestri"); 9153 9154 bind(COMPARE_WIDE_VECTORS); 9155 movdqu(vec1, Address(str1, result, scale)); 9156 pcmpestri(vec1, Address(str2, result, scale), pcmpmask); 9157 // After pcmpestri cnt1(rcx) contains mismatched element index 9158 9159 jccb(Assembler::below, VECTOR_NOT_EQUAL); // CF==1 9160 addptr(result, stride); 9161 subptr(cnt2, stride); 9162 jccb(Assembler::notZero, COMPARE_WIDE_VECTORS); 9163 9164 // compare wide vectors tail 9165 testl(result, result); 9166 jccb(Assembler::zero, LENGTH_DIFF_LABEL); 9167 9168 movl(cnt2, stride); 9169 movl(result, stride); 9170 negptr(result); 9171 movdqu(vec1, Address(str1, result, scale)); 9172 pcmpestri(vec1, Address(str2, result, scale), pcmpmask); 9173 jccb(Assembler::aboveEqual, LENGTH_DIFF_LABEL); 9174 9175 // Mismatched characters in the vectors 9176 bind(VECTOR_NOT_EQUAL); 9177 addptr(result, cnt1); 9178 movptr(cnt2, result); 9179 load_unsigned_short(result, Address(str1, cnt2, scale)); 9180 load_unsigned_short(cnt1, Address(str2, cnt2, scale)); 9181 subl(result, cnt1); 9182 jmpb(POP_LABEL); 9183 9184 bind(COMPARE_TAIL); // limit is zero 9185 movl(cnt2, result); 9186 // Fallthru to tail compare 9187 } 9188 9189 // Shift str2 and str1 to the end of the arrays, negate min 9190 lea(str1, Address(str1, cnt2, scale, 0)); 9191 lea(str2, Address(str2, cnt2, scale, 0)); 9192 negptr(cnt2); 9193 9194 // Compare the rest of the elements 9195 bind(WHILE_HEAD_LABEL); 9196 load_unsigned_short(result, Address(str1, cnt2, scale, 0)); 9197 load_unsigned_short(cnt1, Address(str2, cnt2, scale, 0)); 9198 subl(result, cnt1); 9199 jccb(Assembler::notZero, 
POP_LABEL); 9200 increment(cnt2); 9201 jccb(Assembler::notZero, WHILE_HEAD_LABEL); 9202 9203 // Strings are equal up to min length. Return the length difference. 9204 bind(LENGTH_DIFF_LABEL); 9205 pop(result); 9206 jmpb(DONE_LABEL); 9207 9208 // Discard the stored length difference 9209 bind(POP_LABEL); 9210 pop(cnt1); 9211 9212 // That's it 9213 bind(DONE_LABEL); 9214 } 9215 9216 // Compare char[] arrays aligned to 4 bytes or substrings. 9217 void MacroAssembler::char_arrays_equals(bool is_array_equ, Register ary1, Register ary2, 9218 Register limit, Register result, Register chr, 9219 XMMRegister vec1, XMMRegister vec2) { 9220 Label TRUE_LABEL, FALSE_LABEL, DONE, COMPARE_VECTORS, COMPARE_CHAR; 9221 9222 int length_offset = arrayOopDesc::length_offset_in_bytes(); 9223 int base_offset = arrayOopDesc::base_offset_in_bytes(T_CHAR); 9224 9225 // Check the input args 9226 cmpptr(ary1, ary2); 9227 jcc(Assembler::equal, TRUE_LABEL); 9228 9229 if (is_array_equ) { 9230 // Need additional checks for arrays_equals. 
9231 testptr(ary1, ary1); 9232 jcc(Assembler::zero, FALSE_LABEL); 9233 testptr(ary2, ary2); 9234 jcc(Assembler::zero, FALSE_LABEL); 9235 9236 // Check the lengths 9237 movl(limit, Address(ary1, length_offset)); 9238 cmpl(limit, Address(ary2, length_offset)); 9239 jcc(Assembler::notEqual, FALSE_LABEL); 9240 } 9241 9242 // count == 0 9243 testl(limit, limit); 9244 jcc(Assembler::zero, TRUE_LABEL); 9245 9246 if (is_array_equ) { 9247 // Load array address 9248 lea(ary1, Address(ary1, base_offset)); 9249 lea(ary2, Address(ary2, base_offset)); 9250 } 9251 9252 shll(limit, 1); // byte count != 0 9253 movl(result, limit); // copy 9254 9255 if (UseSSE42Intrinsics) { 9256 // With SSE4.2, use double quad vector compare 9257 Label COMPARE_WIDE_VECTORS, COMPARE_TAIL; 9258 9259 // Compare 16-byte vectors 9260 andl(result, 0x0000000e); // tail count (in bytes) 9261 andl(limit, 0xfffffff0); // vector count (in bytes) 9262 jccb(Assembler::zero, COMPARE_TAIL); 9263 9264 lea(ary1, Address(ary1, limit, Address::times_1)); 9265 lea(ary2, Address(ary2, limit, Address::times_1)); 9266 negptr(limit); 9267 9268 bind(COMPARE_WIDE_VECTORS); 9269 movdqu(vec1, Address(ary1, limit, Address::times_1)); 9270 movdqu(vec2, Address(ary2, limit, Address::times_1)); 9271 pxor(vec1, vec2); 9272 9273 ptest(vec1, vec1); 9274 jccb(Assembler::notZero, FALSE_LABEL); 9275 addptr(limit, 16); 9276 jcc(Assembler::notZero, COMPARE_WIDE_VECTORS); 9277 9278 testl(result, result); 9279 jccb(Assembler::zero, TRUE_LABEL); 9280 9281 movdqu(vec1, Address(ary1, result, Address::times_1, -16)); 9282 movdqu(vec2, Address(ary2, result, Address::times_1, -16)); 9283 pxor(vec1, vec2); 9284 9285 ptest(vec1, vec1); 9286 jccb(Assembler::notZero, FALSE_LABEL); 9287 jmpb(TRUE_LABEL); 9288 9289 bind(COMPARE_TAIL); // limit is zero 9290 movl(limit, result); 9291 // Fallthru to tail compare 9292 } 9293 9294 // Compare 4-byte vectors 9295 andl(limit, 0xfffffffc); // vector count (in bytes) 9296 jccb(Assembler::zero, COMPARE_CHAR); 
9297 9298 lea(ary1, Address(ary1, limit, Address::times_1)); 9299 lea(ary2, Address(ary2, limit, Address::times_1)); 9300 negptr(limit); 9301 9302 bind(COMPARE_VECTORS); 9303 movl(chr, Address(ary1, limit, Address::times_1)); 9304 cmpl(chr, Address(ary2, limit, Address::times_1)); 9305 jccb(Assembler::notEqual, FALSE_LABEL); 9306 addptr(limit, 4); 9307 jcc(Assembler::notZero, COMPARE_VECTORS); 9308 9309 // Compare trailing char (final 2 bytes), if any 9310 bind(COMPARE_CHAR); 9311 testl(result, 0x2); // tail char 9312 jccb(Assembler::zero, TRUE_LABEL); 9313 load_unsigned_short(chr, Address(ary1, 0)); 9314 load_unsigned_short(limit, Address(ary2, 0)); 9315 cmpl(chr, limit); 9316 jccb(Assembler::notEqual, FALSE_LABEL); 9317 9318 bind(TRUE_LABEL); 9319 movl(result, 1); // return true 9320 jmpb(DONE); 9321 9322 bind(FALSE_LABEL); 9323 xorl(result, result); // return false 9324 9325 // That's it 9326 bind(DONE); 9327 } 9328 9329 #ifdef PRODUCT 9330 #define BLOCK_COMMENT(str) /* nothing */ 9331 #else 9332 #define BLOCK_COMMENT(str) block_comment(str) 9333 #endif 9334 9335 #define BIND(label) bind(label); BLOCK_COMMENT(#label ":") 9336 void MacroAssembler::generate_fill(BasicType t, bool aligned, 9337 Register to, Register value, Register count, 9338 Register rtmp, XMMRegister xtmp) { 9339 assert_different_registers(to, value, count, rtmp); 9340 Label L_exit, L_skip_align1, L_skip_align2, L_fill_byte; 9341 Label L_fill_2_bytes, L_fill_4_bytes; 9342 9343 int shift = -1; 9344 switch (t) { 9345 case T_BYTE: 9346 shift = 2; 9347 break; 9348 case T_SHORT: 9349 shift = 1; 9350 break; 9351 case T_INT: 9352 shift = 0; 9353 break; 9354 default: ShouldNotReachHere(); 9355 } 9356 9357 if (t == T_BYTE) { 9358 andl(value, 0xff); 9359 movl(rtmp, value); 9360 shll(rtmp, 8); 9361 orl(value, rtmp); 9362 } 9363 if (t == T_SHORT) { 9364 andl(value, 0xffff); 9365 } 9366 if (t == T_BYTE || t == T_SHORT) { 9367 movl(rtmp, value); 9368 shll(rtmp, 16); 9369 orl(value, rtmp); 9370 } 9371 9372 
cmpl(count, 2<<shift); // Short arrays (< 8 bytes) fill by element 9373 jcc(Assembler::below, L_fill_4_bytes); // use unsigned cmp 9374 if (!UseUnalignedLoadStores && !aligned && (t == T_BYTE || t == T_SHORT)) { 9375 // align source address at 4 bytes address boundary 9376 if (t == T_BYTE) { 9377 // One byte misalignment happens only for byte arrays 9378 testptr(to, 1); 9379 jccb(Assembler::zero, L_skip_align1); 9380 movb(Address(to, 0), value); 9381 increment(to); 9382 decrement(count); 9383 BIND(L_skip_align1); 9384 } 9385 // Two bytes misalignment happens only for byte and short (char) arrays 9386 testptr(to, 2); 9387 jccb(Assembler::zero, L_skip_align2); 9388 movw(Address(to, 0), value); 9389 addptr(to, 2); 9390 subl(count, 1<<(shift-1)); 9391 BIND(L_skip_align2); 9392 } 9393 if (UseSSE < 2) { 9394 Label L_fill_32_bytes_loop, L_check_fill_8_bytes, L_fill_8_bytes_loop, L_fill_8_bytes; 9395 // Fill 32-byte chunks 9396 subl(count, 8 << shift); 9397 jcc(Assembler::less, L_check_fill_8_bytes); 9398 align(16); 9399 9400 BIND(L_fill_32_bytes_loop); 9401 9402 for (int i = 0; i < 32; i += 4) { 9403 movl(Address(to, i), value); 9404 } 9405 9406 addptr(to, 32); 9407 subl(count, 8 << shift); 9408 jcc(Assembler::greaterEqual, L_fill_32_bytes_loop); 9409 BIND(L_check_fill_8_bytes); 9410 addl(count, 8 << shift); 9411 jccb(Assembler::zero, L_exit); 9412 jmpb(L_fill_8_bytes); 9413 9414 // 9415 // length is too short, just fill qwords 9416 // 9417 BIND(L_fill_8_bytes_loop); 9418 movl(Address(to, 0), value); 9419 movl(Address(to, 4), value); 9420 addptr(to, 8); 9421 BIND(L_fill_8_bytes); 9422 subl(count, 1 << (shift + 1)); 9423 jcc(Assembler::greaterEqual, L_fill_8_bytes_loop); 9424 // fall through to fill 4 bytes 9425 } else { 9426 Label L_fill_32_bytes; 9427 if (!UseUnalignedLoadStores) { 9428 // align to 8 bytes, we know we are 4 byte aligned to start 9429 testptr(to, 4); 9430 jccb(Assembler::zero, L_fill_32_bytes); 9431 movl(Address(to, 0), value); 9432 addptr(to, 4); 9433 
subl(count, 1<<shift); 9434 } 9435 BIND(L_fill_32_bytes); 9436 { 9437 assert( UseSSE >= 2, "supported cpu only" ); 9438 Label L_fill_32_bytes_loop, L_check_fill_8_bytes, L_fill_8_bytes_loop, L_fill_8_bytes; 9439 // Fill 32-byte chunks 9440 movdl(xtmp, value); 9441 pshufd(xtmp, xtmp, 0); 9442 9443 subl(count, 8 << shift); 9444 jcc(Assembler::less, L_check_fill_8_bytes); 9445 align(16); 9446 9447 BIND(L_fill_32_bytes_loop); 9448 9449 if (UseUnalignedLoadStores) { 9450 movdqu(Address(to, 0), xtmp); 9451 movdqu(Address(to, 16), xtmp); 9452 } else { 9453 movq(Address(to, 0), xtmp); 9454 movq(Address(to, 8), xtmp); 9455 movq(Address(to, 16), xtmp); 9456 movq(Address(to, 24), xtmp); 9457 } 9458 9459 addptr(to, 32); 9460 subl(count, 8 << shift); 9461 jcc(Assembler::greaterEqual, L_fill_32_bytes_loop); 9462 BIND(L_check_fill_8_bytes); 9463 addl(count, 8 << shift); 9464 jccb(Assembler::zero, L_exit); 9465 jmpb(L_fill_8_bytes); 9466 9467 // 9468 // length is too short, just fill qwords 9469 // 9470 BIND(L_fill_8_bytes_loop); 9471 movq(Address(to, 0), xtmp); 9472 addptr(to, 8); 9473 BIND(L_fill_8_bytes); 9474 subl(count, 1 << (shift + 1)); 9475 jcc(Assembler::greaterEqual, L_fill_8_bytes_loop); 9476 } 9477 } 9478 // fill trailing 4 bytes 9479 BIND(L_fill_4_bytes); 9480 testl(count, 1<<shift); 9481 jccb(Assembler::zero, L_fill_2_bytes); 9482 movl(Address(to, 0), value); 9483 if (t == T_BYTE || t == T_SHORT) { 9484 addptr(to, 4); 9485 BIND(L_fill_2_bytes); 9486 // fill trailing 2 bytes 9487 testl(count, 1<<(shift-1)); 9488 jccb(Assembler::zero, L_fill_byte); 9489 movw(Address(to, 0), value); 9490 if (t == T_BYTE) { 9491 addptr(to, 2); 9492 BIND(L_fill_byte); 9493 // fill trailing byte 9494 testl(count, 1); 9495 jccb(Assembler::zero, L_exit); 9496 movb(Address(to, 0), value); 9497 } else { 9498 BIND(L_fill_byte); 9499 } 9500 } else { 9501 BIND(L_fill_2_bytes); 9502 } 9503 BIND(L_exit); 9504 } 9505 #undef BIND 9506 #undef BLOCK_COMMENT 9507 9508 9509 Assembler::Condition 
MacroAssembler::negate_condition(Assembler::Condition cond) { 9510 switch (cond) { 9511 // Note some conditions are synonyms for others 9512 case Assembler::zero: return Assembler::notZero; 9513 case Assembler::notZero: return Assembler::zero; 9514 case Assembler::less: return Assembler::greaterEqual; 9515 case Assembler::lessEqual: return Assembler::greater; 9516 case Assembler::greater: return Assembler::lessEqual; 9517 case Assembler::greaterEqual: return Assembler::less; 9518 case Assembler::below: return Assembler::aboveEqual; 9519 case Assembler::belowEqual: return Assembler::above; 9520 case Assembler::above: return Assembler::belowEqual; 9521 case Assembler::aboveEqual: return Assembler::below; 9522 case Assembler::overflow: return Assembler::noOverflow; 9523 case Assembler::noOverflow: return Assembler::overflow; 9524 case Assembler::negative: return Assembler::positive; 9525 case Assembler::positive: return Assembler::negative; 9526 case Assembler::parity: return Assembler::noParity; 9527 case Assembler::noParity: return Assembler::parity; 9528 } 9529 ShouldNotReachHere(); return Assembler::overflow; 9530 } 9531 9532 SkipIfEqual::SkipIfEqual( 9533 MacroAssembler* masm, const bool* flag_addr, bool value) { 9534 _masm = masm; 9535 _masm->cmp8(ExternalAddress((address)flag_addr), value); 9536 _masm->jcc(Assembler::equal, _label); 9537 } 9538 9539 SkipIfEqual::~SkipIfEqual() { 9540 _masm->bind(_label); 9541 }