1 /* 2 * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 
 *
 */

#include "precompiled.hpp"
#include "assembler_x86.inline.hpp"
#include "gc_interface/collectedHeap.inline.hpp"
#include "interpreter/interpreter.hpp"
#include "memory/cardTableModRefBS.hpp"
#include "memory/resourceArea.hpp"
#include "prims/methodHandles.hpp"
#include "runtime/biasedLocking.hpp"
#include "runtime/interfaceSupport.hpp"
#include "runtime/objectMonitor.hpp"
#include "runtime/os.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/stubRoutines.hpp"
#ifndef SERIALGC
#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
#include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"
#include "gc_implementation/g1/heapRegion.hpp"
#endif

// Implementation of AddressLiteral

// Build a literal address plus the relocation spec appropriate for rtype.
// Only reloc types that can appear as in-stream literals are accepted.
AddressLiteral::AddressLiteral(address target, relocInfo::relocType rtype) {
  _is_lval = false;
  _target = target;
  switch (rtype) {
  case relocInfo::oop_type:
    // Oops are a special case. Normally they would be their own section
    // but in cases like icBuffer they are literals in the code stream that
    // we don't have a section for. We use none so that we get a literal address
    // which is always patchable.
    break;
  case relocInfo::external_word_type:
    _rspec = external_word_Relocation::spec(target);
    break;
  case relocInfo::internal_word_type:
    _rspec = internal_word_Relocation::spec(target);
    break;
  case relocInfo::opt_virtual_call_type:
    _rspec = opt_virtual_call_Relocation::spec();
    break;
  case relocInfo::static_call_type:
    _rspec = static_call_Relocation::spec();
    break;
  case relocInfo::runtime_call_type:
    _rspec = runtime_call_Relocation::spec();
    break;
  case relocInfo::poll_type:
  case relocInfo::poll_return_type:
    _rspec = Relocation::spec_simple(rtype);
    break;
  case relocInfo::none:
    break;
  default:
    ShouldNotReachHere();
    break;
  }
}

// Implementation of Address

#ifdef _LP64

Address Address::make_array(ArrayAddress adr) {
  // Not implementable on 64bit machines
  // Should have been handled higher up the call chain.
  ShouldNotReachHere();
  return Address();
}

// exceedingly dangerous constructor
Address::Address(int disp, address loc, relocInfo::relocType rtype) {
  _base  = noreg;
  _index = noreg;
  _scale = no_scale;
  _disp  = disp;
  switch (rtype) {
    case relocInfo::external_word_type:
      _rspec = external_word_Relocation::spec(loc);
      break;
    case relocInfo::internal_word_type:
      _rspec = internal_word_Relocation::spec(loc);
      break;
    case relocInfo::runtime_call_type:
      // HMM
      _rspec = runtime_call_Relocation::spec();
      break;
    case relocInfo::poll_type:
    case relocInfo::poll_return_type:
      _rspec = Relocation::spec_simple(rtype);
      break;
    case relocInfo::none:
      break;
    default:
      ShouldNotReachHere();
  }
}
#else // LP64

// 32-bit only: fold an ArrayAddress (literal base + scaled index) into a
// plain Address whose disp is the literal base, carrying the base's reloc.
Address Address::make_array(ArrayAddress adr) {
  AddressLiteral base = adr.base();
  Address index = adr.index();
  assert(index._disp == 0, "must not have disp"); // maybe it can?
  Address array(index._base, index._index, index._scale, (intptr_t) base.target());
  array._rspec = base._rspec;
  return array;
}

// exceedingly dangerous constructor
Address::Address(address loc, RelocationHolder spec) {
  _base  = noreg;
  _index = noreg;
  _scale = no_scale;
  _disp  = (intptr_t) loc;
  _rspec = spec;
}

#endif // _LP64



// Convert the raw encoding form into the form expected by the constructor for
// Address. An index of 4 (rsp) corresponds to having no index, so convert
// that to noreg for the Address constructor.
Address Address::make_raw(int base, int index, int scale, int disp, bool disp_is_oop) {
  RelocationHolder rspec;
  if (disp_is_oop) {
    rspec = Relocation::spec_simple(relocInfo::oop_type);
  }
  bool valid_index = index != rsp->encoding();
  if (valid_index) {
    Address madr(as_Register(base), as_Register(index), (Address::ScaleFactor)scale, in_ByteSize(disp));
    madr._rspec = rspec;
    return madr;
  } else {
    Address madr(as_Register(base), noreg, Address::no_scale, in_ByteSize(disp));
    madr._rspec = rspec;
    return madr;
  }
}

// Implementation of Assembler

int AbstractAssembler::code_fill_byte() {
  return (u_char)'\xF4'; // hlt
}

// make this go away someday
void Assembler::emit_data(jint data, relocInfo::relocType rtype, int format) {
  if (rtype == relocInfo::none)
    emit_long(data);
  else
    emit_data(data, Relocation::spec_simple(rtype), format);
}

// Emit a 32-bit datum with a relocation attached at the current inst_mark.
void Assembler::emit_data(jint data, RelocationHolder const& rspec, int format) {
  assert(imm_operand == 0, "default format must be immediate in this file");
  assert(inst_mark() != NULL, "must be inside InstructionMark");
  if (rspec.type() != relocInfo::none) {
#ifdef ASSERT
    check_relocation(rspec, format);
#endif
    // Do not use AbstractAssembler::relocate, which is not intended for
    // embedded words. Instead, relocate to the enclosing instruction.

    // hack. call32 is too wide for mask so use disp32
    if (format == call32_operand)
      code_section()->relocate(inst_mark(), rspec, disp32_operand);
    else
      code_section()->relocate(inst_mark(), rspec, format);
  }
  emit_long(data);
}

// Low 3 bits of the register encoding (the REX extension bit, if any,
// is emitted separately via the prefix byte).
static int encode(Register r) {
  int enc = r->encoding();
  if (enc >= 8) {
    enc -= 8;
  }
  return enc;
}

static int encode(XMMRegister r) {
  int enc = r->encoding();
  if (enc >= 8) {
    enc -= 8;
  }
  return enc;
}

// 8-bit register/immediate arithmetic: opcode, ModRM (reg form), imm8.
void Assembler::emit_arith_b(int op1, int op2, Register dst, int imm8) {
  assert(dst->has_byte_register(), "must have byte register");
  assert(isByte(op1) && isByte(op2), "wrong opcode");
  assert(isByte(imm8), "not a byte");
  assert((op1 & 0x01) == 0, "should be 8bit operation");
  emit_byte(op1);
  emit_byte(op2 | encode(dst));
  emit_byte(imm8);
}


// 32-bit register/immediate arithmetic; uses the sign-extended imm8 form
// when the immediate fits in a byte.
void Assembler::emit_arith(int op1, int op2, Register dst, int32_t imm32) {
  assert(isByte(op1) && isByte(op2), "wrong opcode");
  assert((op1 & 0x01) == 1, "should be 32bit operation");
  assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
  if (is8bit(imm32)) {
    emit_byte(op1 | 0x02); // set sign bit
    emit_byte(op2 | encode(dst));
    emit_byte(imm32 & 0xFF);
  } else {
    emit_byte(op1);
    emit_byte(op2 | encode(dst));
    emit_long(imm32);
  }
}

// immediate-to-memory forms
void Assembler::emit_arith_operand(int op1, Register rm, Address adr, int32_t imm32) {
  assert((op1 & 0x01) == 1, "should be 32bit operation");
  assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
  if (is8bit(imm32)) {
    emit_byte(op1 | 0x02); // set sign bit
    emit_operand(rm, adr, 1);
    emit_byte(imm32 & 0xFF);
  } else {
    emit_byte(op1);
    emit_operand(rm, adr, 4);
    emit_long(imm32);
  }
}

// 32-bit only: arithmetic with an embedded oop immediate (needs a reloc).
void Assembler::emit_arith(int op1, int op2, Register dst, jobject obj) {
  LP64_ONLY(ShouldNotReachHere());
  assert(isByte(op1) && isByte(op2), "wrong opcode");
  assert((op1 & 0x01) == 1, "should be 32bit operation");
  assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
  InstructionMark im(this);
  emit_byte(op1);
  emit_byte(op2 | encode(dst));
  emit_data((intptr_t)obj, relocInfo::oop_type, 0);
}


// Register-register arithmetic: opcode then ModRM with dst in reg field.
void Assembler::emit_arith(int op1, int op2, Register dst, Register src) {
  assert(isByte(op1) && isByte(op2), "wrong opcode");
  emit_byte(op1);
  emit_byte(op2 | encode(dst) << 3 | encode(src));
}


// Emit the ModRM (and SIB/disp as needed) bytes for a memory operand,
// selecting the shortest encoding (no disp / disp8 / disp32) and handling
// the rbp/r13 and rsp/r12 special cases required by the x86 encoding rules.
void Assembler::emit_operand(Register reg, Register base, Register index,
                             Address::ScaleFactor scale, int disp,
                             RelocationHolder const& rspec,
                             int rip_relative_correction) {
  relocInfo::relocType rtype = (relocInfo::relocType) rspec.type();

  // Encode the registers as needed in the fields they are used in

  int regenc = encode(reg) << 3;
  int indexenc = index->is_valid() ? encode(index) << 3 : 0;
  int baseenc = base->is_valid() ? encode(base) : 0;

  if (base->is_valid()) {
    if (index->is_valid()) {
      assert(scale != Address::no_scale, "inconsistent address");
      // [base + index*scale + disp]
      if (disp == 0 && rtype == relocInfo::none  &&
          base != rbp LP64_ONLY(&& base != r13)) {
        // [base + index*scale]
        // [00 reg 100][ss index base]
        assert(index != rsp, "illegal addressing mode");
        emit_byte(0x04 | regenc);
        emit_byte(scale << 6 | indexenc | baseenc);
      } else if (is8bit(disp) && rtype == relocInfo::none) {
        // [base + index*scale + imm8]
        // [01 reg 100][ss index base] imm8
        assert(index != rsp, "illegal addressing mode");
        emit_byte(0x44 | regenc);
        emit_byte(scale << 6 | indexenc | baseenc);
        emit_byte(disp & 0xFF);
      } else {
        // [base + index*scale + disp32]
        // [10 reg 100][ss index base] disp32
        assert(index != rsp, "illegal addressing mode");
        emit_byte(0x84 | regenc);
        emit_byte(scale << 6 | indexenc | baseenc);
        emit_data(disp, rspec, disp32_operand);
      }
    } else if (base == rsp LP64_ONLY(|| base == r12)) {
      // [rsp + disp]
      if (disp == 0 && rtype == relocInfo::none) {
        // [rsp]
        // [00 reg 100][00 100 100]
        emit_byte(0x04 | regenc);
        emit_byte(0x24);
      } else if (is8bit(disp) && rtype == relocInfo::none) {
        // [rsp + imm8]
        // [01 reg 100][00 100 100] disp8
        emit_byte(0x44 | regenc);
        emit_byte(0x24);
        emit_byte(disp & 0xFF);
      } else {
        // [rsp + imm32]
        // [10 reg 100][00 100 100] disp32
        emit_byte(0x84 | regenc);
        emit_byte(0x24);
        emit_data(disp, rspec, disp32_operand);
      }
    } else {
      // [base + disp]
      assert(base != rsp LP64_ONLY(&& base != r12), "illegal addressing mode");
      if (disp == 0 && rtype == relocInfo::none &&
          base != rbp LP64_ONLY(&& base != r13)) {
        // [base]
        // [00 reg base]
        emit_byte(0x00 | regenc | baseenc);
      } else if (is8bit(disp) && rtype == relocInfo::none) {
        // [base + disp8]
        // [01 reg base] disp8
        emit_byte(0x40 | regenc | baseenc);
        emit_byte(disp & 0xFF);
      } else {
        // [base + disp32]
        // [10 reg base] disp32
        emit_byte(0x80 | regenc | baseenc);
        emit_data(disp, rspec, disp32_operand);
      }
    }
  } else {
    if (index->is_valid()) {
      assert(scale != Address::no_scale, "inconsistent address");
      // [index*scale + disp]
      // [00 reg 100][ss index 101] disp32
      assert(index != rsp, "illegal addressing mode");
      emit_byte(0x04 | regenc);
      emit_byte(scale << 6 | indexenc | 0x05);
      emit_data(disp, rspec, disp32_operand);
    } else if (rtype != relocInfo::none ) {
      // [disp] (64bit) RIP-RELATIVE (32bit) abs
      // [00 000 101] disp32

      emit_byte(0x05 | regenc);
      // Note that the RIP-rel. correction applies to the generated
      // disp field, but _not_ to the target address in the rspec.

      // disp was created by converting the target address minus the pc
      // at the start of the instruction. That needs more correction here.
      // intptr_t disp = target - next_ip;
      assert(inst_mark() != NULL, "must be inside InstructionMark");
      address next_ip = pc() + sizeof(int32_t) + rip_relative_correction;
      int64_t adjusted = disp;
      // Do rip-rel adjustment for 64bit
      LP64_ONLY(adjusted -=  (next_ip - inst_mark()));
      assert(is_simm32(adjusted),
             "must be 32bit offset (RIP relative address)");
      emit_data((int32_t) adjusted, rspec, disp32_operand);

    } else {
      // 32bit never did this, did everything as the rip-rel/disp code above
      // [disp] ABSOLUTE
      // [00 reg 100][00 100 101] disp32
      emit_byte(0x04 | regenc);
      emit_byte(0x25);
      emit_data(disp, rspec, disp32_operand);
    }
  }
}

void Assembler::emit_operand(XMMRegister reg, Register base, Register index,
                             Address::ScaleFactor scale, int disp,
                             RelocationHolder const& rspec) {
  emit_operand((Register)reg, base, index, scale, disp, rspec);
}

// Secret local extension to Assembler::WhichOperand:
#define end_pc_operand (_WhichOperand_limit)

address Assembler::locate_operand(address inst, WhichOperand which) {
  // Decode the given instruction, and return the address of
  // an embedded 32-bit operand word.

  // If "which" is disp32_operand, selects the displacement portion
  // of an effective address specifier.
  // If "which" is imm64_operand, selects the trailing immediate constant.
  // If "which" is call32_operand, selects the displacement of a call or jump.
  // Caller is responsible for ensuring that there is such an operand,
  // and that it is 32/64 bits wide.

  // If "which" is end_pc_operand, find the end of the instruction.

  address ip = inst;
  bool is_64bit = false;

  debug_only(bool has_disp32 = false);
  int tail_size = 0; // other random bytes (#32, #16, etc.) at end of insn

  again_after_prefix:
  switch (0xFF & *ip++) {

  // These convenience macros generate groups of "case" labels for the switch.
#define REP4(x) (x)+0: case (x)+1: case (x)+2: case (x)+3
#define REP8(x) (x)+0: case (x)+1: case (x)+2: case (x)+3: \
             case (x)+4: case (x)+5: case (x)+6: case (x)+7
#define REP16(x) REP8((x)+0): \
              case REP8((x)+8)

  case CS_segment:
  case SS_segment:
  case DS_segment:
  case ES_segment:
  case FS_segment:
  case GS_segment:
    // Seems dubious
    LP64_ONLY(assert(false, "shouldn't have that prefix"));
    assert(ip == inst+1, "only one prefix allowed");
    goto again_after_prefix;

  case 0x67:
  case REX:
  case REX_B:
  case REX_X:
  case REX_XB:
  case REX_R:
  case REX_RB:
  case REX_RX:
  case REX_RXB:
    NOT_LP64(assert(false, "64bit prefixes"));
    goto again_after_prefix;

  case REX_W:
  case REX_WB:
  case REX_WX:
  case REX_WXB:
  case REX_WR:
  case REX_WRB:
  case REX_WRX:
  case REX_WRXB:
    NOT_LP64(assert(false, "64bit prefixes"));
    is_64bit = true;
    goto again_after_prefix;

  case 0xFF: // pushq a; decl a; incl a; call a; jmp a
  case 0x88: // movb a, r
  case 0x89: // movl a, r
  case 0x8A: // movb r, a
  case 0x8B: // movl r, a
  case 0x8F: // popl a
    debug_only(has_disp32 = true);
    break;

  case 0x68: // pushq #32
    if (which == end_pc_operand) {
      return ip + 4;
    }
    assert(which == imm_operand && !is_64bit, "pushl has no disp32 or 64bit immediate");
    return ip; // not produced by emit_operand

  case 0x66: // movw ... (size prefix)
    again_after_size_prefix2:
    switch (0xFF & *ip++) {
    case REX:
    case REX_B:
    case REX_X:
    case REX_XB:
    case REX_R:
    case REX_RB:
    case REX_RX:
    case REX_RXB:
    case REX_W:
    case REX_WB:
    case REX_WX:
    case REX_WXB:
    case REX_WR:
    case REX_WRB:
    case REX_WRX:
    case REX_WRXB:
      NOT_LP64(assert(false, "64bit prefix found"));
      goto again_after_size_prefix2;
    case 0x8B: // movw r, a
    case 0x89: // movw a, r
      debug_only(has_disp32 = true);
      break;
    case 0xC7: // movw a, #16
      debug_only(has_disp32 = true);
      tail_size = 2;  // the imm16
      break;
    case 0x0F: // several SSE/SSE2 variants
      ip--;    // reparse the 0x0F
      goto again_after_prefix;
    default:
      ShouldNotReachHere();
    }
    break;

  case REP8(0xB8): // movl/q r, #32/#64(oop?)
    if (which == end_pc_operand)  return ip + (is_64bit ? 8 : 4);
    // these asserts are somewhat nonsensical
#ifndef _LP64
    assert(which == imm_operand || which == disp32_operand, "");
#else
    assert((which == call32_operand || which == imm_operand) && is_64bit ||
           which == narrow_oop_operand && !is_64bit, "");
#endif // _LP64
    return ip;

  case 0x69: // imul r, a, #32
  case 0xC7: // movl a, #32(oop?)
    tail_size = 4;
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0x0F: // movx..., etc.
    switch (0xFF & *ip++) {
    case 0x3A: // pcmpestri
      tail_size = 1;
      // fall through: 0x3A also has a 3rd opcode byte, like 0x38
    case 0x38: // ptest, pmovzxbw
      ip++; // skip opcode
      debug_only(has_disp32 = true); // has both kinds of operands!
      break;

    case 0x70: // pshufd r, r/a, #8
      debug_only(has_disp32 = true); // has both kinds of operands!
      // fall through: both carry a trailing imm8
    case 0x73: // psrldq r, #8
      tail_size = 1;
      break;

    case 0x12: // movlps
    case 0x28: // movaps
    case 0x2E: // ucomiss
    case 0x2F: // comiss
    case 0x54: // andps
    case 0x55: // andnps
    case 0x56: // orps
    case 0x57: // xorps
    case 0x6E: // movd
    case 0x7E: // movd
    case 0xAE: // ldmxcsr, stmxcsr, fxrstor, fxsave, clflush
      debug_only(has_disp32 = true);
      break;

    case 0xAD: // shrd r, a, %cl
    case 0xAF: // imul r, a
    case 0xBE: // movsbl r, a (movsxb)
    case 0xBF: // movswl r, a (movsxw)
    case 0xB6: // movzbl r, a (movzxb)
    case 0xB7: // movzwl r, a (movzxw)
    case REP16(0x40): // cmovl cc, r, a
    case 0xB0: // cmpxchgb
    case 0xB1: // cmpxchg
    case 0xC1: // xaddl
    case 0xC7: // cmpxchg8
    case REP16(0x90): // setcc a
      debug_only(has_disp32 = true);
      // fall out of the switch to decode the address
      break;

    case 0xC4: // pinsrw r, a, #8
      debug_only(has_disp32 = true);
      // fall through: both pinsrw and pextrw carry a trailing imm8
    case 0xC5: // pextrw r, r, #8
      tail_size = 1;  // the imm8
      break;

    case 0xAC: // shrd r, a, #8
      debug_only(has_disp32 = true);
      tail_size = 1;  // the imm8
      break;

    case REP16(0x80): // jcc rdisp32
      if (which == end_pc_operand)  return ip + 4;
      assert(which == call32_operand, "jcc has no disp32 or imm");
      return ip;
    default:
      ShouldNotReachHere();
    }
    break;

  case 0x81: // addl a, #32; addl r, #32
    // also: orl, adcl, sbbl, andl, subl, xorl, cmpl
    // on 32bit in the case of cmpl, the imm might be an oop
    tail_size = 4;
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0x83: // addl a, #8; addl r, #8
    // also: orl, adcl, sbbl, andl, subl, xorl, cmpl
    debug_only(has_disp32 = true); // has both kinds of operands!
    tail_size = 1;
    break;

  case 0x9B:
    switch (0xFF & *ip++) {
    case 0xD9: // fnstcw a
      debug_only(has_disp32 = true);
      break;
    default:
      ShouldNotReachHere();
    }
    break;

  case REP4(0x00): // addb a, r; addl a, r; addb r, a; addl r, a
  case REP4(0x10): // adc...
  case REP4(0x20): // and...
  case REP4(0x30): // xor...
  case REP4(0x08): // or...
  case REP4(0x18): // sbb...
  case REP4(0x28): // sub...
  case 0xF7: // mull a
  case 0x8D: // lea r, a
  case 0x87: // xchg r, a
  case REP4(0x38): // cmp...
  case 0x85: // test r, a
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0xC1: // sal a, #8; sar a, #8; shl a, #8; shr a, #8
  case 0xC6: // movb a, #8
  case 0x80: // cmpb a, #8
  case 0x6B: // imul r, a, #8
    debug_only(has_disp32 = true); // has both kinds of operands!
    tail_size = 1; // the imm8
    break;

  case 0xC4: // VEX_3bytes
  case 0xC5: // VEX_2bytes
    assert((UseAVX > 0), "shouldn't have VEX prefix");
    assert(ip == inst+1, "no prefixes allowed");
    // C4 and C5 are also used as opcodes for PINSRW and PEXTRW instructions
    // but they have prefix 0x0F and processed when 0x0F processed above.
    //
    // In 32-bit mode the VEX first byte C4 and C5 alias onto LDS and LES
    // instructions (these instructions are not supported in 64-bit mode).
    // To distinguish them bits [7:6] are set in the VEX second byte since
    // ModRM byte can not be of the form 11xxxxxx in 32-bit mode. To set
    // those VEX bits REX and vvvv bits are inverted.
    //
    // Fortunately C2 doesn't generate these instructions so we don't need
    // to check for them in product version.

    // Check second byte
    NOT_LP64(assert((0xC0 & *ip) == 0xC0, "shouldn't have LDS and LES instructions"));

    // First byte
    if ((0xFF & *inst) == VEX_3bytes) {
      ip++; // third byte
      is_64bit = ((VEX_W & *ip) == VEX_W);
    }
    ip++; // opcode
    // To find the end of instruction (which == end_pc_operand).
    switch (0xFF & *ip) {
    case 0x61: // pcmpestri r, r/a, #8
    case 0x70: // pshufd r, r/a, #8
    case 0x73: // psrldq r, #8
      tail_size = 1;  // the imm8
      break;
    default:
      break;
    }
    ip++; // skip opcode
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0xD1: // sal a, 1; sar a, 1; shl a, 1; shr a, 1
  case 0xD3: // sal a, %cl; sar a, %cl; shl a, %cl; shr a, %cl
  case 0xD9: // fld_s a; fst_s a; fstp_s a; fldcw a
  case 0xDD: // fld_d a; fst_d a; fstp_d a
  case 0xDB: // fild_s a; fistp_s a; fld_x a; fstp_x a
  case 0xDF: // fild_d a; fistp_d a
  case 0xD8: // fadd_s a; fsubr_s a; fmul_s a; fdivr_s a; fcomp_s a
  case 0xDC: // fadd_d a; fsubr_d a; fmul_d a; fdivr_d a; fcomp_d a
  case 0xDE: // faddp_d a; fsubrp_d a; fmulp_d a; fdivrp_d a; fcompp_d a
    debug_only(has_disp32 = true);
    break;

  case 0xE8: // call rdisp32
  case 0xE9: // jmp  rdisp32
    if (which == end_pc_operand)  return ip + 4;
    assert(which == call32_operand, "call has no disp32 or imm");
    return ip;

  case 0xF0: // Lock
    assert(os::is_MP(), "only on MP");
    goto again_after_prefix;

  case 0xF3: // For SSE
  case 0xF2: // For SSE2
    switch (0xFF & *ip++) {
    case REX:
    case REX_B:
    case REX_X:
    case REX_XB:
    case REX_R:
    case REX_RB:
    case REX_RX:
    case REX_RXB:
    case REX_W:
    case REX_WB:
    case REX_WX:
    case REX_WXB:
    case REX_WR:
    case REX_WRB:
    case REX_WRX:
    case REX_WRXB:
      NOT_LP64(assert(false, "found 64bit prefix"));
      ip++;
      // fall through: skip the opcode byte after the REX prefix as well
    default:
      ip++;
    }
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  default:
    ShouldNotReachHere();

#undef REP8
#undef REP16
  }

  assert(which != call32_operand, "instruction is not a call, jmp, or jcc");
#ifdef _LP64
  assert(which != imm_operand, "instruction is not a movq reg, imm64");
#else
  // assert(which != imm_operand || has_imm32, "instruction has no imm32 field");
  assert(which != imm_operand || has_disp32, "instruction has no imm32 field");
#endif // LP64
  assert(which != disp32_operand || has_disp32, "instruction has no disp32 field");

  // parse the output of emit_operand
  int op2 = 0xFF & *ip++;
  int base = op2 & 0x07;
  int op3 = -1;
  const int b100 = 4;
  const int b101 = 5;
  if (base == b100 && (op2 >> 6) != 3) {
    op3 = 0xFF & *ip++;
    base = op3 & 0x07;   // refetch the base
  }
  // now ip points at the disp (if any)

  switch (op2 >> 6) {
  case 0:
    // [00 reg  100][ss index base]
    // [00 reg  100][00   100  esp]
    // [00 reg base]
    // [00 reg  100][ss index  101][disp32]
    // [00 reg  101]               [disp32]

    if (base == b101) {
      if (which == disp32_operand)
        return ip;              // caller wants the disp32
      ip += 4;                  // skip the disp32
    }
    break;

  case 1:
    // [01 reg  100][ss index base][disp8]
    // [01 reg  100][00   100  esp][disp8]
    // [01 reg base]               [disp8]
    ip += 1;                    // skip the disp8
    break;

  case 2:
    // [10 reg  100][ss index base][disp32]
    // [10 reg  100][00   100  esp][disp32]
    // [10 reg base]               [disp32]
    if (which == disp32_operand)
      return ip;                // caller wants the disp32
    ip += 4;                    // skip the disp32
    break;

  case 3:
    // [11 reg base]  (not a memory addressing mode)
    break;
  }

  if (which == end_pc_operand) {
    return ip + tail_size;
  }

#ifdef _LP64
  assert(which == narrow_oop_operand && !is_64bit, "instruction is not a movl adr, imm32");
#else
  assert(which == imm_operand, "instruction has only an imm field");
#endif // LP64
  return ip;
}

address Assembler::locate_next_instruction(address inst) {
  // Secretly share code with locate_operand:
  return locate_operand(inst, end_pc_operand);
}


#ifdef ASSERT
// Debug-only: verify that the relocation just attached actually points at
// the operand position locate_operand finds for this instruction.
void Assembler::check_relocation(RelocationHolder const& rspec, int format) {
  address inst = inst_mark();
  assert(inst != NULL && inst < pc(), "must point to beginning of instruction");
  address opnd;

  Relocation* r = rspec.reloc();
  if (r->type() == relocInfo::none) {
    return;
  } else if (r->is_call() || format == call32_operand) {
    // assert(format == imm32_operand, "cannot specify a nonzero format");
    opnd = locate_operand(inst, call32_operand);
  } else if (r->is_data()) {
    assert(format == imm_operand || format == disp32_operand
           LP64_ONLY(|| format == narrow_oop_operand), "format ok");
    opnd = locate_operand(inst, (WhichOperand)format);
  } else {
    assert(format == imm_operand, "cannot specify a format");
    return;
  }
  assert(opnd == pc(), "must put operand where relocs can find it");
}
#endif // ASSERT

void Assembler::emit_operand32(Register reg, Address adr) {
  assert(reg->encoding() < 8, "no extended registers");
  assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
  emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
               adr._rspec);
}

void Assembler::emit_operand(Register reg, Address adr,
                             int rip_relative_correction) {
  emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
               adr._rspec,
               rip_relative_correction);
}

void Assembler::emit_operand(XMMRegister reg, Address adr) {
  emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
               adr._rspec);
}

// MMX operations
void Assembler::emit_operand(MMXRegister reg, Address adr) {
  assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
  emit_operand((Register)reg, adr._base, adr._index, adr._scale, adr._disp, adr._rspec);
}

// work around gcc (3.2.1-7a) bug
void Assembler::emit_operand(Address adr, MMXRegister reg) {
  assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
  emit_operand((Register)reg, adr._base, adr._index, adr._scale, adr._disp, adr._rspec);
}


// x87 two-byte instruction with a stack-register offset i in [0, 8).
void Assembler::emit_farith(int b1, int b2, int i) {
  assert(isByte(b1) && isByte(b2), "wrong opcode");
  assert(0 <= i && i < 8, "illegal stack offset");
  emit_byte(b1);
  emit_byte(b2 + i);
}


// Now the Assembler instructions (identical for 32/64 bits)

void Assembler::adcl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_arith_operand(0x81, rdx, dst, imm32);
}

void Assembler::adcl(Address dst, Register src) {
  InstructionMark im(this);
  prefix(dst, src);
  emit_byte(0x11);
  emit_operand(src, dst);
}

void Assembler::adcl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xD0, dst, imm32);
}

void Assembler::adcl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x13);
  emit_operand(dst, src);
}

void Assembler::adcl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x13, 0xC0, dst, src);
}

void Assembler::addl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_arith_operand(0x81, rax, dst, imm32);
}

void Assembler::addl(Address dst, Register src) {
  InstructionMark im(this);
  prefix(dst, src);
  emit_byte(0x01);
  emit_operand(src, dst);
}

void Assembler::addl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xC0, dst, imm32);
}

void Assembler::addl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x03);
  emit_operand(dst, src);
}

void Assembler::addl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x03, 0xC0, dst, src);
}

void Assembler::addr_nop_4() {
  // 4 bytes: NOP DWORD PTR [EAX+0]
  emit_byte(0x0F);
  emit_byte(0x1F);
  emit_byte(0x40); // emit_rm(cbuf, 0x1, EAX_enc, EAX_enc);
  emit_byte(0);    // 8-bits offset (1 byte)
}

void Assembler::addr_nop_5() {
  // 5 bytes: NOP DWORD PTR [EAX+EAX*0+0] 8-bits offset
  emit_byte(0x0F);
  emit_byte(0x1F);
  emit_byte(0x44); // emit_rm(cbuf, 0x1, EAX_enc, 0x4);
  emit_byte(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
  emit_byte(0);    // 8-bits offset (1 byte)
}

void Assembler::addr_nop_7() {
  // 7 bytes: NOP DWORD PTR [EAX+0] 32-bits offset
  emit_byte(0x0F);
  emit_byte(0x1F);
  emit_byte(0x80); // emit_rm(cbuf, 0x2, EAX_enc, EAX_enc);
  emit_long(0);    // 32-bits offset (4 bytes)
}

void Assembler::addr_nop_8() {
  // 8 bytes: NOP DWORD PTR [EAX+EAX*0+0] 32-bits offset
  emit_byte(0x0F);
  emit_byte(0x1F);
  emit_byte(0x84); // emit_rm(cbuf, 0x2, EAX_enc, 0x4);
  emit_byte(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
  emit_long(0);    // 32-bits offset (4 bytes)
}

void Assembler::addsd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2);
  emit_byte(0x58);
  emit_byte(0xC0 | encode);
}

void Assembler::addsd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  simd_prefix(dst, dst, src, VEX_SIMD_F2);
  emit_byte(0x58);
  emit_operand(dst, src);
}

void Assembler::addss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3);
  emit_byte(0x58);
  emit_byte(0xC0 | encode);
}

void Assembler::addss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  simd_prefix(dst, dst, src, VEX_SIMD_F3);
  emit_byte(0x58);
  emit_operand(dst, src);
}

void Assembler::andl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0x81);
  emit_operand(rsp, dst, 4);
  emit_long(imm32);
}

void Assembler::andl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xE0, dst, imm32);
}

void Assembler::andl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x23);
  emit_operand(dst, src);
}

void Assembler::andl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x23, 0xC0, dst, src);
}

void Assembler::andpd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  simd_prefix(dst, dst, src, VEX_SIMD_66);
  emit_byte(0x54);
  emit_operand(dst, src);
}

void Assembler::andpd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66);
  emit_byte(0x54);
  emit_byte(0xC0 | encode);
}

void Assembler::andps(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  simd_prefix(dst, dst, src, VEX_SIMD_NONE);
  emit_byte(0x54);
  emit_operand(dst, src);
}

void Assembler::andps(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE);
  emit_byte(0x54);
  emit_byte(0xC0 | encode);
}

void Assembler::bsfl(Register dst, Register src) {
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xBC);
  emit_byte(0xC0 | encode);
}

void Assembler::bsrl(Register dst, Register src) {
  assert(!VM_Version::supports_lzcnt(), "encoding is treated as LZCNT");
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xBD);
  emit_byte(0xC0 | encode);
}

void Assembler::bswapl(Register reg) { // bswap
  int encode = prefix_and_encode(reg->encoding());
  emit_byte(0x0F);
  emit_byte(0xC8 | encode);
}

void Assembler::call(Label& L, relocInfo::relocType rtype) {
  // suspect disp32 is always good
  int operand = LP64_ONLY(disp32_operand) NOT_LP64(imm_operand);

  if (L.is_bound()) {
    const int long_size = 5;
    int offs = (int)( target(L) - pc() );
    assert(offs <= 0, "assembler error");
    InstructionMark im(this);
    // 1110 1000 #32-bit disp
    emit_byte(0xE8);
    emit_data(offs - long_size, rtype, operand);
  } else {
    InstructionMark im(this);
    // 1110 1000 #32-bit disp
    L.add_patch_at(code(), locator());

    emit_byte(0xE8);
    emit_data(int(0), rtype, operand);
  }
}

void Assembler::call(Register dst) {
  // This was originally using a 32bit register encoding
  // and surely we want 64bit!
  // this is a 32bit encoding but in 64bit mode the default
  // operand size is 64bit so there is no need for the
  // wide prefix. So prefix only happens if we use the
  // new registers. Much like push/pop.
  int x = offset();
  // this may be true but dbx disassembles it as if it
  // were 32bits...
  // int encode = prefix_and_encode(dst->encoding());
  // if (offset() != x) assert(dst->encoding() >= 8, "what?");
  int encode = prefixq_and_encode(dst->encoding());

  emit_byte(0xFF);
  emit_byte(0xD0 | encode);
}


void Assembler::call(Address adr) {
  InstructionMark im(this);
  prefix(adr);
  emit_byte(0xFF);
  emit_operand(rdx, adr);
}

void Assembler::call_literal(address entry, RelocationHolder const& rspec) {
  assert(entry != NULL, "call most probably wrong");
  InstructionMark im(this);
  emit_byte(0xE8);
  intptr_t disp = entry - (_code_pos + sizeof(int32_t));
  assert(is_simm32(disp), "must be 32bit offset (call2)");
  // Technically, should use call32_operand, but this format is
  // implied by the fact that we're emitting a call instruction.

  int operand = LP64_ONLY(disp32_operand) NOT_LP64(call32_operand);
  emit_data((int) disp, rspec, operand);
}

void Assembler::cdql() {
  emit_byte(0x99);
}

void Assembler::cmovl(Condition cc, Register dst, Register src) {
  NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction"));
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x40 | cc);
  emit_byte(0xC0 | encode);
}


void Assembler::cmovl(Condition cc, Register dst, Address src) {
  NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction"));
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x40 | cc);
  emit_operand(dst, src);
}

void Assembler::cmpb(Address dst, int imm8) {
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0x80);
  emit_operand(rdi, dst, 1);
  emit_byte(imm8);
}

void Assembler::cmpl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0x81);
  emit_operand(rdi, dst, 4);
  emit_long(imm32);
1177 } 1178 1179 void Assembler::cmpl(Register dst, int32_t imm32) { 1180 prefix(dst); 1181 emit_arith(0x81, 0xF8, dst, imm32); 1182 } 1183 1184 void Assembler::cmpl(Register dst, Register src) { 1185 (void) prefix_and_encode(dst->encoding(), src->encoding()); 1186 emit_arith(0x3B, 0xC0, dst, src); 1187 } 1188 1189 1190 void Assembler::cmpl(Register dst, Address src) { 1191 InstructionMark im(this); 1192 prefix(src, dst); 1193 emit_byte(0x3B); 1194 emit_operand(dst, src); 1195 } 1196 1197 void Assembler::cmpw(Address dst, int imm16) { 1198 InstructionMark im(this); 1199 assert(!dst.base_needs_rex() && !dst.index_needs_rex(), "no extended registers"); 1200 emit_byte(0x66); 1201 emit_byte(0x81); 1202 emit_operand(rdi, dst, 2); 1203 emit_word(imm16); 1204 } 1205 1206 // The 32-bit cmpxchg compares the value at adr with the contents of rax, 1207 // and stores reg into adr if so; otherwise, the value at adr is loaded into rax,. 1208 // The ZF is set if the compared values were equal, and cleared otherwise. 1209 void Assembler::cmpxchgl(Register reg, Address adr) { // cmpxchg 1210 if (Atomics & 2) { 1211 // caveat: no instructionmark, so this isn't relocatable. 1212 // Emit a synthetic, non-atomic, CAS equivalent. 1213 // Beware. The synthetic form sets all ICCs, not just ZF. 1214 // cmpxchg r,[m] is equivalent to rax, = CAS (m, rax, r) 1215 cmpl(rax, adr); 1216 movl(rax, adr); 1217 if (reg != rax) { 1218 Label L ; 1219 jcc(Assembler::notEqual, L); 1220 movl(adr, reg); 1221 bind(L); 1222 } 1223 } else { 1224 InstructionMark im(this); 1225 prefix(adr, reg); 1226 emit_byte(0x0F); 1227 emit_byte(0xB1); 1228 emit_operand(reg, adr); 1229 } 1230 } 1231 1232 void Assembler::comisd(XMMRegister dst, Address src) { 1233 // NOTE: dbx seems to decode this as comiss even though the 1234 // 0x66 is there. 
Strangly ucomisd comes out correct 1235 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1236 InstructionMark im(this); 1237 simd_prefix(dst, src, VEX_SIMD_66); 1238 emit_byte(0x2F); 1239 emit_operand(dst, src); 1240 } 1241 1242 void Assembler::comisd(XMMRegister dst, XMMRegister src) { 1243 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1244 int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66); 1245 emit_byte(0x2F); 1246 emit_byte(0xC0 | encode); 1247 } 1248 1249 void Assembler::comiss(XMMRegister dst, Address src) { 1250 NOT_LP64(assert(VM_Version::supports_sse(), "")); 1251 InstructionMark im(this); 1252 simd_prefix(dst, src, VEX_SIMD_NONE); 1253 emit_byte(0x2F); 1254 emit_operand(dst, src); 1255 } 1256 1257 void Assembler::comiss(XMMRegister dst, XMMRegister src) { 1258 NOT_LP64(assert(VM_Version::supports_sse(), "")); 1259 int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_NONE); 1260 emit_byte(0x2F); 1261 emit_byte(0xC0 | encode); 1262 } 1263 1264 void Assembler::cvtdq2pd(XMMRegister dst, XMMRegister src) { 1265 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1266 int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F3); 1267 emit_byte(0xE6); 1268 emit_byte(0xC0 | encode); 1269 } 1270 1271 void Assembler::cvtdq2ps(XMMRegister dst, XMMRegister src) { 1272 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1273 int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_NONE); 1274 emit_byte(0x5B); 1275 emit_byte(0xC0 | encode); 1276 } 1277 1278 void Assembler::cvtsd2ss(XMMRegister dst, XMMRegister src) { 1279 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1280 int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2); 1281 emit_byte(0x5A); 1282 emit_byte(0xC0 | encode); 1283 } 1284 1285 void Assembler::cvtsd2ss(XMMRegister dst, Address src) { 1286 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1287 InstructionMark im(this); 1288 simd_prefix(dst, dst, src, VEX_SIMD_F2); 1289 emit_byte(0x5A); 1290 emit_operand(dst, src); 1291 } 1292 
// Convert 32-bit integer to scalar double (CVTSI2SD).
void Assembler::cvtsi2sdl(XMMRegister dst, Register src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2);
  emit_byte(0x2A);
  emit_byte(0xC0 | encode);
}

void Assembler::cvtsi2sdl(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  simd_prefix(dst, dst, src, VEX_SIMD_F2);
  emit_byte(0x2A);
  emit_operand(dst, src);
}

// Convert 32-bit integer to scalar single (CVTSI2SS).
void Assembler::cvtsi2ssl(XMMRegister dst, Register src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3);
  emit_byte(0x2A);
  emit_byte(0xC0 | encode);
}

void Assembler::cvtsi2ssl(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  simd_prefix(dst, dst, src, VEX_SIMD_F3);
  emit_byte(0x2A);
  emit_operand(dst, src);
}

// Convert scalar single to scalar double (CVTSS2SD).
void Assembler::cvtss2sd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3);
  emit_byte(0x5A);
  emit_byte(0xC0 | encode);
}

void Assembler::cvtss2sd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  simd_prefix(dst, dst, src, VEX_SIMD_F3);
  emit_byte(0x5A);
  emit_operand(dst, src);
}


// Truncating convert of scalar double to 32-bit integer (CVTTSD2SI).
void Assembler::cvttsd2sil(Register dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F2);
  emit_byte(0x2C);
  emit_byte(0xC0 | encode);
}

// Truncating convert of scalar single to 32-bit integer (CVTTSS2SI).
void Assembler::cvttss2sil(Register dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F3);
  emit_byte(0x2C);
  emit_byte(0xC0 | encode);
}

void Assembler::decl(Address dst) {
  // Don't use it directly. Use MacroAssembler::decrement() instead.
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0xFF);
  emit_operand(rcx, dst);          // rcx encodes /1, the DEC opcode extension
}

// Scalar double divide: dst = dst / src (SSE2 DIVSD).
void Assembler::divsd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  simd_prefix(dst, dst, src, VEX_SIMD_F2);
  emit_byte(0x5E);
  emit_operand(dst, src);
}

void Assembler::divsd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2);
  emit_byte(0x5E);
  emit_byte(0xC0 | encode);
}

// Scalar single divide: dst = dst / src (SSE DIVSS).
void Assembler::divss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  simd_prefix(dst, dst, src, VEX_SIMD_F3);
  emit_byte(0x5E);
  emit_operand(dst, src);
}

void Assembler::divss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3);
  emit_byte(0x5E);
  emit_byte(0xC0 | encode);
}

// Empty MMX state (EMMS).
void Assembler::emms() {
  NOT_LP64(assert(VM_Version::supports_mmx(), ""));
  emit_byte(0x0F);
  emit_byte(0x77);
}

// Halt the processor (HLT).
void Assembler::hlt() {
  emit_byte(0xF4);
}

// Signed 32-bit divide of EDX:EAX by src (IDIV).
void Assembler::idivl(Register src) {
  int encode = prefix_and_encode(src->encoding());
  emit_byte(0xF7);
  emit_byte(0xF8 | encode);
}

void Assembler::divl(Register src) { // Unsigned
  int encode = prefix_and_encode(src->encoding());
  emit_byte(0xF7);
  emit_byte(0xF0 | encode);
}

// Signed 32-bit multiply: dst = dst * src (two-operand IMUL).
void Assembler::imull(Register dst, Register src) {
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xAF);
  emit_byte(0xC0 | encode);
}


// Signed 32-bit multiply by immediate: dst = src * value (three-operand IMUL).
void Assembler::imull(Register dst, Register src, int value) {
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  if (is8bit(value)) {
    emit_byte(0x6B);               // short form with sign-extended imm8
    emit_byte(0xC0 | encode);
    emit_byte(value & 0xFF);
  } else {
    emit_byte(0x69);               // full form with imm32
    emit_byte(0xC0 | encode);
    emit_long(value);
  }
}

void Assembler::incl(Address dst) {
  // Don't use it directly. Use MacroAssembler::increment() instead.
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0xFF);
  emit_operand(rax, dst);          // rax encodes /0, the INC opcode extension
}

// Conditional jump to a label; uses the 2-byte short form when the label is
// bound close enough and maybe_short allows it, else the 6-byte long form.
void Assembler::jcc(Condition cc, Label& L, bool maybe_short) {
  InstructionMark im(this);
  assert((0 <= cc) && (cc < 16), "illegal cc");
  if (L.is_bound()) {
    address dst = target(L);
    assert(dst != NULL, "jcc most probably wrong");

    const int short_size = 2;
    const int long_size = 6;
    intptr_t offs = (intptr_t)dst - (intptr_t)_code_pos;
    if (maybe_short && is8bit(offs - short_size)) {
      // 0111 tttn #8-bit disp
      emit_byte(0x70 | cc);
      emit_byte((offs - short_size) & 0xFF);
    } else {
      // 0000 1111 1000 tttn #32-bit disp
      assert(is_simm32(offs - long_size),
             "must be 32bit offset (call4)");
      emit_byte(0x0F);
      emit_byte(0x80 | cc);
      emit_long(offs - long_size);
    }
  } else {
    // Note: could eliminate cond. jumps to this jump if condition
    // is the same however, seems to be rather unlikely case.
    // Note: use jccb() if label to be bound is very close to get
    // an 8-bit displacement
    L.add_patch_at(code(), locator());
    emit_byte(0x0F);
    emit_byte(0x80 | cc);
    emit_long(0);
  }
}

// Conditional jump, always the 2-byte short form; caller guarantees the
// target is within 8-bit displacement range.
void Assembler::jccb(Condition cc, Label& L) {
  if (L.is_bound()) {
    const int short_size = 2;
    address entry = target(L);
    assert(is8bit((intptr_t)entry - ((intptr_t)_code_pos + short_size)),
           "Dispacement too large for a short jmp");
    intptr_t offs = (intptr_t)entry - (intptr_t)_code_pos;
    // 0111 tttn #8-bit disp
    emit_byte(0x70 | cc);
    emit_byte((offs - short_size) & 0xFF);
  } else {
    InstructionMark im(this);
    L.add_patch_at(code(), locator());
    emit_byte(0x70 | cc);
    emit_byte(0);
  }
}

// Indirect jump through a memory operand.
void Assembler::jmp(Address adr) {
  InstructionMark im(this);
  prefix(adr);
  emit_byte(0xFF);
  emit_operand(rsp, adr);          // rsp encodes /4, the JMP opcode extension
}

// Unconditional jump to a label; short form when bound close and allowed.
void Assembler::jmp(Label& L, bool maybe_short) {
  if (L.is_bound()) {
    address entry = target(L);
    assert(entry != NULL, "jmp most probably wrong");
    InstructionMark im(this);
    const int short_size = 2;
    const int long_size = 5;
    intptr_t offs = entry - _code_pos;
    if (maybe_short && is8bit(offs - short_size)) {
      emit_byte(0xEB);
      emit_byte((offs - short_size) & 0xFF);
    } else {
      emit_byte(0xE9);
      emit_long(offs - long_size);
    }
  } else {
    // By default, forward jumps are always 32-bit displacements, since
    // we can't yet know where the label will be bound.  If you're sure that
    // the forward jump will not run beyond 256 bytes, use jmpb to
    // force an 8-bit displacement.
    InstructionMark im(this);
    L.add_patch_at(code(), locator());
    emit_byte(0xE9);
    emit_long(0);
  }
}

// Indirect jump through a register.
void Assembler::jmp(Register entry) {
  int encode = prefix_and_encode(entry->encoding());
  emit_byte(0xFF);
  emit_byte(0xE0 | encode);
}

// Direct jump to an absolute address, recorded with relocation info.
void Assembler::jmp_literal(address dest, RelocationHolder const& rspec) {
  InstructionMark im(this);
  emit_byte(0xE9);
  assert(dest != NULL, "must have a target");
  intptr_t disp = dest - (_code_pos + sizeof(int32_t));
  assert(is_simm32(disp), "must be 32bit offset (jmp)");
  emit_data(disp, rspec.reloc(), call32_operand);
}

// Unconditional jump, always the 2-byte short form.
void Assembler::jmpb(Label& L) {
  if (L.is_bound()) {
    const int short_size = 2;
    address entry = target(L);
    // NOTE(review): this checks is8bit(offs + short_size) while the emitted
    // displacement below is offs - short_size; jccb() above checks
    // offs - short_size. Asymmetry looks unintended -- confirm.
    assert(is8bit((entry - _code_pos) + short_size),
           "Dispacement too large for a short jmp");
    assert(entry != NULL, "jmp most probably wrong");
    intptr_t offs = entry - _code_pos;
    emit_byte(0xEB);
    emit_byte((offs - short_size) & 0xFF);
  } else {
    InstructionMark im(this);
    L.add_patch_at(code(), locator());
    emit_byte(0xEB);
    emit_byte(0);
  }
}

// Load the MXCSR control/status register from memory (LDMXCSR).
void Assembler::ldmxcsr( Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  prefix(src);
  emit_byte(0x0F);
  emit_byte(0xAE);
  emit_operand(as_Register(2), src);  // /2 selects LDMXCSR in group 15
}

// 32-bit load effective address.
void Assembler::leal(Register dst, Address src) {
  InstructionMark im(this);
#ifdef _LP64
  emit_byte(0x67); // addr32: force 32-bit address size in 64-bit mode
  prefix(src, dst);
#endif // LP64
  emit_byte(0x8D);
  emit_operand(dst, src);
}

void Assembler::lock() {
  if (Atomics & 1) {
    // Emit either nothing, a NOP, or a NOP: prefix
    emit_byte(0x90) ;
  } else {
    emit_byte(0xF0);               // LOCK prefix
  }
}

// Count leading zeros (LZCNT); encoded as F3-prefixed BSR.
void Assembler::lzcntl(Register dst, Register src) {
  assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR");
  emit_byte(0xF3);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xBD);
  emit_byte(0xC0 | encode);
}

// Emit mfence instruction
void Assembler::mfence() {
  NOT_LP64(assert(VM_Version::supports_sse2(), "unsupported");)
  emit_byte( 0x0F );
  emit_byte( 0xAE );
  emit_byte( 0xF0 );
}

// Pointer-sized register move: movq on 64-bit, movl on 32-bit.
void Assembler::mov(Register dst, Register src) {
  LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
}

// Aligned packed-double move between XMM registers (MOVAPD).
void Assembler::movapd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66);
  emit_byte(0x28);
  emit_byte(0xC0 | encode);
}

// Aligned packed-single move between XMM registers (MOVAPS).
void Assembler::movaps(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_NONE);
  emit_byte(0x28);
  emit_byte(0xC0 | encode);
}

// Byte load from memory.
void Assembler::movb(Register dst, Address src) {
  NOT_LP64(assert(dst->has_byte_register(), "must have byte register"));
  InstructionMark im(this);
  prefix(src, dst, true);          // true: byte-register instruction
  emit_byte(0x8A);
  emit_operand(dst, src);
}


// Byte store of an immediate.
void Assembler::movb(Address dst, int imm8) {
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0xC6);
  emit_operand(rax, dst, 1);
  emit_byte(imm8);
}


// Byte store from a register.
void Assembler::movb(Address dst, Register src) {
  assert(src->has_byte_register(), "must have byte register");
  InstructionMark im(this);
  prefix(dst, src, true);
  emit_byte(0x88);
  emit_operand(src, dst);
}

// Move 32 bits between a GP register and an XMM register (MOVD).
void Assembler::movdl(XMMRegister dst, Register src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66);
  emit_byte(0x6E);
  emit_byte(0xC0 | encode);
}

void Assembler::movdl(Register dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  // swap src/dst to get correct prefix
  int encode = simd_prefix_and_encode(src, dst, VEX_SIMD_66);
  emit_byte(0x7E);
  emit_byte(0xC0 | encode);
}

void Assembler::movdl(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  simd_prefix(dst, src, VEX_SIMD_66);
  emit_byte(0x6E);
  emit_operand(dst, src);
}

// Aligned 128-bit integer move (MOVDQA).
void Assembler::movdqa(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66);
  emit_byte(0x6F);
  emit_byte(0xC0 | encode);
}

// Unaligned 128-bit integer moves (MOVDQU).
void Assembler::movdqu(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  simd_prefix(dst, src, VEX_SIMD_F3);
  emit_byte(0x6F);
  emit_operand(dst, src);
}

void Assembler::movdqu(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F3);
  emit_byte(0x6F);
  emit_byte(0xC0 | encode);
}

void Assembler::movdqu(Address dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  simd_prefix(dst, src, VEX_SIMD_F3);
  emit_byte(0x7F);
  emit_operand(src, dst);
}

// Uses zero extension on 64bit

void Assembler::movl(Register dst, int32_t imm32) {
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xB8 | encode);        // B8+rd: MOV r32, imm32
  emit_long(imm32);
}

void Assembler::movl(Register dst, Register src) {
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x8B);
  emit_byte(0xC0 | encode);
}

void Assembler::movl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x8B);
  emit_operand(dst, src);
}

void Assembler::movl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0xC7);
  emit_operand(rax, dst, 4);
  emit_long(imm32);
}

void Assembler::movl(Address dst, Register src) {
  InstructionMark im(this);
  prefix(dst, src);
  emit_byte(0x89);
  emit_operand(src, dst);
}

// New cpus require to use movsd and movss to avoid partial register stall
// when loading from memory. But for old Opteron use movlpd instead of movsd.
// The selection is done in MacroAssembler::movdbl() and movflt().
void Assembler::movlpd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  simd_prefix(dst, dst, src, VEX_SIMD_66);
  emit_byte(0x12);
  emit_operand(dst, src);
}

// 64-bit MMX load.
void Assembler::movq( MMXRegister dst, Address src ) {
  assert( VM_Version::supports_mmx(), "" );
  emit_byte(0x0F);
  emit_byte(0x6F);
  emit_operand(dst, src);
}

// 64-bit MMX store.
void Assembler::movq( Address dst, MMXRegister src ) {
  assert( VM_Version::supports_mmx(), "" );
  emit_byte(0x0F);
  emit_byte(0x7F);
  // workaround gcc (3.2.1-7a) bug
  // In that version of gcc with only an emit_operand(MMX, Address)
  // gcc will tail jump and try and reverse the parameters completely
  // obliterating dst in the process. By having a version available
  // that doesn't need to swap the args at the tail jump the bug is
  // avoided.
  emit_operand(dst, src);
}

// 64-bit XMM load (MOVQ).
void Assembler::movq(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  simd_prefix(dst, src, VEX_SIMD_F3);
  emit_byte(0x7E);
  emit_operand(dst, src);
}

// 64-bit XMM store (MOVQ).
void Assembler::movq(Address dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  simd_prefix(dst, src, VEX_SIMD_66);
  emit_byte(0xD6);
  emit_operand(src, dst);
}

void Assembler::movsbl(Register dst, Address src) { // movsxb
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0xBE);
  emit_operand(dst, src);
}

void Assembler::movsbl(Register dst, Register src) { // movsxb
  NOT_LP64(assert(src->has_byte_register(), "must have byte register"));
  int encode = prefix_and_encode(dst->encoding(), src->encoding(), true);
  emit_byte(0x0F);
  emit_byte(0xBE);
  emit_byte(0xC0 | encode);
}

// Scalar double moves (MOVSD).
void Assembler::movsd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2);
  emit_byte(0x10);
  emit_byte(0xC0 | encode);
}

void Assembler::movsd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  simd_prefix(dst, src, VEX_SIMD_F2);
  emit_byte(0x10);
  emit_operand(dst, src);
}

void Assembler::movsd(Address dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  simd_prefix(dst, src, VEX_SIMD_F2);
  emit_byte(0x11);
  emit_operand(src, dst);
}

// Scalar single moves (MOVSS).
void Assembler::movss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3);
  emit_byte(0x10);
  emit_byte(0xC0 | encode);
}

void Assembler::movss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  simd_prefix(dst, src, VEX_SIMD_F3);
  emit_byte(0x10);
  emit_operand(dst, src);
}

void Assembler::movss(Address dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  simd_prefix(dst, src, VEX_SIMD_F3);
  emit_byte(0x11);
  emit_operand(src, dst);
}

void Assembler::movswl(Register dst, Address src) { // movsxw
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0xBF);
  emit_operand(dst, src);
}

void Assembler::movswl(Register dst, Register src) { // movsxw
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xBF);
  emit_byte(0xC0 | encode);
}

// 16-bit store of an immediate.
void Assembler::movw(Address dst, int imm16) {
  InstructionMark im(this);

  emit_byte(0x66); // switch to 16-bit mode
  prefix(dst);
  emit_byte(0xC7);
  emit_operand(rax, dst, 2);
  emit_word(imm16);
}

// 16-bit load.
void Assembler::movw(Register dst, Address src) {
  InstructionMark im(this);
  emit_byte(0x66);
  prefix(src, dst);
  emit_byte(0x8B);
  emit_operand(dst, src);
}

// 16-bit store.
void Assembler::movw(Address dst, Register src) {
  InstructionMark im(this);
  emit_byte(0x66);
  prefix(dst, src);
  emit_byte(0x89);
  emit_operand(src, dst);
}

void Assembler::movzbl(Register dst, Address src) { // movzxb
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0xB6);
  emit_operand(dst, src);
}

void Assembler::movzbl(Register dst, Register src) { // movzxb
  NOT_LP64(assert(src->has_byte_register(), "must have byte register"));
  int encode = prefix_and_encode(dst->encoding(), src->encoding(), true);
  emit_byte(0x0F);
  emit_byte(0xB6);
  emit_byte(0xC0 | encode);
}

void Assembler::movzwl(Register dst, Address src) { // movzxw
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0xB7);
  emit_operand(dst, src);
}

void Assembler::movzwl(Register dst, Register src) { // movzxw
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xB7);
  emit_byte(0xC0 | encode);
}

// Unsigned 32-bit multiply of EAX by a memory operand (MUL).
void Assembler::mull(Address src) {
  InstructionMark im(this);
  prefix(src);
  emit_byte(0xF7);
  emit_operand(rsp, src);          // rsp encodes /4, the MUL opcode extension
}

void Assembler::mull(Register src) {
  int encode = prefix_and_encode(src->encoding());
  emit_byte(0xF7);
  emit_byte(0xE0 | encode);
}

// Scalar double multiply: dst = dst * src (SSE2 MULSD).
void Assembler::mulsd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  simd_prefix(dst, dst, src, VEX_SIMD_F2);
  emit_byte(0x59);
  emit_operand(dst, src);
}

void Assembler::mulsd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2);
  emit_byte(0x59);
  emit_byte(0xC0 | encode);
}

// Scalar single multiply: dst = dst * src (SSE MULSS).
void Assembler::mulss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  simd_prefix(dst, dst, src, VEX_SIMD_F3);
  emit_byte(0x59);
  emit_operand(dst, src);
}

void Assembler::mulss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3);
  emit_byte(0x59);
  emit_byte(0xC0 | encode);
}

// Two's-complement negate (NEG).
void Assembler::negl(Register dst) {
  int encode =
prefix_and_encode(dst->encoding());
  emit_byte(0xF7);
  emit_byte(0xD8 | encode);
}

// Emit i bytes of NOP padding, choosing the densest patching-safe encoding
// for the current CPU (multi-byte address NOPs on Intel/AMD when
// UseAddressNop is set, plain 0x66-prefixed NOPs otherwise).
void Assembler::nop(int i) {
#ifdef ASSERT
  assert(i > 0, " ");
  // The fancy nops aren't currently recognized by debuggers making it a
  // pain to disassemble code while debugging. If asserts are on clearly
  // speed is not an issue so simply use the single byte traditional nop
  // to do alignment.

  for (; i > 0 ; i--) emit_byte(0x90);
  return;

#endif // ASSERT

  if (UseAddressNop && VM_Version::is_intel()) {
    //
    // Using multi-bytes nops "0x0F 0x1F [address]" for Intel
    //  1: 0x90
    //  2: 0x66 0x90
    //  3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
    //  4: 0x0F 0x1F 0x40 0x00
    //  5: 0x0F 0x1F 0x44 0x00 0x00
    //  6: 0x66 0x0F 0x1F 0x44 0x00 0x00
    //  7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
    //  8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    //  9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00

    // The rest coding is Intel specific - don't use consecutive address nops

    // 12: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
    // 13: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
    // 14: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
    // 15: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90

    while(i >= 15) {
      // For Intel don't generate consecutive address nops (mix with regular nops)
      i -= 15;
      emit_byte(0x66);   // size prefix
      emit_byte(0x66);   // size prefix
      emit_byte(0x66);   // size prefix
      addr_nop_8();
      emit_byte(0x66);   // size prefix
      emit_byte(0x66);   // size prefix
      emit_byte(0x66);   // size prefix
      emit_byte(0x90);   // nop
    }
    // Cases fall through deliberately to accumulate size prefixes.
    switch (i) {
      case 14:
        emit_byte(0x66); // size prefix
      case 13:
        emit_byte(0x66); // size prefix
      case 12:
        addr_nop_8();
        emit_byte(0x66); // size prefix
        emit_byte(0x66); // size prefix
        emit_byte(0x66); // size prefix
        emit_byte(0x90); // nop
        break;
      case 11:
        emit_byte(0x66); // size prefix
      case 10:
        emit_byte(0x66); // size prefix
      case 9:
        emit_byte(0x66); // size prefix
      case 8:
        addr_nop_8();
        break;
      case 7:
        addr_nop_7();
        break;
      case 6:
        emit_byte(0x66); // size prefix
      case 5:
        addr_nop_5();
        break;
      case 4:
        addr_nop_4();
        break;
      case 3:
        // Don't use "0x0F 0x1F 0x00" - need patching safe padding
        emit_byte(0x66); // size prefix
      case 2:
        emit_byte(0x66); // size prefix
      case 1:
        emit_byte(0x90); // nop
        break;
      default:
        assert(i == 0, " ");
    }
    return;
  }
  if (UseAddressNop && VM_Version::is_amd()) {
    //
    // Using multi-bytes nops "0x0F 0x1F [address]" for AMD.
    //  1: 0x90
    //  2: 0x66 0x90
    //  3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
    //  4: 0x0F 0x1F 0x40 0x00
    //  5: 0x0F 0x1F 0x44 0x00 0x00
    //  6: 0x66 0x0F 0x1F 0x44 0x00 0x00
    //  7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
    //  8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    //  9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00

    // The rest coding is AMD specific - use consecutive address nops

    // 12: 0x66 0x0F 0x1F 0x44 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
    // 13: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
    // 14: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
    // 15: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
    // 16: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    //     Size prefixes (0x66) are added for larger sizes

    while(i >= 22) {
      i -= 11;
      emit_byte(0x66); // size prefix
      emit_byte(0x66); // size prefix
      emit_byte(0x66); // size prefix
      addr_nop_8();
    }
    // Generate first nop for size between 21-12
    switch (i) {
      case 21:
        i -= 1;
        emit_byte(0x66); // size prefix
      case 20:
      case 19:
        i -= 1;
        emit_byte(0x66); // size prefix
      case 18:
      case 17:
        i -= 1;
        emit_byte(0x66); // size prefix
      case 16:
      case 15:
        i -= 8;
        addr_nop_8();
        break;
      case 14:
      case 13:
        i -= 7;
        addr_nop_7();
        break;
      case 12:
        i -= 6;
        emit_byte(0x66); // size prefix
        addr_nop_5();
        break;
      default:
        assert(i < 12, " ");
    }

    // Generate second nop for size between 11-1
    switch (i) {
      case 11:
        emit_byte(0x66); // size prefix
      case 10:
        emit_byte(0x66); // size prefix
      case 9:
        emit_byte(0x66); // size prefix
      case 8:
        addr_nop_8();
        break;
      case 7:
        addr_nop_7();
        break;
      case 6:
        emit_byte(0x66); // size prefix
      case 5:
        addr_nop_5();
        break;
      case 4:
        addr_nop_4();
        break;
      case 3:
        // Don't use "0x0F 0x1F 0x00" - need patching safe padding
        emit_byte(0x66); // size prefix
      case 2:
        emit_byte(0x66); // size prefix
      case 1:
        emit_byte(0x90); // nop
        break;
      default:
        assert(i == 0, " ");
    }
    return;
  }

  // Using nops with size prefixes "0x66 0x90".
  // From AMD Optimization Guide:
  //  1: 0x90
  //  2: 0x66 0x90
  //  3: 0x66 0x66 0x90
  //  4: 0x66 0x66 0x66 0x90
  //  5: 0x66 0x66 0x90 0x66 0x90
  //  6: 0x66 0x66 0x90 0x66 0x66 0x90
  //  7: 0x66 0x66 0x66 0x90 0x66 0x66 0x90
  //  8: 0x66 0x66 0x66 0x90 0x66 0x66 0x66 0x90
  //  9: 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
  // 10: 0x66 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
  //
  while(i > 12) {
    i -= 4;
    emit_byte(0x66); // size prefix
    emit_byte(0x66);
    emit_byte(0x66);
    emit_byte(0x90); // nop
  }
  // 1 - 12 nops
  if(i > 8) {
    if(i > 9) {
      i -= 1;
      emit_byte(0x66);
    }
    i -= 3;
    emit_byte(0x66);
    emit_byte(0x66);
    emit_byte(0x90);
  }
  // 1 - 8 nops
  if(i > 4) {
    if(i > 6) {
      i -= 1;
      emit_byte(0x66);
    }
    i -= 3;
    emit_byte(0x66);
    emit_byte(0x66);
    emit_byte(0x90);
  }
  switch (i) {
    case 4:
      emit_byte(0x66);
    case 3:
      emit_byte(0x66);
    case 2:
      emit_byte(0x66);
    case 1:
      emit_byte(0x90);
      break;
    default:
      assert(i == 0, " ");
  }
}

// Bitwise NOT (one's complement) of a register.
void Assembler::notl(Register dst) {
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xF7);
  emit_byte(0xD0 | encode );
}

void Assembler::orl(Address dst, int32_t imm32) {
InstructionMark im(this); 2225 prefix(dst); 2226 emit_arith_operand(0x81, rcx, dst, imm32); 2227 } 2228 2229 void Assembler::orl(Register dst, int32_t imm32) { 2230 prefix(dst); 2231 emit_arith(0x81, 0xC8, dst, imm32); 2232 } 2233 2234 void Assembler::orl(Register dst, Address src) { 2235 InstructionMark im(this); 2236 prefix(src, dst); 2237 emit_byte(0x0B); 2238 emit_operand(dst, src); 2239 } 2240 2241 void Assembler::orl(Register dst, Register src) { 2242 (void) prefix_and_encode(dst->encoding(), src->encoding()); 2243 emit_arith(0x0B, 0xC0, dst, src); 2244 } 2245 2246 void Assembler::packuswb(XMMRegister dst, Address src) { 2247 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2248 assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); 2249 InstructionMark im(this); 2250 simd_prefix(dst, dst, src, VEX_SIMD_66); 2251 emit_byte(0x67); 2252 emit_operand(dst, src); 2253 } 2254 2255 void Assembler::packuswb(XMMRegister dst, XMMRegister src) { 2256 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2257 int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66); 2258 emit_byte(0x67); 2259 emit_byte(0xC0 | encode); 2260 } 2261 2262 void Assembler::pcmpestri(XMMRegister dst, Address src, int imm8) { 2263 assert(VM_Version::supports_sse4_2(), ""); 2264 InstructionMark im(this); 2265 simd_prefix(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A); 2266 emit_byte(0x61); 2267 emit_operand(dst, src); 2268 emit_byte(imm8); 2269 } 2270 2271 void Assembler::pcmpestri(XMMRegister dst, XMMRegister src, int imm8) { 2272 assert(VM_Version::supports_sse4_2(), ""); 2273 int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A); 2274 emit_byte(0x61); 2275 emit_byte(0xC0 | encode); 2276 emit_byte(imm8); 2277 } 2278 2279 void Assembler::pmovzxbw(XMMRegister dst, Address src) { 2280 assert(VM_Version::supports_sse4_1(), ""); 2281 InstructionMark im(this); 2282 simd_prefix(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38); 2283 emit_byte(0x30); 2284 emit_operand(dst, 
src); 2285 } 2286 2287 void Assembler::pmovzxbw(XMMRegister dst, XMMRegister src) { 2288 assert(VM_Version::supports_sse4_1(), ""); 2289 int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38); 2290 emit_byte(0x30); 2291 emit_byte(0xC0 | encode); 2292 } 2293 2294 // generic 2295 void Assembler::pop(Register dst) { 2296 int encode = prefix_and_encode(dst->encoding()); 2297 emit_byte(0x58 | encode); 2298 } 2299 2300 void Assembler::popcntl(Register dst, Address src) { 2301 assert(VM_Version::supports_popcnt(), "must support"); 2302 InstructionMark im(this); 2303 emit_byte(0xF3); 2304 prefix(src, dst); 2305 emit_byte(0x0F); 2306 emit_byte(0xB8); 2307 emit_operand(dst, src); 2308 } 2309 2310 void Assembler::popcntl(Register dst, Register src) { 2311 assert(VM_Version::supports_popcnt(), "must support"); 2312 emit_byte(0xF3); 2313 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 2314 emit_byte(0x0F); 2315 emit_byte(0xB8); 2316 emit_byte(0xC0 | encode); 2317 } 2318 2319 void Assembler::popf() { 2320 emit_byte(0x9D); 2321 } 2322 2323 #ifndef _LP64 // no 32bit push/pop on amd64 2324 void Assembler::popl(Address dst) { 2325 // NOTE: this will adjust stack by 8byte on 64bits 2326 InstructionMark im(this); 2327 prefix(dst); 2328 emit_byte(0x8F); 2329 emit_operand(rax, dst); 2330 } 2331 #endif 2332 2333 void Assembler::prefetch_prefix(Address src) { 2334 prefix(src); 2335 emit_byte(0x0F); 2336 } 2337 2338 void Assembler::prefetchnta(Address src) { 2339 NOT_LP64(assert(VM_Version::supports_sse(), "must support")); 2340 InstructionMark im(this); 2341 prefetch_prefix(src); 2342 emit_byte(0x18); 2343 emit_operand(rax, src); // 0, src 2344 } 2345 2346 void Assembler::prefetchr(Address src) { 2347 assert(VM_Version::supports_3dnow_prefetch(), "must support"); 2348 InstructionMark im(this); 2349 prefetch_prefix(src); 2350 emit_byte(0x0D); 2351 emit_operand(rax, src); // 0, src 2352 } 2353 2354 void Assembler::prefetcht0(Address src) { 2355 
NOT_LP64(assert(VM_Version::supports_sse(), "must support")); 2356 InstructionMark im(this); 2357 prefetch_prefix(src); 2358 emit_byte(0x18); 2359 emit_operand(rcx, src); // 1, src 2360 } 2361 2362 void Assembler::prefetcht1(Address src) { 2363 NOT_LP64(assert(VM_Version::supports_sse(), "must support")); 2364 InstructionMark im(this); 2365 prefetch_prefix(src); 2366 emit_byte(0x18); 2367 emit_operand(rdx, src); // 2, src 2368 } 2369 2370 void Assembler::prefetcht2(Address src) { 2371 NOT_LP64(assert(VM_Version::supports_sse(), "must support")); 2372 InstructionMark im(this); 2373 prefetch_prefix(src); 2374 emit_byte(0x18); 2375 emit_operand(rbx, src); // 3, src 2376 } 2377 2378 void Assembler::prefetchw(Address src) { 2379 assert(VM_Version::supports_3dnow_prefetch(), "must support"); 2380 InstructionMark im(this); 2381 prefetch_prefix(src); 2382 emit_byte(0x0D); 2383 emit_operand(rcx, src); // 1, src 2384 } 2385 2386 void Assembler::prefix(Prefix p) { 2387 a_byte(p); 2388 } 2389 2390 void Assembler::por(XMMRegister dst, XMMRegister src) { 2391 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2392 int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66); 2393 emit_byte(0xEB); 2394 emit_byte(0xC0 | encode); 2395 } 2396 2397 void Assembler::por(XMMRegister dst, Address src) { 2398 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2399 assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); 2400 InstructionMark im(this); 2401 simd_prefix(dst, dst, src, VEX_SIMD_66); 2402 emit_byte(0xEB); 2403 emit_operand(dst, src); 2404 } 2405 2406 void Assembler::pshufd(XMMRegister dst, XMMRegister src, int mode) { 2407 assert(isByte(mode), "invalid value"); 2408 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2409 int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66); 2410 emit_byte(0x70); 2411 emit_byte(0xC0 | encode); 2412 emit_byte(mode & 0xFF); 2413 2414 } 2415 2416 void Assembler::pshufd(XMMRegister dst, Address src, int mode) { 2417 
assert(isByte(mode), "invalid value"); 2418 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2419 assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); 2420 InstructionMark im(this); 2421 simd_prefix(dst, src, VEX_SIMD_66); 2422 emit_byte(0x70); 2423 emit_operand(dst, src); 2424 emit_byte(mode & 0xFF); 2425 } 2426 2427 void Assembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) { 2428 assert(isByte(mode), "invalid value"); 2429 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2430 int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F2); 2431 emit_byte(0x70); 2432 emit_byte(0xC0 | encode); 2433 emit_byte(mode & 0xFF); 2434 } 2435 2436 void Assembler::pshuflw(XMMRegister dst, Address src, int mode) { 2437 assert(isByte(mode), "invalid value"); 2438 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2439 assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); 2440 InstructionMark im(this); 2441 simd_prefix(dst, src, VEX_SIMD_F2); 2442 emit_byte(0x70); 2443 emit_operand(dst, src); 2444 emit_byte(mode & 0xFF); 2445 } 2446 2447 void Assembler::psrlq(XMMRegister dst, int shift) { 2448 // Shift 64 bit value logically right by specified number of bits. 2449 // HMM Table D-1 says sse2 or mmx. 2450 // Do not confuse it with psrldq SSE2 instruction which 2451 // shifts 128 bit value in xmm register by number of bytes. 2452 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2453 int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66); 2454 emit_byte(0x73); 2455 emit_byte(0xC0 | encode); 2456 emit_byte(shift); 2457 } 2458 2459 void Assembler::psrldq(XMMRegister dst, int shift) { 2460 // Shift 128 bit value in xmm register by number of bytes. 
2461 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2462 int encode = simd_prefix_and_encode(xmm3, dst, dst, VEX_SIMD_66); 2463 emit_byte(0x73); 2464 emit_byte(0xC0 | encode); 2465 emit_byte(shift); 2466 } 2467 2468 void Assembler::ptest(XMMRegister dst, Address src) { 2469 assert(VM_Version::supports_sse4_1(), ""); 2470 assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); 2471 InstructionMark im(this); 2472 simd_prefix(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38); 2473 emit_byte(0x17); 2474 emit_operand(dst, src); 2475 } 2476 2477 void Assembler::ptest(XMMRegister dst, XMMRegister src) { 2478 assert(VM_Version::supports_sse4_1(), ""); 2479 int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38); 2480 emit_byte(0x17); 2481 emit_byte(0xC0 | encode); 2482 } 2483 2484 void Assembler::punpcklbw(XMMRegister dst, Address src) { 2485 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2486 assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); 2487 InstructionMark im(this); 2488 simd_prefix(dst, dst, src, VEX_SIMD_66); 2489 emit_byte(0x60); 2490 emit_operand(dst, src); 2491 } 2492 2493 void Assembler::punpcklbw(XMMRegister dst, XMMRegister src) { 2494 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2495 int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66); 2496 emit_byte(0x60); 2497 emit_byte(0xC0 | encode); 2498 } 2499 2500 void Assembler::punpckldq(XMMRegister dst, Address src) { 2501 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2502 assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); 2503 InstructionMark im(this); 2504 simd_prefix(dst, dst, src, VEX_SIMD_66); 2505 emit_byte(0x62); 2506 emit_operand(dst, src); 2507 } 2508 2509 void Assembler::punpckldq(XMMRegister dst, XMMRegister src) { 2510 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2511 int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66); 2512 emit_byte(0x62); 2513 emit_byte(0xC0 | encode); 2514 } 2515 2516 
void Assembler::push(int32_t imm32) { 2517 // in 64bits we push 64bits onto the stack but only 2518 // take a 32bit immediate 2519 emit_byte(0x68); 2520 emit_long(imm32); 2521 } 2522 2523 void Assembler::push(Register src) { 2524 int encode = prefix_and_encode(src->encoding()); 2525 2526 emit_byte(0x50 | encode); 2527 } 2528 2529 void Assembler::pushf() { 2530 emit_byte(0x9C); 2531 } 2532 2533 #ifndef _LP64 // no 32bit push/pop on amd64 2534 void Assembler::pushl(Address src) { 2535 // Note this will push 64bit on 64bit 2536 InstructionMark im(this); 2537 prefix(src); 2538 emit_byte(0xFF); 2539 emit_operand(rsi, src); 2540 } 2541 #endif 2542 2543 void Assembler::pxor(XMMRegister dst, Address src) { 2544 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2545 assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); 2546 InstructionMark im(this); 2547 simd_prefix(dst, dst, src, VEX_SIMD_66); 2548 emit_byte(0xEF); 2549 emit_operand(dst, src); 2550 } 2551 2552 void Assembler::pxor(XMMRegister dst, XMMRegister src) { 2553 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2554 int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66); 2555 emit_byte(0xEF); 2556 emit_byte(0xC0 | encode); 2557 } 2558 2559 void Assembler::rcll(Register dst, int imm8) { 2560 assert(isShiftCount(imm8), "illegal shift count"); 2561 int encode = prefix_and_encode(dst->encoding()); 2562 if (imm8 == 1) { 2563 emit_byte(0xD1); 2564 emit_byte(0xD0 | encode); 2565 } else { 2566 emit_byte(0xC1); 2567 emit_byte(0xD0 | encode); 2568 emit_byte(imm8); 2569 } 2570 } 2571 2572 // copies data from [esi] to [edi] using rcx pointer sized words 2573 // generic 2574 void Assembler::rep_mov() { 2575 emit_byte(0xF3); 2576 // MOVSQ 2577 LP64_ONLY(prefix(REX_W)); 2578 emit_byte(0xA5); 2579 } 2580 2581 // sets rcx pointer sized words with rax, value at [edi] 2582 // generic 2583 void Assembler::rep_set() { // rep_set 2584 emit_byte(0xF3); 2585 // STOSQ 2586 LP64_ONLY(prefix(REX_W)); 2587 
emit_byte(0xAB); 2588 } 2589 2590 // scans rcx pointer sized words at [edi] for occurance of rax, 2591 // generic 2592 void Assembler::repne_scan() { // repne_scan 2593 emit_byte(0xF2); 2594 // SCASQ 2595 LP64_ONLY(prefix(REX_W)); 2596 emit_byte(0xAF); 2597 } 2598 2599 #ifdef _LP64 2600 // scans rcx 4 byte words at [edi] for occurance of rax, 2601 // generic 2602 void Assembler::repne_scanl() { // repne_scan 2603 emit_byte(0xF2); 2604 // SCASL 2605 emit_byte(0xAF); 2606 } 2607 #endif 2608 2609 void Assembler::ret(int imm16) { 2610 if (imm16 == 0) { 2611 emit_byte(0xC3); 2612 } else { 2613 emit_byte(0xC2); 2614 emit_word(imm16); 2615 } 2616 } 2617 2618 void Assembler::sahf() { 2619 #ifdef _LP64 2620 // Not supported in 64bit mode 2621 ShouldNotReachHere(); 2622 #endif 2623 emit_byte(0x9E); 2624 } 2625 2626 void Assembler::sarl(Register dst, int imm8) { 2627 int encode = prefix_and_encode(dst->encoding()); 2628 assert(isShiftCount(imm8), "illegal shift count"); 2629 if (imm8 == 1) { 2630 emit_byte(0xD1); 2631 emit_byte(0xF8 | encode); 2632 } else { 2633 emit_byte(0xC1); 2634 emit_byte(0xF8 | encode); 2635 emit_byte(imm8); 2636 } 2637 } 2638 2639 void Assembler::sarl(Register dst) { 2640 int encode = prefix_and_encode(dst->encoding()); 2641 emit_byte(0xD3); 2642 emit_byte(0xF8 | encode); 2643 } 2644 2645 void Assembler::sbbl(Address dst, int32_t imm32) { 2646 InstructionMark im(this); 2647 prefix(dst); 2648 emit_arith_operand(0x81, rbx, dst, imm32); 2649 } 2650 2651 void Assembler::sbbl(Register dst, int32_t imm32) { 2652 prefix(dst); 2653 emit_arith(0x81, 0xD8, dst, imm32); 2654 } 2655 2656 2657 void Assembler::sbbl(Register dst, Address src) { 2658 InstructionMark im(this); 2659 prefix(src, dst); 2660 emit_byte(0x1B); 2661 emit_operand(dst, src); 2662 } 2663 2664 void Assembler::sbbl(Register dst, Register src) { 2665 (void) prefix_and_encode(dst->encoding(), src->encoding()); 2666 emit_arith(0x1B, 0xC0, dst, src); 2667 } 2668 2669 void Assembler::setb(Condition cc, 
Register dst) { 2670 assert(0 <= cc && cc < 16, "illegal cc"); 2671 int encode = prefix_and_encode(dst->encoding(), true); 2672 emit_byte(0x0F); 2673 emit_byte(0x90 | cc); 2674 emit_byte(0xC0 | encode); 2675 } 2676 2677 void Assembler::shll(Register dst, int imm8) { 2678 assert(isShiftCount(imm8), "illegal shift count"); 2679 int encode = prefix_and_encode(dst->encoding()); 2680 if (imm8 == 1 ) { 2681 emit_byte(0xD1); 2682 emit_byte(0xE0 | encode); 2683 } else { 2684 emit_byte(0xC1); 2685 emit_byte(0xE0 | encode); 2686 emit_byte(imm8); 2687 } 2688 } 2689 2690 void Assembler::shll(Register dst) { 2691 int encode = prefix_and_encode(dst->encoding()); 2692 emit_byte(0xD3); 2693 emit_byte(0xE0 | encode); 2694 } 2695 2696 void Assembler::shrl(Register dst, int imm8) { 2697 assert(isShiftCount(imm8), "illegal shift count"); 2698 int encode = prefix_and_encode(dst->encoding()); 2699 emit_byte(0xC1); 2700 emit_byte(0xE8 | encode); 2701 emit_byte(imm8); 2702 } 2703 2704 void Assembler::shrl(Register dst) { 2705 int encode = prefix_and_encode(dst->encoding()); 2706 emit_byte(0xD3); 2707 emit_byte(0xE8 | encode); 2708 } 2709 2710 // copies a single word from [esi] to [edi] 2711 void Assembler::smovl() { 2712 emit_byte(0xA5); 2713 } 2714 2715 void Assembler::sqrtsd(XMMRegister dst, XMMRegister src) { 2716 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2717 int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2); 2718 emit_byte(0x51); 2719 emit_byte(0xC0 | encode); 2720 } 2721 2722 void Assembler::sqrtsd(XMMRegister dst, Address src) { 2723 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2724 InstructionMark im(this); 2725 simd_prefix(dst, dst, src, VEX_SIMD_F2); 2726 emit_byte(0x51); 2727 emit_operand(dst, src); 2728 } 2729 2730 void Assembler::sqrtss(XMMRegister dst, XMMRegister src) { 2731 NOT_LP64(assert(VM_Version::supports_sse(), "")); 2732 int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3); 2733 emit_byte(0x51); 2734 emit_byte(0xC0 | encode); 
2735 } 2736 2737 void Assembler::sqrtss(XMMRegister dst, Address src) { 2738 NOT_LP64(assert(VM_Version::supports_sse(), "")); 2739 InstructionMark im(this); 2740 simd_prefix(dst, dst, src, VEX_SIMD_F3); 2741 emit_byte(0x51); 2742 emit_operand(dst, src); 2743 } 2744 2745 void Assembler::stmxcsr( Address dst) { 2746 NOT_LP64(assert(VM_Version::supports_sse(), "")); 2747 InstructionMark im(this); 2748 prefix(dst); 2749 emit_byte(0x0F); 2750 emit_byte(0xAE); 2751 emit_operand(as_Register(3), dst); 2752 } 2753 2754 void Assembler::subl(Address dst, int32_t imm32) { 2755 InstructionMark im(this); 2756 prefix(dst); 2757 emit_arith_operand(0x81, rbp, dst, imm32); 2758 } 2759 2760 void Assembler::subl(Address dst, Register src) { 2761 InstructionMark im(this); 2762 prefix(dst, src); 2763 emit_byte(0x29); 2764 emit_operand(src, dst); 2765 } 2766 2767 void Assembler::subl(Register dst, int32_t imm32) { 2768 prefix(dst); 2769 emit_arith(0x81, 0xE8, dst, imm32); 2770 } 2771 2772 void Assembler::subl(Register dst, Address src) { 2773 InstructionMark im(this); 2774 prefix(src, dst); 2775 emit_byte(0x2B); 2776 emit_operand(dst, src); 2777 } 2778 2779 void Assembler::subl(Register dst, Register src) { 2780 (void) prefix_and_encode(dst->encoding(), src->encoding()); 2781 emit_arith(0x2B, 0xC0, dst, src); 2782 } 2783 2784 void Assembler::subsd(XMMRegister dst, XMMRegister src) { 2785 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2786 int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2); 2787 emit_byte(0x5C); 2788 emit_byte(0xC0 | encode); 2789 } 2790 2791 void Assembler::subsd(XMMRegister dst, Address src) { 2792 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2793 InstructionMark im(this); 2794 simd_prefix(dst, dst, src, VEX_SIMD_F2); 2795 emit_byte(0x5C); 2796 emit_operand(dst, src); 2797 } 2798 2799 void Assembler::subss(XMMRegister dst, XMMRegister src) { 2800 NOT_LP64(assert(VM_Version::supports_sse(), "")); 2801 int encode = simd_prefix_and_encode(dst, dst, 
src, VEX_SIMD_F3); 2802 emit_byte(0x5C); 2803 emit_byte(0xC0 | encode); 2804 } 2805 2806 void Assembler::subss(XMMRegister dst, Address src) { 2807 NOT_LP64(assert(VM_Version::supports_sse(), "")); 2808 InstructionMark im(this); 2809 simd_prefix(dst, dst, src, VEX_SIMD_F3); 2810 emit_byte(0x5C); 2811 emit_operand(dst, src); 2812 } 2813 2814 void Assembler::testb(Register dst, int imm8) { 2815 NOT_LP64(assert(dst->has_byte_register(), "must have byte register")); 2816 (void) prefix_and_encode(dst->encoding(), true); 2817 emit_arith_b(0xF6, 0xC0, dst, imm8); 2818 } 2819 2820 void Assembler::testl(Register dst, int32_t imm32) { 2821 // not using emit_arith because test 2822 // doesn't support sign-extension of 2823 // 8bit operands 2824 int encode = dst->encoding(); 2825 if (encode == 0) { 2826 emit_byte(0xA9); 2827 } else { 2828 encode = prefix_and_encode(encode); 2829 emit_byte(0xF7); 2830 emit_byte(0xC0 | encode); 2831 } 2832 emit_long(imm32); 2833 } 2834 2835 void Assembler::testl(Register dst, Register src) { 2836 (void) prefix_and_encode(dst->encoding(), src->encoding()); 2837 emit_arith(0x85, 0xC0, dst, src); 2838 } 2839 2840 void Assembler::testl(Register dst, Address src) { 2841 InstructionMark im(this); 2842 prefix(src, dst); 2843 emit_byte(0x85); 2844 emit_operand(dst, src); 2845 } 2846 2847 void Assembler::ucomisd(XMMRegister dst, Address src) { 2848 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2849 InstructionMark im(this); 2850 simd_prefix(dst, src, VEX_SIMD_66); 2851 emit_byte(0x2E); 2852 emit_operand(dst, src); 2853 } 2854 2855 void Assembler::ucomisd(XMMRegister dst, XMMRegister src) { 2856 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2857 int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66); 2858 emit_byte(0x2E); 2859 emit_byte(0xC0 | encode); 2860 } 2861 2862 void Assembler::ucomiss(XMMRegister dst, Address src) { 2863 NOT_LP64(assert(VM_Version::supports_sse(), "")); 2864 InstructionMark im(this); 2865 simd_prefix(dst, src, 
VEX_SIMD_NONE); 2866 emit_byte(0x2E); 2867 emit_operand(dst, src); 2868 } 2869 2870 void Assembler::ucomiss(XMMRegister dst, XMMRegister src) { 2871 NOT_LP64(assert(VM_Version::supports_sse(), "")); 2872 int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_NONE); 2873 emit_byte(0x2E); 2874 emit_byte(0xC0 | encode); 2875 } 2876 2877 2878 void Assembler::xaddl(Address dst, Register src) { 2879 InstructionMark im(this); 2880 prefix(dst, src); 2881 emit_byte(0x0F); 2882 emit_byte(0xC1); 2883 emit_operand(src, dst); 2884 } 2885 2886 void Assembler::xchgl(Register dst, Address src) { // xchg 2887 InstructionMark im(this); 2888 prefix(src, dst); 2889 emit_byte(0x87); 2890 emit_operand(dst, src); 2891 } 2892 2893 void Assembler::xchgl(Register dst, Register src) { 2894 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 2895 emit_byte(0x87); 2896 emit_byte(0xc0 | encode); 2897 } 2898 2899 void Assembler::xorl(Register dst, int32_t imm32) { 2900 prefix(dst); 2901 emit_arith(0x81, 0xF0, dst, imm32); 2902 } 2903 2904 void Assembler::xorl(Register dst, Address src) { 2905 InstructionMark im(this); 2906 prefix(src, dst); 2907 emit_byte(0x33); 2908 emit_operand(dst, src); 2909 } 2910 2911 void Assembler::xorl(Register dst, Register src) { 2912 (void) prefix_and_encode(dst->encoding(), src->encoding()); 2913 emit_arith(0x33, 0xC0, dst, src); 2914 } 2915 2916 void Assembler::xorpd(XMMRegister dst, XMMRegister src) { 2917 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2918 int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66); 2919 emit_byte(0x57); 2920 emit_byte(0xC0 | encode); 2921 } 2922 2923 void Assembler::xorpd(XMMRegister dst, Address src) { 2924 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2925 InstructionMark im(this); 2926 simd_prefix(dst, dst, src, VEX_SIMD_66); 2927 emit_byte(0x57); 2928 emit_operand(dst, src); 2929 } 2930 2931 2932 void Assembler::xorps(XMMRegister dst, XMMRegister src) { 2933 
NOT_LP64(assert(VM_Version::supports_sse(), "")); 2934 int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE); 2935 emit_byte(0x57); 2936 emit_byte(0xC0 | encode); 2937 } 2938 2939 void Assembler::xorps(XMMRegister dst, Address src) { 2940 NOT_LP64(assert(VM_Version::supports_sse(), "")); 2941 InstructionMark im(this); 2942 simd_prefix(dst, dst, src, VEX_SIMD_NONE); 2943 emit_byte(0x57); 2944 emit_operand(dst, src); 2945 } 2946 2947 #ifndef _LP64 2948 // 32bit only pieces of the assembler 2949 2950 void Assembler::cmp_literal32(Register src1, int32_t imm32, RelocationHolder const& rspec) { 2951 // NO PREFIX AS NEVER 64BIT 2952 InstructionMark im(this); 2953 emit_byte(0x81); 2954 emit_byte(0xF8 | src1->encoding()); 2955 emit_data(imm32, rspec, 0); 2956 } 2957 2958 void Assembler::cmp_literal32(Address src1, int32_t imm32, RelocationHolder const& rspec) { 2959 // NO PREFIX AS NEVER 64BIT (not even 32bit versions of 64bit regs 2960 InstructionMark im(this); 2961 emit_byte(0x81); 2962 emit_operand(rdi, src1); 2963 emit_data(imm32, rspec, 0); 2964 } 2965 2966 // The 64-bit (32bit platform) cmpxchg compares the value at adr with the contents of rdx:rax, 2967 // and stores rcx:rbx into adr if so; otherwise, the value at adr is loaded 2968 // into rdx:rax. The ZF is set if the compared values were equal, and cleared otherwise. 2969 void Assembler::cmpxchg8(Address adr) { 2970 InstructionMark im(this); 2971 emit_byte(0x0F); 2972 emit_byte(0xc7); 2973 emit_operand(rcx, adr); 2974 } 2975 2976 void Assembler::decl(Register dst) { 2977 // Don't use it directly. Use MacroAssembler::decrementl() instead. 
2978 emit_byte(0x48 | dst->encoding()); 2979 } 2980 2981 #endif // _LP64 2982 2983 // 64bit typically doesn't use the x87 but needs to for the trig funcs 2984 2985 void Assembler::fabs() { 2986 emit_byte(0xD9); 2987 emit_byte(0xE1); 2988 } 2989 2990 void Assembler::fadd(int i) { 2991 emit_farith(0xD8, 0xC0, i); 2992 } 2993 2994 void Assembler::fadd_d(Address src) { 2995 InstructionMark im(this); 2996 emit_byte(0xDC); 2997 emit_operand32(rax, src); 2998 } 2999 3000 void Assembler::fadd_s(Address src) { 3001 InstructionMark im(this); 3002 emit_byte(0xD8); 3003 emit_operand32(rax, src); 3004 } 3005 3006 void Assembler::fadda(int i) { 3007 emit_farith(0xDC, 0xC0, i); 3008 } 3009 3010 void Assembler::faddp(int i) { 3011 emit_farith(0xDE, 0xC0, i); 3012 } 3013 3014 void Assembler::fchs() { 3015 emit_byte(0xD9); 3016 emit_byte(0xE0); 3017 } 3018 3019 void Assembler::fcom(int i) { 3020 emit_farith(0xD8, 0xD0, i); 3021 } 3022 3023 void Assembler::fcomp(int i) { 3024 emit_farith(0xD8, 0xD8, i); 3025 } 3026 3027 void Assembler::fcomp_d(Address src) { 3028 InstructionMark im(this); 3029 emit_byte(0xDC); 3030 emit_operand32(rbx, src); 3031 } 3032 3033 void Assembler::fcomp_s(Address src) { 3034 InstructionMark im(this); 3035 emit_byte(0xD8); 3036 emit_operand32(rbx, src); 3037 } 3038 3039 void Assembler::fcompp() { 3040 emit_byte(0xDE); 3041 emit_byte(0xD9); 3042 } 3043 3044 void Assembler::fcos() { 3045 emit_byte(0xD9); 3046 emit_byte(0xFF); 3047 } 3048 3049 void Assembler::fdecstp() { 3050 emit_byte(0xD9); 3051 emit_byte(0xF6); 3052 } 3053 3054 void Assembler::fdiv(int i) { 3055 emit_farith(0xD8, 0xF0, i); 3056 } 3057 3058 void Assembler::fdiv_d(Address src) { 3059 InstructionMark im(this); 3060 emit_byte(0xDC); 3061 emit_operand32(rsi, src); 3062 } 3063 3064 void Assembler::fdiv_s(Address src) { 3065 InstructionMark im(this); 3066 emit_byte(0xD8); 3067 emit_operand32(rsi, src); 3068 } 3069 3070 void Assembler::fdiva(int i) { 3071 emit_farith(0xDC, 0xF8, i); 3072 } 3073 3074 
// Note: The Intel manual (Pentium Processor User's Manual, Vol.3, 1994)
// is erroneous for some of the floating-point instructions below.

void Assembler::fdivp(int i) {
  emit_farith(0xDE, 0xF8, i);                    // ST(0) <- ST(0) / ST(1) and pop (Intel manual wrong)
}

void Assembler::fdivr(int i) {
  emit_farith(0xD8, 0xF8, i);
}

void Assembler::fdivr_d(Address src) {
  InstructionMark im(this);
  emit_byte(0xDC);
  emit_operand32(rdi, src);   // DC /7: FDIVR m64fp
}

void Assembler::fdivr_s(Address src) {
  InstructionMark im(this);
  emit_byte(0xD8);
  emit_operand32(rdi, src);   // D8 /7: FDIVR m32fp
}

void Assembler::fdivra(int i) {
  emit_farith(0xDC, 0xF0, i);
}

void Assembler::fdivrp(int i) {
  emit_farith(0xDE, 0xF0, i);                    // ST(0) <- ST(1) / ST(0) and pop (Intel manual wrong)
}

void Assembler::ffree(int i) {
  emit_farith(0xDD, 0xC0, i);
}

void Assembler::fild_d(Address adr) {
  InstructionMark im(this);
  emit_byte(0xDF);
  emit_operand32(rbp, adr);   // DF /5: FILD m64int
}

void Assembler::fild_s(Address adr) {
  InstructionMark im(this);
  emit_byte(0xDB);
  emit_operand32(rax, adr);   // DB /0: FILD m32int
}

void Assembler::fincstp() {
  emit_byte(0xD9);
  emit_byte(0xF7);
}

void Assembler::finit() {
  // 9B (FWAIT) + DB E3 (FNINIT) == FINIT, the waiting form.
  emit_byte(0x9B);
  emit_byte(0xDB);
  emit_byte(0xE3);
}

void Assembler::fist_s(Address adr) {
  InstructionMark im(this);
  emit_byte(0xDB);
  emit_operand32(rdx, adr);   // DB /2: FIST m32int
}

void Assembler::fistp_d(Address adr) {
  InstructionMark im(this);
  emit_byte(0xDF);
  emit_operand32(rdi, adr);   // DF /7: FISTP m64int
}

void Assembler::fistp_s(Address adr) {
  InstructionMark im(this);
  emit_byte(0xDB);
  emit_operand32(rbx, adr);   // DB /3: FISTP m32int
}

void Assembler::fld1() {
  emit_byte(0xD9);
  emit_byte(0xE8);
}

void Assembler::fld_d(Address adr) {
  InstructionMark im(this);
  emit_byte(0xDD);
  emit_operand32(rax, adr);   // DD /0: FLD m64fp
}

void Assembler::fld_s(Address adr) {
  InstructionMark im(this);
  emit_byte(0xD9);
  emit_operand32(rax, adr);   // D9 /0: FLD m32fp
}


void Assembler::fld_s(int index) {
  emit_farith(0xD9, 0xC0, index);                // FLD ST(index)
}

void Assembler::fld_x(Address adr) {
  InstructionMark im(this);
  emit_byte(0xDB);
  emit_operand32(rbp, adr);   // DB /5: FLD m80fp (extended precision)
}

void Assembler::fldcw(Address src) {
  InstructionMark im(this);
  emit_byte(0xd9);
  emit_operand32(rbp, src);   // D9 /5: FLDCW m2byte
}

void Assembler::fldenv(Address src) {
  InstructionMark im(this);
  emit_byte(0xD9);
  emit_operand32(rsp, src);   // D9 /4: FLDENV
}

void Assembler::fldlg2() {
  emit_byte(0xD9);
  emit_byte(0xEC);
}

void Assembler::fldln2() {
  emit_byte(0xD9);
  emit_byte(0xED);
}

void Assembler::fldz() {
  emit_byte(0xD9);
  emit_byte(0xEE);
}

// Pseudo-instruction: natural log via ln(2) * log2(x).
// Expects the argument in ST(0); leaves the result in ST(0).
void Assembler::flog() {
  fldln2();
  fxch();
  fyl2x();
}

// Pseudo-instruction: base-10 log via log10(2) * log2(x).
void Assembler::flog10() {
  fldlg2();
  fxch();
  fyl2x();
}

void Assembler::fmul(int i) {
  emit_farith(0xD8, 0xC8, i);
}

void Assembler::fmul_d(Address src) {
  InstructionMark im(this);
  emit_byte(0xDC);
  emit_operand32(rcx, src);   // DC /1: FMUL m64fp
}

void Assembler::fmul_s(Address src) {
  InstructionMark im(this);
  emit_byte(0xD8);
  emit_operand32(rcx, src);   // D8 /1: FMUL m32fp
}

void Assembler::fmula(int i) {
  emit_farith(0xDC, 0xC8, i);
}

void Assembler::fmulp(int i) {
  emit_farith(0xDE, 0xC8, i);
}

void Assembler::fnsave(Address dst) {
  InstructionMark im(this);
  emit_byte(0xDD);
  emit_operand32(rsi, dst);   // DD /6: FNSAVE (no wait)
}

void Assembler::fnstcw(Address src) {
  InstructionMark im(this);
  // NOTE(review): the leading 0x9B (FWAIT) makes this the *waiting* FSTCW
  // form despite the fnstcw name — presumably intentional; verify.
  emit_byte(0x9B);
  emit_byte(0xD9);
  emit_operand32(rdi, src);   // D9 /7: store control word
}

void Assembler::fnstsw_ax() {
  emit_byte(0xdF);            // DF E0: FNSTSW AX (no wait)
  emit_byte(0xE0);
}

void Assembler::fprem() {
  emit_byte(0xD9);
  emit_byte(0xF8);
}

void Assembler::fprem1() {
  emit_byte(0xD9);
  emit_byte(0xF5);
}

void Assembler::frstor(Address src) {
  InstructionMark im(this);
  emit_byte(0xDD);
  emit_operand32(rsp, src);   // DD /4: FRSTOR
}

void Assembler::fsin() {
  emit_byte(0xD9);
  emit_byte(0xFE);
}

void Assembler::fsqrt() {
  emit_byte(0xD9);
  emit_byte(0xFA);
}

void Assembler::fst_d(Address adr) {
  InstructionMark im(this);
  emit_byte(0xDD);
  emit_operand32(rdx, adr);   // DD /2: FST m64fp
}

void Assembler::fst_s(Address adr) {
  InstructionMark im(this);
  emit_byte(0xD9);
  emit_operand32(rdx, adr);   // D9 /2: FST m32fp
}

void Assembler::fstp_d(Address adr) {
  InstructionMark im(this);
  emit_byte(0xDD);
  emit_operand32(rbx, adr);   // DD /3: FSTP m64fp
}

void Assembler::fstp_d(int index) {
  emit_farith(0xDD, 0xD8, index);                // FSTP ST(index)
}

void Assembler::fstp_s(Address adr) {
  InstructionMark im(this);
  emit_byte(0xD9);
  emit_operand32(rbx, adr);   // D9 /3: FSTP m32fp
}

void Assembler::fstp_x(Address adr) {
  InstructionMark im(this);
  emit_byte(0xDB);
  emit_operand32(rdi, adr);   // DB /7: FSTP m80fp (extended precision)
}

void Assembler::fsub(int i) {
  emit_farith(0xD8, 0xE0, i);
}

void Assembler::fsub_d(Address src) {
  InstructionMark im(this);
  emit_byte(0xDC);
  emit_operand32(rsp, src);   // DC /4: FSUB m64fp
}

void Assembler::fsub_s(Address src) {
  InstructionMark im(this);
  emit_byte(0xD8);
  emit_operand32(rsp, src);   // D8 /4: FSUB m32fp
}

void Assembler::fsuba(int i) {
  emit_farith(0xDC, 0xE8, i);
}

void Assembler::fsubp(int i) {
  emit_farith(0xDE, 0xE8, i);                    // ST(0) <- ST(0) - ST(1) and pop (Intel manual wrong)
}

void Assembler::fsubr(int i) {
  emit_farith(0xD8, 0xE8, i);
}

void Assembler::fsubr_d(Address src) {
  InstructionMark im(this);
  emit_byte(0xDC);
  emit_operand32(rbp, src);   // DC /5: FSUBR m64fp
}

void Assembler::fsubr_s(Address src) {
  InstructionMark im(this);
  emit_byte(0xD8);
  emit_operand32(rbp, src);   // D8 /5: FSUBR m32fp
}

void Assembler::fsubra(int i) {
  emit_farith(0xDC, 0xE0, i);
}

void Assembler::fsubrp(int i) {
  emit_farith(0xDE, 0xE0, i);                    // ST(0) <- ST(1) - ST(0) and pop (Intel manual wrong)
}

// Pseudo-instruction: FPTAN pushes 1.0 after the partial tangent,
// so pop (FSTP ST(0)) to leave just tan(x) in ST(0).
void Assembler::ftan() {
  emit_byte(0xD9);
  emit_byte(0xF2);
  emit_byte(0xDD);
  emit_byte(0xD8);
}

void Assembler::ftst() {
  emit_byte(0xD9);
  emit_byte(0xE4);
}

void Assembler::fucomi(int i) {
  // make sure the instruction is supported (introduced for P6, together with cmov)
  guarantee(VM_Version::supports_cmov(), "illegal instruction");
  emit_farith(0xDB, 0xE8, i);
}

void Assembler::fucomip(int i) {
  // make sure the instruction is supported (introduced for P6, together with cmov)
  guarantee(VM_Version::supports_cmov(), "illegal instruction");
  emit_farith(0xDF, 0xE8, i);
}

void Assembler::fwait() {
  emit_byte(0x9B);
}

void Assembler::fxch(int i) {
  emit_farith(0xD9, 0xC8, i);
}

void Assembler::fyl2x() {
  emit_byte(0xD9);
  emit_byte(0xF1);
}

// Lookup tables indexed by VexSimdPrefix / VexOpcode enum values:
// legacy SSE mandatory-prefix byte and the second opcode-map escape byte.
static int simd_pre[4] = { 0, 0x66, 0xF3, 0xF2 };
static int simd_opc[4] = { 0, 0, 0x38, 0x3A };

// Legacy (non-AVX) SSE encoding: mandatory prefix, REX (if needed), opcode escape(s).
void Assembler::rex_prefix(Address adr, XMMRegister xreg, VexSimdPrefix pre, VexOpcode opc, bool rex_w) {
  if (pre > 0) {
    emit_byte(simd_pre[pre]);
  }
  if (rex_w) {
    prefixq(adr, xreg);
  } else {
    prefix(adr, xreg);
  }
  if (opc > 0) {
    emit_byte(0x0F);
    int opc2 = simd_opc[opc];
    if (opc2 > 0) {
      emit_byte(opc2);
    }
  }
}

// Register-register variant of the above; returns the ModRM reg/rm encoding
// (low 3 bits of each operand, REX bits already emitted).
int Assembler::rex_prefix_and_encode(int dst_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, bool rex_w) {
  if (pre > 0) {
    emit_byte(simd_pre[pre]);
  }
  int encode = (rex_w) ? prefixq_and_encode(dst_enc, src_enc) :
                         prefix_and_encode(dst_enc, src_enc);
  if (opc > 0) {
    emit_byte(0x0F);
    int opc2 = simd_opc[opc];
    if (opc2 > 0) {
      emit_byte(opc2);
    }
  }
  return encode;
}


// Emit an AVX VEX prefix. The 2-byte form (C5) is only usable when
// B/X/W are clear and the opcode map is 0F; otherwise the 3-byte form (C4)
// is required. Note the R/X/B and nds fields are stored inverted per the spec.
void Assembler::vex_prefix(bool vex_r, bool vex_b, bool vex_x, bool vex_w, int nds_enc, VexSimdPrefix pre, VexOpcode opc, bool vector256) {
  if (vex_b || vex_x || vex_w || (opc == VEX_OPCODE_0F_38) || (opc == VEX_OPCODE_0F_3A)) {
    prefix(VEX_3bytes);

    int byte1 = (vex_r ? VEX_R : 0) | (vex_x ? VEX_X : 0) | (vex_b ? VEX_B : 0);
    byte1 = (~byte1) & 0xE0;
    byte1 |= opc;
    // NOTE(review): a_byte() here vs emit_byte() everywhere else in this
    // function — verify the raw AbstractAssembler emitter is intentional.
    a_byte(byte1);

    int byte2 = ((~nds_enc) & 0xf) << 3;
    byte2 |= (vex_w ? VEX_W : 0) | (vector256 ? 4 : 0) | pre;   // bit 2 = L (256-bit)
    emit_byte(byte2);
  } else {
    prefix(VEX_2bytes);

    int byte1 = vex_r ? VEX_R : 0;
    byte1 = (~byte1) & 0x80;
    byte1 |= ((~nds_enc) & 0xf) << 3;
    byte1 |= (vector256 ? 4 : 0) | pre;
    emit_byte(byte1);
  }
}

// VEX prefix for a memory-operand form; R/X/B derive from the xmm reg and address.
void Assembler::vex_prefix(Address adr, int nds_enc, int xreg_enc, VexSimdPrefix pre, VexOpcode opc, bool vex_w, bool vector256){
  bool vex_r = (xreg_enc >= 8);
  bool vex_b = adr.base_needs_rex();
  bool vex_x = adr.index_needs_rex();
  vex_prefix(vex_r, vex_b, vex_x, vex_w, nds_enc, pre, opc, vector256);
}

// VEX prefix for a register-register form; returns the ModRM reg/rm byte bits.
int Assembler::vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, bool vex_w, bool vector256) {
  bool vex_r = (dst_enc >= 8);
  bool vex_b = (src_enc >= 8);
  bool vex_x = false;
  vex_prefix(vex_r, vex_b, vex_x, vex_w, nds_enc, pre, opc, vector256);
  return (((dst_enc & 7) << 3) | (src_enc & 7));
}


// Dispatch between VEX (AVX) and legacy REX/SSE prefix encodings
// depending on UseAVX; memory-operand form.
void Assembler::simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr, VexSimdPrefix pre, VexOpcode opc, bool rex_w, bool vector256) {
  if (UseAVX > 0) {
    int xreg_enc = xreg->encoding();
    int nds_enc = nds->is_valid() ? nds->encoding() : 0;
    vex_prefix(adr, nds_enc, xreg_enc, pre, opc, rex_w, vector256);
  } else {
    // Legacy SSE is two-operand: the extra nds operand must alias a real one.
    assert((nds == xreg) || (nds == xnoreg), "wrong sse encoding");
    rex_prefix(adr, xreg, pre, opc, rex_w);
  }
}

// As above, register-register form; returns the ModRM reg/rm byte bits.
int Assembler::simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src, VexSimdPrefix pre, VexOpcode opc, bool rex_w, bool vector256) {
  int dst_enc = dst->encoding();
  int src_enc = src->encoding();
  if (UseAVX > 0) {
    int nds_enc = nds->is_valid() ? nds->encoding() : 0;
    return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, rex_w, vector256);
  } else {
    assert((nds == dst) || (nds == src) || (nds == xnoreg), "wrong sse encoding");
    return rex_prefix_and_encode(dst_enc, src_enc, pre, opc, rex_w);
  }
}

#ifndef _LP64

void Assembler::incl(Register dst) {
  // Don't use it directly. Use MacroAssembler::incrementl() instead.
  emit_byte(0x40 | dst->encoding());   // one-byte INC r32 (32-bit mode only)
}

void Assembler::lea(Register dst, Address src) {
  leal(dst, src);
}

void Assembler::mov_literal32(Address dst, int32_t imm32, RelocationHolder const& rspec) {
  InstructionMark im(this);
  emit_byte(0xC7);                     // C7 /0: MOV r/m32, imm32
  emit_operand(rax, dst);
  emit_data((int)imm32, rspec, 0);
}

void Assembler::mov_literal32(Register dst, int32_t imm32, RelocationHolder const& rspec) {
  InstructionMark im(this);
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xB8 | encode);            // B8+r: MOV r32, imm32
  emit_data((int)imm32, rspec, 0);
}

void Assembler::popa() { // 32bit
  emit_byte(0x61);                     // POPAD
}

void Assembler::push_literal32(int32_t imm32, RelocationHolder const& rspec) {
  InstructionMark im(this);
  emit_byte(0x68);                     // PUSH imm32
  emit_data(imm32, rspec, 0);
}

void Assembler::pusha() { // 32bit
  emit_byte(0x60);                     // PUSHAD
}

void Assembler::set_byte_if_not_zero(Register dst) {
  emit_byte(0x0F);
  emit_byte(0x95);                     // SETNZ r/m8
  emit_byte(0xE0 | dst->encoding());
}

void Assembler::shldl(Register dst, Register src) {
  emit_byte(0x0F);
  emit_byte(0xA5);                     // SHLD r/m32, r32, CL
  emit_byte(0xC0 | src->encoding() << 3 | dst->encoding());
}

void Assembler::shrdl(Register dst, Register src) {
  emit_byte(0x0F);
  emit_byte(0xAD);                     // SHRD r/m32, r32, CL
  emit_byte(0xC0 | src->encoding() << 3 | dst->encoding());
}

#else // LP64

void Assembler::set_byte_if_not_zero(Register dst) {
  // byteinst=true: low byte of rsp..rdi needs a plain REX prefix in 64-bit mode.
  int enc = prefix_and_encode(dst->encoding(), true);
  emit_byte(0x0F);
  emit_byte(0x95);                     // SETNZ r/m8
  emit_byte(0xE0 | enc);
}

// 64bit only pieces of the assembler
// This should only be used by 64bit instructions that can use rip-relative
// it cannot be used by instructions that want an immediate value.
// Decide whether the literal target can be addressed rip-relatively (disp32)
// from anywhere code might end up in the code cache. Returns false when the
// caller must materialize a full 64-bit literal instead. The decision is
// driven by the literal's relocation type plus a worst-case displacement
// check against both ends of the code cache.
bool Assembler::reachable(AddressLiteral adr) {
  int64_t disp;
  // None will force a 64bit literal to the code stream. Likely a placeholder
  // for something that will be patched later and we need to certain it will
  // always be reachable.
  if (adr.reloc() == relocInfo::none) {
    return false;
  }
  if (adr.reloc() == relocInfo::internal_word_type) {
    // This should be rip relative and easily reachable.
    return true;
  }
  if (adr.reloc() == relocInfo::virtual_call_type ||
      adr.reloc() == relocInfo::opt_virtual_call_type ||
      adr.reloc() == relocInfo::static_call_type ||
      adr.reloc() == relocInfo::static_stub_type ) {
    // This should be rip relative within the code cache and easily
    // reachable until we get huge code caches. (At which point
    // ic code is going to have issues).
    return true;
  }
  if (adr.reloc() != relocInfo::external_word_type &&
      adr.reloc() != relocInfo::poll_return_type &&  // these are really external_word but need special
      adr.reloc() != relocInfo::poll_type &&         // relocs to identify them
      adr.reloc() != relocInfo::runtime_call_type ) {
    return false;
  }

  // Stress the correction code
  if (ForceUnreachable) {
    // Must be runtimecall reloc, see if it is in the codecache
    // Flipping stuff in the codecache to be unreachable causes issues
    // with things like inline caches where the additional instructions
    // are not handled.
    if (CodeCache::find_blob(adr._target) == NULL) {
      return false;
    }
  }
  // For external_word_type/runtime_call_type if it is reachable from where we
  // are now (possibly a temp buffer) and where we might end up
  // anywhere in the codeCache then we are always reachable.
  // This would have to change if we ever save/restore shared code
  // to be more pessimistic.
  disp = (int64_t)adr._target - ((int64_t)CodeCache::low_bound() + sizeof(int));
  if (!is_simm32(disp)) return false;
  disp = (int64_t)adr._target - ((int64_t)CodeCache::high_bound() + sizeof(int));
  if (!is_simm32(disp)) return false;

  disp = (int64_t)adr._target - ((int64_t)_code_pos + sizeof(int));

  // Because rip relative is a disp + address_of_next_instruction and we
  // don't know the value of address_of_next_instruction we apply a fudge factor
  // to make sure we will be ok no matter the size of the instruction we get placed into.
  // We don't have to fudge the checks above here because they are already worst case.

  // 12 == override/rex byte, opcode byte, rm byte, sib byte, a 4-byte disp , 4-byte literal
  // + 4 because better safe than sorry.
  const int fudge = 12 + 4;
  if (disp < 0) {
    disp -= fudge;
  } else {
    disp += fudge;
  }
  return is_simm32(disp);
}

// Check if the polling page is not reachable from the code cache using rip-relative
// addressing.
bool Assembler::is_polling_page_far() {
  intptr_t addr = (intptr_t)os::get_polling_page();
  // Far if either end of the code cache is more than disp32 away from the page.
  return ForceUnreachable ||
         !is_simm32(addr - (intptr_t)CodeCache::low_bound()) ||
         !is_simm32(addr - (intptr_t)CodeCache::high_bound());
}

// Emit an 8-byte data word, with relocation when rtype is not none.
void Assembler::emit_data64(jlong data,
                            relocInfo::relocType rtype,
                            int format) {
  if (rtype == relocInfo::none) {
    emit_long64(data);
  } else {
    emit_data64(data, Relocation::spec_simple(rtype), format);
  }
}

// Emit a relocated 8-byte data word embedded in an instruction; the reloc
// is attached to the enclosing instruction's mark, not to the word itself.
void Assembler::emit_data64(jlong data,
                            RelocationHolder const& rspec,
                            int format) {
  assert(imm_operand == 0, "default format must be immediate in this file");
  assert(imm_operand == format, "must be immediate");
  assert(inst_mark() != NULL, "must be inside InstructionMark");
  // Do not use AbstractAssembler::relocate, which is not intended for
  // embedded words. Instead, relocate to the enclosing instruction.
  code_section()->relocate(inst_mark(), rspec, format);
#ifdef ASSERT
  check_relocation(rspec, format);
#endif
  emit_long64(data);
}

// REX prefix helpers. The *_and_encode variants emit any needed REX prefix
// and return the operand encoding reduced to its low 3 bits for the ModRM byte.
// byteinst=true marks an 8-bit-operand instruction: spl/bpl/sil/dil (enc 4-7)
// need a bare REX so they aren't interpreted as ah/ch/dh/bh.

int Assembler::prefix_and_encode(int reg_enc, bool byteinst) {
  if (reg_enc >= 8) {
    prefix(REX_B);
    reg_enc -= 8;
  } else if (byteinst && reg_enc >= 4) {
    prefix(REX);
  }
  return reg_enc;
}

int Assembler::prefixq_and_encode(int reg_enc) {
  if (reg_enc < 8) {
    prefix(REX_W);
  } else {
    prefix(REX_WB);
    reg_enc -= 8;
  }
  return reg_enc;
}

int Assembler::prefix_and_encode(int dst_enc, int src_enc, bool byteinst) {
  if (dst_enc < 8) {
    if (src_enc >= 8) {
      prefix(REX_B);
      src_enc -= 8;
    } else if (byteinst && src_enc >= 4) {
      prefix(REX);
    }
  } else {
    if (src_enc < 8) {
      prefix(REX_R);
    } else {
      prefix(REX_RB);
      src_enc -= 8;
    }
    dst_enc -= 8;
  }
  return dst_enc << 3 | src_enc;   // ModRM reg/rm fields
}

int Assembler::prefixq_and_encode(int dst_enc, int src_enc) {
  if (dst_enc < 8) {
    if (src_enc < 8) {
      prefix(REX_W);
    } else {
      prefix(REX_WB);
      src_enc -= 8;
    }
  } else {
    if (src_enc < 8) {
      prefix(REX_WR);
    } else {
      prefix(REX_WRB);
      src_enc -= 8;
    }
    dst_enc -= 8;
  }
  return dst_enc << 3 | src_enc;
}

void Assembler::prefix(Register reg) {
  if (reg->encoding() >= 8) {
    prefix(REX_B);
  }
}

void Assembler::prefix(Address adr) {
  if (adr.base_needs_rex()) {
    if (adr.index_needs_rex()) {
      prefix(REX_XB);
    } else {
      prefix(REX_B);
    }
  } else {
    if (adr.index_needs_rex()) {
      prefix(REX_X);
    }
  }
}

void Assembler::prefixq(Address adr) {
  if (adr.base_needs_rex()) {
    if (adr.index_needs_rex()) {
      prefix(REX_WXB);
    } else {
      prefix(REX_WB);
    }
  } else {
    if (adr.index_needs_rex()) {
      prefix(REX_WX);
    } else {
      prefix(REX_W);
    }
  }
}


void Assembler::prefix(Address adr, Register reg, bool byteinst) {
  if (reg->encoding() < 8) {
    if (adr.base_needs_rex()) {
      if (adr.index_needs_rex()) {
        prefix(REX_XB);
      } else {
        prefix(REX_B);
      }
    } else {
      if (adr.index_needs_rex()) {
        prefix(REX_X);
      } else if (byteinst && reg->encoding() >= 4 ) {
        prefix(REX);
      }
    }
  } else {
    if (adr.base_needs_rex()) {
      if (adr.index_needs_rex()) {
        prefix(REX_RXB);
      } else {
        prefix(REX_RB);
      }
    } else {
      if (adr.index_needs_rex()) {
        prefix(REX_RX);
      } else {
        prefix(REX_R);
      }
    }
  }
}

void Assembler::prefixq(Address adr, Register src) {
  if (src->encoding() < 8) {
    if (adr.base_needs_rex()) {
      if (adr.index_needs_rex()) {
        prefix(REX_WXB);
      } else {
        prefix(REX_WB);
      }
    } else {
      if (adr.index_needs_rex()) {
        prefix(REX_WX);
      } else {
        prefix(REX_W);
      }
    }
  } else {
    if (adr.base_needs_rex()) {
      if (adr.index_needs_rex()) {
        prefix(REX_WRXB);
      } else {
        prefix(REX_WRB);
      }
    } else {
      if (adr.index_needs_rex()) {
        prefix(REX_WRX);
      } else {
        prefix(REX_WR);
      }
    }
  }
}

void Assembler::prefix(Address adr, XMMRegister reg) {
  if (reg->encoding() < 8) {
    if (adr.base_needs_rex()) {
      if (adr.index_needs_rex()) {
        prefix(REX_XB);
      } else {
        prefix(REX_B);
      }
    } else {
      if (adr.index_needs_rex()) {
        prefix(REX_X);
      }
    }
  } else {
    if (adr.base_needs_rex()) {
      if (adr.index_needs_rex()) {
        prefix(REX_RXB);
      } else {
        prefix(REX_RB);
      }
    } else {
      if (adr.index_needs_rex()) {
        prefix(REX_RX);
      } else {
        prefix(REX_R);
      }
    }
  }
}

void Assembler::prefixq(Address adr, XMMRegister src) {
  if (src->encoding() < 8) {
    if (adr.base_needs_rex()) {
      if (adr.index_needs_rex()) {
        prefix(REX_WXB);
      } else {
        prefix(REX_WB);
      }
    } else {
      if (adr.index_needs_rex()) {
        prefix(REX_WX);
      } else {
        prefix(REX_W);
      }
    }
  } else {
    if (adr.base_needs_rex()) {
      if (adr.index_needs_rex()) {
        prefix(REX_WRXB);
      } else {
        prefix(REX_WRB);
      }
    } else {
      if (adr.index_needs_rex()) {
        prefix(REX_WRX);
      } else {
        prefix(REX_WR);
      }
    }
  }
}

// 64-bit ("q") arithmetic and data-movement instructions. emit_arith()
// handles the ModRM/immediate encoding; the leading cast merely discards
// the returned encoding when emit_arith re-derives it from the registers.

void Assembler::adcq(Register dst, int32_t imm32) {
  (void) prefixq_and_encode(dst->encoding());
  emit_arith(0x81, 0xD0, dst, imm32);            // 81 /2: ADC r/m64, imm32
}

void Assembler::adcq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x13);                               // ADC r64, r/m64
  emit_operand(dst, src);
}

void Assembler::adcq(Register dst, Register src) {
  // NOTE(review): "(int)" discard-cast — every sibling uses "(void)"; align someday.
  (int) prefixq_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x13, 0xC0, dst, src);
}

void Assembler::addq(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefixq(dst);
  emit_arith_operand(0x81, rax, dst,imm32);      // 81 /0: ADD r/m64, imm32
}

void Assembler::addq(Address dst, Register src) {
  InstructionMark im(this);
  prefixq(dst, src);
  emit_byte(0x01);                               // ADD r/m64, r64
  emit_operand(src, dst);
}

void Assembler::addq(Register dst, int32_t imm32) {
  (void) prefixq_and_encode(dst->encoding());
  emit_arith(0x81, 0xC0, dst, imm32);
}

void Assembler::addq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x03);                               // ADD r64, r/m64
  emit_operand(dst, src);
}

void Assembler::addq(Register dst, Register src) {
  (void) prefixq_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x03, 0xC0, dst, src);
}

void Assembler::andq(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefixq(dst);
  emit_byte(0x81);
  emit_operand(rsp, dst, 4);                     // /4 = AND digit; 4 = imm32 follows operand
  emit_long(imm32);
}

void Assembler::andq(Register dst, int32_t imm32) {
  (void) prefixq_and_encode(dst->encoding());
  emit_arith(0x81, 0xE0, dst, imm32);
}

void Assembler::andq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x23);                               // AND r64, r/m64
  emit_operand(dst, src);
}

void Assembler::andq(Register dst, Register src) {
  // NOTE(review): "(int)" discard-cast — every sibling uses "(void)"; align someday.
  (int) prefixq_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x23, 0xC0, dst, src);
}

void Assembler::bsfq(Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xBC);                               // BSF
  emit_byte(0xC0 | encode);
}

void Assembler::bsrq(Register dst, Register src) {
  // On lzcnt-capable CPUs an F3-prefixed BSR decodes as LZCNT (different result).
  assert(!VM_Version::supports_lzcnt(), "encoding is treated as LZCNT");
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xBD);                               // BSR
  emit_byte(0xC0 | encode);
}

void Assembler::bswapq(Register reg) {
  int encode = prefixq_and_encode(reg->encoding());
  emit_byte(0x0F);
  emit_byte(0xC8 | encode);                      // BSWAP r64
}

void Assembler::cdqq() {
  prefix(REX_W);
  emit_byte(0x99);                               // CQO: sign-extend RAX into RDX:RAX
}

void Assembler::clflush(Address adr) {
  prefix(adr);
  emit_byte(0x0F);
  emit_byte(0xAE);
  emit_operand(rdi, adr);                        // 0F AE /7: CLFLUSH
}

void Assembler::cmovq(Condition cc, Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x40 | cc);                          // 0F 4x: CMOVcc
  emit_byte(0xC0 | encode);
}

void Assembler::cmovq(Condition cc, Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x0F);
  emit_byte(0x40 | cc);
  emit_operand(dst, src);
}

void Assembler::cmpq(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefixq(dst);
  emit_byte(0x81);
  emit_operand(rdi, dst, 4);                     // /7 = CMP digit
  emit_long(imm32);
}

void Assembler::cmpq(Register dst, int32_t imm32) {
  (void) prefixq_and_encode(dst->encoding());
  emit_arith(0x81, 0xF8, dst, imm32);
}

void Assembler::cmpq(Address dst, Register src) {
  InstructionMark im(this);
  prefixq(dst, src);
  // NOTE(review): 0x3B is CMP r64, r/m64, which computes src - dst — the
  // reverse of what the (dst, src) name suggests (0x39 would give dst - src).
  // Only equality-flag users are unaffected; verify callers / intent.
  emit_byte(0x3B);
  emit_operand(src, dst);
}

void Assembler::cmpq(Register dst, Register src) {
  (void) prefixq_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x3B, 0xC0, dst, src);
}

void Assembler::cmpq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x3B);
  emit_operand(dst, src);
}

void Assembler::cmpxchgq(Register reg, Address adr) {
  InstructionMark im(this);
  prefixq(adr, reg);
  emit_byte(0x0F);
  emit_byte(0xB1);                               // CMPXCHG r/m64, r64
  emit_operand(reg, adr);
}

// int64 <-> float/double conversions (REX.W forms via the *_q prefix helpers).

void Assembler::cvtsi2sdq(XMMRegister dst, Register src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  int encode = simd_prefix_and_encode_q(dst, dst, src, VEX_SIMD_F2);
  emit_byte(0x2A);                               // CVTSI2SD
  emit_byte(0xC0 | encode);
}

void Assembler::cvtsi2sdq(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  simd_prefix_q(dst, dst, src, VEX_SIMD_F2);
  emit_byte(0x2A);
  emit_operand(dst, src);
}

void Assembler::cvtsi2ssq(XMMRegister dst, Register src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  int encode = simd_prefix_and_encode_q(dst, dst, src, VEX_SIMD_F3);
  emit_byte(0x2A);                               // CVTSI2SS
  emit_byte(0xC0 | encode);
}

void Assembler::cvtsi2ssq(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  simd_prefix_q(dst, dst, src, VEX_SIMD_F3);
  emit_byte(0x2A);
  emit_operand(dst, src);
}

void Assembler::cvttsd2siq(Register dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_F2);
  emit_byte(0x2C);                               // CVTTSD2SI (truncating)
  emit_byte(0xC0 | encode);
}

void Assembler::cvttss2siq(Register dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_F3);
  emit_byte(0x2C);                               // CVTTSS2SI (truncating)
  emit_byte(0xC0 | encode);
}

void Assembler::decl(Register dst) {
  // Don't use it directly. Use MacroAssembler::decrementl() instead.
  // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xFF);
  emit_byte(0xC8 | encode);                      // FF /1: DEC
}

void Assembler::decq(Register dst) {
  // Don't use it directly. Use MacroAssembler::decrementq() instead.
  // Use two-byte form (one-byte from is a REX prefix in 64-bit mode)
  int encode = prefixq_and_encode(dst->encoding());
  emit_byte(0xFF);
  emit_byte(0xC8 | encode);
}

void Assembler::decq(Address dst) {
  // Don't use it directly. Use MacroAssembler::decrementq() instead.
  InstructionMark im(this);
  prefixq(dst);
  emit_byte(0xFF);
  emit_operand(rcx, dst);                        // /1 = DEC digit
}

void Assembler::fxrstor(Address src) {
  prefixq(src);
  emit_byte(0x0F);
  emit_byte(0xAE);
  emit_operand(as_Register(1), src);             // 0F AE /1: FXRSTOR
}

void Assembler::fxsave(Address dst) {
  prefixq(dst);
  emit_byte(0x0F);
  emit_byte(0xAE);
  emit_operand(as_Register(0), dst);             // 0F AE /0: FXSAVE
}

void Assembler::idivq(Register src) {
  int encode = prefixq_and_encode(src->encoding());
  emit_byte(0xF7);
  emit_byte(0xF8 | encode);                      // F7 /7: IDIV
}

void Assembler::imulq(Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xAF);                               // IMUL r64, r/m64
  emit_byte(0xC0 | encode);
}

void Assembler::imulq(Register dst, Register src, int value) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  if (is8bit(value)) {
    emit_byte(0x6B);                             // IMUL r64, r/m64, imm8
    emit_byte(0xC0 | encode);
    emit_byte(value & 0xFF);
  } else {
    emit_byte(0x69);                             // IMUL r64, r/m64, imm32
    emit_byte(0xC0 | encode);
    emit_long(value);
  }
}

void Assembler::incl(Register dst) {
  // Don't use it directly. Use MacroAssembler::incrementl() instead.
  // Use two-byte form (one-byte from is a REX prefix in 64-bit mode)
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xFF);
  emit_byte(0xC0 | encode);                      // FF /0: INC
}

void Assembler::incq(Register dst) {
  // Don't use it directly. Use MacroAssembler::incrementq() instead.
  // Use two-byte form (one-byte from is a REX prefix in 64-bit mode)
  int encode = prefixq_and_encode(dst->encoding());
  emit_byte(0xFF);
  emit_byte(0xC0 | encode);
}

void Assembler::incq(Address dst) {
  // Don't use it directly. Use MacroAssembler::incrementq() instead.
  InstructionMark im(this);
  prefixq(dst);
  emit_byte(0xFF);
  emit_operand(rax, dst);                        // /0 = INC digit
}

void Assembler::lea(Register dst, Address src) {
  leaq(dst, src);
}

void Assembler::leaq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x8D);                               // LEA r64, m
  emit_operand(dst, src);
}

void Assembler::mov64(Register dst, int64_t imm64) {
  InstructionMark im(this);
  int encode = prefixq_and_encode(dst->encoding());
  emit_byte(0xB8 | encode);                      // REX.W B8+r: MOV r64, imm64
  emit_long64(imm64);
}

void Assembler::mov_literal64(Register dst, intptr_t imm64, RelocationHolder const& rspec) {
  InstructionMark im(this);
  int encode = prefixq_and_encode(dst->encoding());
  emit_byte(0xB8 | encode);
  emit_data64(imm64, rspec);                     // relocated 64-bit immediate
}

// Narrow (compressed) oop immediates: 32-bit payload with narrow_oop_operand
// relocation so the GC can find and patch it.

void Assembler::mov_narrow_oop(Register dst, int32_t imm32, RelocationHolder const& rspec) {
  InstructionMark im(this);
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xB8 | encode);                      // MOV r32, imm32
  emit_data((int)imm32, rspec, narrow_oop_operand);
}

void Assembler::mov_narrow_oop(Address dst, int32_t imm32, RelocationHolder const& rspec) {
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0xC7);                               // C7 /0: MOV r/m32, imm32
  emit_operand(rax, dst, 4);
  emit_data((int)imm32, rspec, narrow_oop_operand);
}

void Assembler::cmp_narrow_oop(Register src1, int32_t imm32, RelocationHolder const& rspec) {
  InstructionMark im(this);
  int encode = prefix_and_encode(src1->encoding());
  emit_byte(0x81);
  emit_byte(0xF8 | encode);                      // /7 = CMP digit
  emit_data((int)imm32, rspec, narrow_oop_operand);
}

void Assembler::cmp_narrow_oop(Address src1, int32_t imm32, RelocationHolder const& rspec) {
  InstructionMark im(this);
  prefix(src1);
  emit_byte(0x81);
  // NOTE(review): rax supplies /0, which with 0x81 encodes ADD r/m32, imm32;
  // CMP is /7 (rdi), as used by the Register overload above and by
  // cmpq(Address, imm32). Verify this digit against the intended instruction.
  emit_operand(rax, src1, 4);
  emit_data((int)imm32, rspec, narrow_oop_operand);
}

void Assembler::lzcntq(Register dst, Register src) {
  // Without lzcnt support, F3 0F BD decodes as plain BSR (different result).
  assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR");
  emit_byte(0xF3);
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xBD);                               // F3 0F BD: LZCNT
  emit_byte(0xC0 | encode);
}

void Assembler::movdq(XMMRegister dst, Register src) {
  // table D-1 says MMX/SSE2
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_66);
  emit_byte(0x6E);                               // MOVQ xmm, r64
  emit_byte(0xC0 | encode);
}

void Assembler::movdq(Register dst, XMMRegister src) {
  // table D-1 says MMX/SSE2
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  // swap src/dst to get correct prefix
  int encode = simd_prefix_and_encode_q(src, dst, VEX_SIMD_66);
  emit_byte(0x7E);                               // MOVQ r64, xmm
  emit_byte(0xC0 | encode);
}

void Assembler::movq(Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x8B);                               // MOV r64, r/m64
  emit_byte(0xC0 | encode);
}

void Assembler::movq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x8B);
  emit_operand(dst, src);
}

void Assembler::movq(Address dst, Register src) {
  InstructionMark im(this);
  prefixq(dst, src);
  emit_byte(0x89);                               // MOV r/m64, r64
  emit_operand(src, dst);
}

// Sign/zero-extending loads into 64-bit registers.

void Assembler::movsbq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x0F);
  emit_byte(0xBE);                               // MOVSX r64, r/m8
  emit_operand(dst, src);
}

void Assembler::movsbq(Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xBE);
  emit_byte(0xC0 | encode);
}

void Assembler::movslq(Register dst, int32_t imm32) {
  // dbx shows movslq(rcx, 3) as movq $0x0000000049000000,(%rbx)
  // and movslq(r8, 3); as movl $0x0000000048000000,(%rbx)
  // as a result we shouldn't use until tested at runtime...
  ShouldNotReachHere();
  InstructionMark im(this);
  int encode = prefixq_and_encode(dst->encoding());
  emit_byte(0xC7 | encode);                      // suspect encoding — see comment above
  emit_long(imm32);
}

void Assembler::movslq(Address dst, int32_t imm32) {
  assert(is_simm32(imm32), "lost bits");
  InstructionMark im(this);
  prefixq(dst);
  emit_byte(0xC7);                               // C7 /0: MOV r/m64, sign-extended imm32
  emit_operand(rax, dst, 4);
  emit_long(imm32);
}

void Assembler::movslq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x63);                               // MOVSXD r64, r/m32
  emit_operand(dst, src);
}

void Assembler::movslq(Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x63);
  emit_byte(0xC0 | encode);
}

void Assembler::movswq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x0F);
  emit_byte(0xBF);                               // MOVSX r64, r/m16
  emit_operand(dst, src);
}

void Assembler::movswq(Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xBF);
  emit_byte(0xC0 | encode);
}

void Assembler::movzbq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x0F);
  emit_byte(0xB6);                               // MOVZX r64, r/m8
  emit_operand(dst, src);
}

void Assembler::movzbq(Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xB6);
  emit_byte(0xC0 | encode);
}

void Assembler::movzwq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x0F);
  emit_byte(0xB7);                               // MOVZX r64, r/m16
  emit_operand(dst, src);
}

void Assembler::movzwq(Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xB7);
  emit_byte(0xC0 | encode);
}

void Assembler::negq(Register dst) {
  int encode = prefixq_and_encode(dst->encoding());
  emit_byte(0xF7);
  emit_byte(0xD8 | encode);                      // F7 /3: NEG
}

void Assembler::notq(Register dst) {
  int encode = prefixq_and_encode(dst->encoding());
  emit_byte(0xF7);
  emit_byte(0xD0 | encode);                      // F7 /2: NOT
}

void Assembler::orq(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefixq(dst);
  emit_byte(0x81);
  emit_operand(rcx, dst, 4);                     // /1 = OR digit
  emit_long(imm32);
}

void Assembler::orq(Register dst, int32_t imm32) {
  (void) prefixq_and_encode(dst->encoding());
  emit_arith(0x81, 0xC8, dst, imm32);
}

void Assembler::orq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x0B);                               // OR r64, r/m64
  emit_operand(dst, src);
}

void Assembler::orq(Register dst, Register src) {
  (void) prefixq_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x0B, 0xC0, dst, src);
}

// Restore all general registers saved by pusha(); continues past this chunk.
void Assembler::popa() { // 64bit
  movq(r15, Address(rsp, 0));
  movq(r14, Address(rsp, wordSize));
  movq(r13, Address(rsp, 2 * wordSize));
  movq(r12, Address(rsp, 3 * wordSize));
  movq(r11, Address(rsp, 4 * wordSize));
  movq(r10, Address(rsp, 5 * wordSize));
  movq(r9, Address(rsp, 6 * wordSize));
  movq(r8, Address(rsp, 7 * wordSize));
  movq(rdi, Address(rsp, 8 * wordSize));
  movq(rsi, Address(rsp, 9 * wordSize));
  movq(rbp, Address(rsp, 10 * wordSize));
  // skip rsp
  movq(rbx,
Address(rsp, 12 * wordSize)); 4435 movq(rdx, Address(rsp, 13 * wordSize)); 4436 movq(rcx, Address(rsp, 14 * wordSize)); 4437 movq(rax, Address(rsp, 15 * wordSize)); 4438 4439 addq(rsp, 16 * wordSize); 4440 } 4441 4442 void Assembler::popcntq(Register dst, Address src) { 4443 assert(VM_Version::supports_popcnt(), "must support"); 4444 InstructionMark im(this); 4445 emit_byte(0xF3); 4446 prefixq(src, dst); 4447 emit_byte(0x0F); 4448 emit_byte(0xB8); 4449 emit_operand(dst, src); 4450 } 4451 4452 void Assembler::popcntq(Register dst, Register src) { 4453 assert(VM_Version::supports_popcnt(), "must support"); 4454 emit_byte(0xF3); 4455 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 4456 emit_byte(0x0F); 4457 emit_byte(0xB8); 4458 emit_byte(0xC0 | encode); 4459 } 4460 4461 void Assembler::popq(Address dst) { 4462 InstructionMark im(this); 4463 prefixq(dst); 4464 emit_byte(0x8F); 4465 emit_operand(rax, dst); 4466 } 4467 4468 void Assembler::pusha() { // 64bit 4469 // we have to store original rsp. ABI says that 128 bytes 4470 // below rsp are local scratch. 
  // Save the incoming rsp first: -5 * wordSize relative to the incoming
  // rsp is the same location as slot 11 after the subq below, which is
  // exactly the slot popa() skips.
  movq(Address(rsp, -5 * wordSize), rsp);

  subq(rsp, 16 * wordSize);

  movq(Address(rsp, 15 * wordSize), rax);
  movq(Address(rsp, 14 * wordSize), rcx);
  movq(Address(rsp, 13 * wordSize), rdx);
  movq(Address(rsp, 12 * wordSize), rbx);
  // skip rsp
  movq(Address(rsp, 10 * wordSize), rbp);
  movq(Address(rsp, 9 * wordSize), rsi);
  movq(Address(rsp, 8 * wordSize), rdi);
  movq(Address(rsp, 7 * wordSize), r8);
  movq(Address(rsp, 6 * wordSize), r9);
  movq(Address(rsp, 5 * wordSize), r10);
  movq(Address(rsp, 4 * wordSize), r11);
  movq(Address(rsp, 3 * wordSize), r12);
  movq(Address(rsp, 2 * wordSize), r13);
  movq(Address(rsp, wordSize), r14);
  movq(Address(rsp, 0), r15);
}

// PUSH m64: FF /6 (rsi in the reg field selects /6 = PUSH).
void Assembler::pushq(Address src) {
  InstructionMark im(this);
  prefixq(src);
  emit_byte(0xFF);
  emit_operand(rsi, src);
}

// RCL r64, imm8: one-byte D1 /2 form for a count of 1, else C1 /2 ib.
void Assembler::rclq(Register dst, int imm8) {
  assert(isShiftCount(imm8 >> 1), "illegal shift count");
  int encode = prefixq_and_encode(dst->encoding());
  if (imm8 == 1) {
    emit_byte(0xD1);
    emit_byte(0xD0 | encode);
  } else {
    emit_byte(0xC1);
    emit_byte(0xD0 | encode);
    emit_byte(imm8);
  }
}

// SAR r64, imm8: D1 /7 for a count of 1, else C1 /7 ib.
void Assembler::sarq(Register dst, int imm8) {
  assert(isShiftCount(imm8 >> 1), "illegal shift count");
  int encode = prefixq_and_encode(dst->encoding());
  if (imm8 == 1) {
    emit_byte(0xD1);
    emit_byte(0xF8 | encode);
  } else {
    emit_byte(0xC1);
    emit_byte(0xF8 | encode);
    emit_byte(imm8);
  }
}

// SAR r64, cl: D3 /7.
void Assembler::sarq(Register dst) {
  int encode = prefixq_and_encode(dst->encoding());
  emit_byte(0xD3);
  emit_byte(0xF8 | encode);
}

// SBB m64, imm32: 81 /3 (rbx in the reg field selects /3 = SBB).
void Assembler::sbbq(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefixq(dst);
  emit_arith_operand(0x81, rbx, dst, imm32);
}

void Assembler::sbbq(Register dst, int32_t imm32) {
  (void) prefixq_and_encode(dst->encoding());
  emit_arith(0x81, 0xD8, dst, imm32);
}

// SBB r64, m64: REX.W 1B /r.
void Assembler::sbbq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x1B);
  emit_operand(dst, src);
}

void Assembler::sbbq(Register dst, Register src) {
  (void) prefixq_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x1B, 0xC0, dst, src);
}

// SHL r64, imm8: D1 /4 for a count of 1, else C1 /4 ib.
void Assembler::shlq(Register dst, int imm8) {
  assert(isShiftCount(imm8 >> 1), "illegal shift count");
  int encode = prefixq_and_encode(dst->encoding());
  if (imm8 == 1) {
    emit_byte(0xD1);
    emit_byte(0xE0 | encode);
  } else {
    emit_byte(0xC1);
    emit_byte(0xE0 | encode);
    emit_byte(imm8);
  }
}

// SHL r64, cl: D3 /4.
void Assembler::shlq(Register dst) {
  int encode = prefixq_and_encode(dst->encoding());
  emit_byte(0xD3);
  emit_byte(0xE0 | encode);
}

// SHR r64, imm8: C1 /5 ib (no one-byte D1 special case here).
void Assembler::shrq(Register dst, int imm8) {
  assert(isShiftCount(imm8 >> 1), "illegal shift count");
  int encode = prefixq_and_encode(dst->encoding());
  emit_byte(0xC1);
  emit_byte(0xE8 | encode);
  emit_byte(imm8);
}

// SHR r64, cl: D3 /5.
void Assembler::shrq(Register dst) {
  int encode = prefixq_and_encode(dst->encoding());
  emit_byte(0xD3);
  emit_byte(0xE8 | encode);
}

// SUB m64, imm32: 81 /5 (rbp in the reg field selects /5 = SUB).
void Assembler::subq(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefixq(dst);
  emit_arith_operand(0x81, rbp, dst, imm32);
}

// SUB m64, r64: REX.W 29 /r.
void Assembler::subq(Address dst, Register src) {
  InstructionMark im(this);
  prefixq(dst, src);
  emit_byte(0x29);
  emit_operand(src, dst);
}

void Assembler::subq(Register dst, int32_t imm32) {
  (void) prefixq_and_encode(dst->encoding());
  emit_arith(0x81, 0xE8, dst, imm32);
}

// SUB r64, m64: REX.W 2B /r.
void Assembler::subq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x2B);
  emit_operand(dst, src);
}

void Assembler::subq(Register dst, Register src) {
  (void) prefixq_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x2B, 0xC0, dst, src);
}

// TEST r64, imm32.  rax (encoding 0) has a short form (REX.W A9 id);
// every other register uses F7 /0.
void Assembler::testq(Register dst, int32_t imm32) {
  // not using emit_arith because test
  // doesn't support sign-extension of
  // 8bit operands
  int encode = dst->encoding();
  if (encode == 0) {
    prefix(REX_W);
    emit_byte(0xA9);
  } else {
    encode = prefixq_and_encode(encode);
    emit_byte(0xF7);
    emit_byte(0xC0 | encode);
  }
  emit_long(imm32);
}

void Assembler::testq(Register dst, Register src) {
  (void) prefixq_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x85, 0xC0, dst, src);
}

// XADD m64, r64: REX.W 0F C1 /r (exchange-and-add; pair with lock()).
void Assembler::xaddq(Address dst, Register src) {
  InstructionMark im(this);
  prefixq(dst, src);
  emit_byte(0x0F);
  emit_byte(0xC1);
  emit_operand(src, dst);
}

// XCHG r64, m64: REX.W 87 /r (implicitly locked by the hardware).
void Assembler::xchgq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x87);
  emit_operand(dst, src);
}

void Assembler::xchgq(Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x87);
  emit_byte(0xc0 | encode);
}

void Assembler::xorq(Register dst, Register src) {
  (void) prefixq_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x33, 0xC0, dst, src);
}

// XOR r64, m64: REX.W 33 /r.
void Assembler::xorq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x33);
  emit_operand(dst, src);
}

// Closes the "#ifndef _LP64 ... #else" arm whose #else side (above)
// holds the 64-bit-only instruction emitters; the trailing comment
// echoes the opening #ifndef condition, per convention.
#endif // !LP64

// Maps each Assembler::Condition (encodings 0x0..0xf) to its logical
// negation; indexed directly by the condition-code value.
static Assembler::Condition reverse[] = {
    Assembler::noOverflow   /* overflow      = 0x0 */ ,
    Assembler::overflow     /* noOverflow    = 0x1 */ ,
    Assembler::aboveEqual   /* carrySet      = 0x2, below         = 0x2 */ ,
    Assembler::below        /* aboveEqual    = 0x3, carryClear    = 0x3 */ ,
    Assembler::notZero      /* zero          = 0x4, equal         = 0x4 */ ,
    Assembler::zero         /* notZero       = 0x5, notEqual      = 0x5 */ ,
    Assembler::above        /* belowEqual    = 0x6 */ ,
    Assembler::belowEqual   /* above         = 0x7 */ ,
    Assembler::positive     /* negative      = 0x8 */ ,
    Assembler::negative     /* positive      = 0x9 */ ,
    Assembler::noParity     /* parity        = 0xa */ ,
    Assembler::parity       /* noParity      = 0xb */ ,
    Assembler::greaterEqual /* less          = 0xc */ ,
    Assembler::less         /* greaterEqual  = 0xd */ ,
    Assembler::greater      /* lessEqual     = 0xe */ ,
    Assembler::lessEqual    /* greater       = 0xf, */

};


// Implementation of MacroAssembler

// First all the versions that have distinct versions depending on 32/64 bit
// Unless the difference is trivial (1 line or so).

#ifndef _LP64

// 32bit versions

// 32-bit: an AddressLiteral is usable directly as an absolute Address.
Address MacroAssembler::as_Address(AddressLiteral adr) {
  return Address(adr.target(), adr.rspec());
}

Address MacroAssembler::as_Address(ArrayAddress adr) {
  return Address::make_array(adr);
}

// Attempt to acquire the biased lock on obj_reg for the current thread.
// On success control falls to 'done' with the lock held; unresolved
// cases fall through to 'cas_label' (bound at the end) for the normal
// CAS-based locking path.  Returns the code offset of the instruction
// whose implicit null check covers the mark-word load, or -1 if the
// caller said the mark is already in swap_reg.
int MacroAssembler::biased_locking_enter(Register lock_reg,
                                         Register obj_reg,
                                         Register swap_reg,
                                         Register tmp_reg,
                                         bool swap_reg_contains_mark,
                                         Label& done,
                                         Label* slow_case,
                                         BiasedLockingCounters* counters) {
  assert(UseBiasedLocking, "why call this otherwise?");
  assert(swap_reg == rax, "swap_reg must be rax, for cmpxchg");
  assert_different_registers(lock_reg, obj_reg, swap_reg);

  if (PrintBiasedLockingStatistics && counters == NULL)
    counters = BiasedLocking::counters();

  // On 32-bit we may not have a spare register: borrow lock_reg and
  // push/pop around each use of tmp_reg below.
  bool need_tmp_reg = false;
  if (tmp_reg == noreg) {
    need_tmp_reg = true;
    tmp_reg = lock_reg;
  } else {
    assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg);
  }
  assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
  Address mark_addr      (obj_reg, oopDesc::mark_offset_in_bytes());
  Address klass_addr     (obj_reg, oopDesc::klass_offset_in_bytes());
  Address saved_mark_addr(lock_reg, 0);

  // Biased locking
  // See whether the lock is currently biased toward our thread and
  // whether the epoch is still valid
  // Note that the runtime guarantees sufficient alignment of JavaThread
  // pointers to allow age to be placed into low bits
  // First check to see whether biasing is even enabled for this object
  Label cas_label;
  int null_check_offset = -1;
  if (!swap_reg_contains_mark) {
    null_check_offset = offset();
    movl(swap_reg, mark_addr);
  }
  if (need_tmp_reg) {
    push(tmp_reg);
  }
  movl(tmp_reg, swap_reg);
  andl(tmp_reg, markOopDesc::biased_lock_mask_in_place);
  cmpl(tmp_reg, markOopDesc::biased_lock_pattern);
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  jcc(Assembler::notEqual, cas_label);
  // The bias pattern is present in the object's header. Need to check
  // whether the bias owner and the epoch are both still current.
  // Note that because there is no current thread register on x86 we
  // need to store off the mark word we read out of the object to
  // avoid reloading it and needing to recheck invariants below. This
  // store is unfortunate but it makes the overall code shorter and
  // simpler.
  movl(saved_mark_addr, swap_reg);
  if (need_tmp_reg) {
    push(tmp_reg);
  }
  get_thread(tmp_reg);
  xorl(swap_reg, tmp_reg);
  if (swap_reg_contains_mark) {
    null_check_offset = offset();
  }
  movl(tmp_reg, klass_addr);
  // XOR against the klass's prototype header: a zero result (below the
  // age bits) means this thread already owns the bias in a valid epoch.
  xorl(swap_reg, Address(tmp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
  andl(swap_reg, ~((int) markOopDesc::age_mask_in_place));
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  if (counters != NULL) {
    cond_inc32(Assembler::zero,
               ExternalAddress((address)counters->biased_lock_entry_count_addr()));
  }
  jcc(Assembler::equal, done);

  Label try_revoke_bias;
  Label try_rebias;

  // At this point we know that the header has the bias pattern and
  // that we are not the bias owner in the current epoch. We need to
  // figure out more details about the state of the header in order to
  // know what operations can be legally performed on the object's
  // header.

  // If the low three bits in the xor result aren't clear, that means
  // the prototype header is no longer biased and we have to revoke
  // the bias on this object.
  testl(swap_reg, markOopDesc::biased_lock_mask_in_place);
  jcc(Assembler::notZero, try_revoke_bias);

  // Biasing is still enabled for this data type. See whether the
  // epoch of the current bias is still valid, meaning that the epoch
  // bits of the mark word are equal to the epoch bits of the
  // prototype header. (Note that the prototype header's epoch bits
  // only change at a safepoint.) If not, attempt to rebias the object
  // toward the current thread. Note that we must be absolutely sure
  // that the current epoch is invalid in order to do this because
  // otherwise the manipulations it performs on the mark word are
  // illegal.
  testl(swap_reg, markOopDesc::epoch_mask_in_place);
  jcc(Assembler::notZero, try_rebias);

  // The epoch of the current bias is still valid but we know nothing
  // about the owner; it might be set or it might be clear. Try to
  // acquire the bias of the object using an atomic operation. If this
  // fails we will go in to the runtime to revoke the object's bias.
  // Note that we first construct the presumed unbiased header so we
  // don't accidentally blow away another thread's valid bias.
  movl(swap_reg, saved_mark_addr);
  andl(swap_reg,
       markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);
  if (need_tmp_reg) {
    push(tmp_reg);
  }
  get_thread(tmp_reg);
  orl(tmp_reg, swap_reg);
  if (os::is_MP()) {
    lock();
  }
  cmpxchgptr(tmp_reg, Address(obj_reg, 0));
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  // If the biasing toward our thread failed, this means that
  // another thread succeeded in biasing it toward itself and we
  // need to revoke that bias. The revocation will occur in the
  // interpreter runtime in the slow case.
  if (counters != NULL) {
    cond_inc32(Assembler::zero,
               ExternalAddress((address)counters->anonymously_biased_lock_entry_count_addr()));
  }
  if (slow_case != NULL) {
    jcc(Assembler::notZero, *slow_case);
  }
  jmp(done);

  bind(try_rebias);
  // At this point we know the epoch has expired, meaning that the
  // current "bias owner", if any, is actually invalid. Under these
  // circumstances _only_, we are allowed to use the current header's
  // value as the comparison value when doing the cas to acquire the
  // bias in the current epoch. In other words, we allow transfer of
  // the bias from one thread to another directly in this situation.
  //
  // FIXME: due to a lack of registers we currently blow away the age
  // bits in this situation. Should attempt to preserve them.
  if (need_tmp_reg) {
    push(tmp_reg);
  }
  get_thread(tmp_reg);
  movl(swap_reg, klass_addr);
  orl(tmp_reg, Address(swap_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
  movl(swap_reg, saved_mark_addr);
  if (os::is_MP()) {
    lock();
  }
  cmpxchgptr(tmp_reg, Address(obj_reg, 0));
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  // If the biasing toward our thread failed, then another thread
  // succeeded in biasing it toward itself and we need to revoke that
  // bias. The revocation will occur in the runtime in the slow case.
  if (counters != NULL) {
    cond_inc32(Assembler::zero,
               ExternalAddress((address)counters->rebiased_lock_entry_count_addr()));
  }
  if (slow_case != NULL) {
    jcc(Assembler::notZero, *slow_case);
  }
  jmp(done);

  bind(try_revoke_bias);
  // The prototype mark in the klass doesn't have the bias bit set any
  // more, indicating that objects of this data type are not supposed
  // to be biased any more. We are going to try to reset the mark of
  // this object to the prototype value and fall through to the
  // CAS-based locking scheme. Note that if our CAS fails, it means
  // that another thread raced us for the privilege of revoking the
  // bias of this particular object, so it's okay to continue in the
  // normal locking code.
  //
  // FIXME: due to a lack of registers we currently blow away the age
  // bits in this situation. Should attempt to preserve them.
  movl(swap_reg, saved_mark_addr);
  if (need_tmp_reg) {
    push(tmp_reg);
  }
  movl(tmp_reg, klass_addr);
  movl(tmp_reg, Address(tmp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
  if (os::is_MP()) {
    lock();
  }
  cmpxchgptr(tmp_reg, Address(obj_reg, 0));
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  // Fall through to the normal CAS-based lock, because no matter what
  // the result of the above CAS, some thread must have succeeded in
  // removing the bias bit from the object's header.
  if (counters != NULL) {
    cond_inc32(Assembler::zero,
               ExternalAddress((address)counters->revoked_lock_entry_count_addr()));
  }

  bind(cas_label);

  return null_check_offset;
}

// 32-bit leaf call: arguments were pushed on the stack by the caller,
// so pop them after the call returns.
void MacroAssembler::call_VM_leaf_base(address entry_point,
                                       int number_of_arguments) {
  call(RuntimeAddress(entry_point));
  increment(rsp, number_of_arguments * wordSize);
}

// Compare memory against an oop constant; the immediate carries an
// oop relocation so the GC can update it.
void MacroAssembler::cmpoop(Address src1, jobject obj) {
  cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate());
}

void MacroAssembler::cmpoop(Register src1, jobject obj) {
  cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate());
}

// Sign-extend lo into hi:lo (for division setup).
void MacroAssembler::extend_sign(Register hi, Register lo) {
  // According to Intel Doc. AP-526, "Integer Divide", p.18.
  // On P6+ with the canonical rdx:rax pair use cdq; otherwise emulate
  // with a copy and an arithmetic shift.
  if (VM_Version::is_P6() && hi == rdx && lo == rax) {
    cdql();
  } else {
    movl(hi, lo);
    sarl(hi, 31);
  }
}

void MacroAssembler::fat_nop() {
  // A 5 byte nop that is safe for patching (see patch_verified_entry)
  emit_byte(0x26); // es:
  emit_byte(0x2e); // cs:
  emit_byte(0x64); // fs:
  emit_byte(0x65); // gs:
  emit_byte(0x90);
}

// Branch to L if FPU status flag C2 is set (clobbers and restores rax).
void MacroAssembler::jC2(Register tmp, Label& L) {
  // set parity bit if FPU flag C2 is set (via rax)
  save_rax(tmp);
  fwait(); fnstsw_ax();
  sahf();
  restore_rax(tmp);
  // branch
  jcc(Assembler::parity, L);
}

// Branch to L if FPU status flag C2 is clear.
void MacroAssembler::jnC2(Register tmp, Label& L) {
  // set parity bit if FPU flag C2 is set (via rax)
  save_rax(tmp);
  fwait(); fnstsw_ax();
  sahf();
  restore_rax(tmp);
  // branch
  jcc(Assembler::noParity, L);
}

// 32bit can do a case table jump in one instruction but we no longer allow the base
// to be installed in the Address class
void MacroAssembler::jump(ArrayAddress entry) {
  jmp(as_Address(entry));
}

// Note: y_lo will be destroyed
void MacroAssembler::lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo) {
  // Long compare for Java (semantics as described in JVM spec.)
  Label high, low, done;

  cmpl(x_hi, y_hi);
  jcc(Assembler::less, low);
  jcc(Assembler::greater, high);
  // x_hi is the return register
  xorl(x_hi, x_hi);
  cmpl(x_lo, y_lo);
  jcc(Assembler::below, low);
  jcc(Assembler::equal, done);

  bind(high);
  xorl(x_hi, x_hi);
  increment(x_hi);                  // result :=  1
  jmp(done);

  bind(low);
  xorl(x_hi, x_hi);
  decrementl(x_hi);                 // result := -1

  bind(done);
}

void MacroAssembler::lea(Register dst, AddressLiteral src) {
  mov_literal32(dst, (int32_t)src.target(), src.rspec());
}

void MacroAssembler::lea(Address dst, AddressLiteral adr) {
  // leal(dst, as_Address(adr));
  // see note in movl as to why we must use a move
  mov_literal32(dst, (int32_t) adr.target(), adr.rspec());
}

void MacroAssembler::leave() {
  mov(rsp, rbp);
  pop(rbp);
}

void MacroAssembler::lmul(int x_rsp_offset, int y_rsp_offset) {
  // Multiplication of two Java long values stored on the stack
  // as illustrated below. Result is in rdx:rax.
  //
  // rsp ---> [  ??  ] \               \
  //            ....    | y_rsp_offset  |
  //          [ y_lo ] /  (in bytes)    | x_rsp_offset
  //          [ y_hi ]                  | (in bytes)
  //            ....                    |
  //          [ x_lo ]                 /
  //          [ x_hi ]
  //            ....
  //
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
  Address x_hi(rsp, x_rsp_offset + wordSize); Address x_lo(rsp, x_rsp_offset);
  Address y_hi(rsp, y_rsp_offset + wordSize); Address y_lo(rsp, y_rsp_offset);
  Label quick;
  // load x_hi, y_hi and check if quick
  // multiplication is possible
  movl(rbx, x_hi);
  movl(rcx, y_hi);
  movl(rax, rbx);
  orl(rbx, rcx);                                 // rbx, = 0 <=> x_hi = 0 and y_hi = 0
  jcc(Assembler::zero, quick);                   // if rbx, = 0 do quick multiply
  // do full multiplication
  // 1st step
  mull(y_lo);                                    // x_hi * y_lo
  movl(rbx, rax);                                // save lo(x_hi * y_lo) in rbx,
  // 2nd step
  movl(rax, x_lo);
  mull(rcx);                                     // x_lo * y_hi
  addl(rbx, rax);                                // add lo(x_lo * y_hi) to rbx,
  // 3rd step
  bind(quick);                                   // note: rbx, = 0 if quick multiply!
  movl(rax, x_lo);
  mull(y_lo);                                    // x_lo * y_lo
  addl(rdx, rbx);                                // correct hi(x_lo * y_lo)
}

// Two-word negation: negate lo, propagate the borrow, negate hi.
void MacroAssembler::lneg(Register hi, Register lo) {
  negl(lo);
  adcl(hi, 0);
  negl(hi);
}

void MacroAssembler::lshl(Register hi, Register lo) {
  // Java shift left long support (semantics as described in JVM spec., p.305)
  // (basic idea for shift counts s >= n: x << s == (x << n) << (s - n))
  // shift value is in rcx !
  assert(hi != rcx, "must not use rcx");
  assert(lo != rcx, "must not use rcx");
  const Register s = rcx;                        // shift count
  const int      n = BitsPerWord;
  Label L;
  andl(s, 0x3f);                                 // s := s & 0x3f (s < 0x40)
  cmpl(s, n);                                    // if (s < n)
  jcc(Assembler::less, L);                       // else (s >= n)
  movl(hi, lo);                                  // x := x << n
  xorl(lo, lo);
  // Note: subl(s, n) is not needed since the Intel shift instructions work rcx mod n!
  bind(L);                                       // s (mod n) < n
  shldl(hi, lo);                                 // x := x << s
  shll(lo);
}


void MacroAssembler::lshr(Register hi, Register lo, bool sign_extension) {
  // Java shift right long support (semantics as described in JVM spec., p.306 & p.310)
  // (basic idea for shift counts s >= n: x >> s == (x >> n) >> (s - n))
  assert(hi != rcx, "must not use rcx");
  assert(lo != rcx, "must not use rcx");
  const Register s = rcx;                        // shift count
  const int      n = BitsPerWord;
  Label L;
  andl(s, 0x3f);                                 // s := s & 0x3f (s < 0x40)
  cmpl(s, n);                                    // if (s < n)
  jcc(Assembler::less, L);                       // else (s >= n)
  movl(lo, hi);                                  // x := x >> n
  if (sign_extension) sarl(hi, 31);
  else                xorl(hi, hi);
  // Note: subl(s, n) is not needed since the Intel shift instructions work rcx mod n!
  bind(L);                                       // s (mod n) < n
  shrdl(lo, hi);                                 // x := x >> s
  if (sign_extension) sarl(hi);
  else                shrl(hi);
}

// Load/store an oop constant with an oop relocation (32-bit absolute).
void MacroAssembler::movoop(Register dst, jobject obj) {
  mov_literal32(dst, (int32_t)obj, oop_Relocation::spec_for_immediate());
}

void MacroAssembler::movoop(Address dst, jobject obj) {
  mov_literal32(dst, (int32_t)obj, oop_Relocation::spec_for_immediate());
}

// Load either the literal address itself (lval) or the value stored at
// that address (rval).
void MacroAssembler::movptr(Register dst, AddressLiteral src) {
  if (src.is_lval()) {
    mov_literal32(dst, (intptr_t)src.target(), src.rspec());
  } else {
    movl(dst, as_Address(src));
  }
}

void MacroAssembler::movptr(ArrayAddress dst, Register src) {
  movl(as_Address(dst), src);
}

void MacroAssembler::movptr(Register dst, ArrayAddress src) {
  movl(dst, as_Address(src));
}

// src should NEVER be a real pointer. Use AddressLiteral for true pointers
void MacroAssembler::movptr(Address dst, intptr_t src) {
  movl(dst, src);
}


// Pop order mirrors push_callee_saved_registers() below.
void MacroAssembler::pop_callee_saved_registers() {
  pop(rcx);
  pop(rdx);
  pop(rdi);
  pop(rsi);
}

// Reload the FPU top-of-stack double from the stack and free the slot.
void MacroAssembler::pop_fTOS() {
  fld_d(Address(rsp, 0));
  addl(rsp, 2 * wordSize);
}

void MacroAssembler::push_callee_saved_registers() {
  push(rsi);
  push(rdi);
  push(rdx);
  push(rcx);
}

// Spill the FPU top-of-stack double (two 32-bit words) onto the stack.
void MacroAssembler::push_fTOS() {
  subl(rsp, 2 * wordSize);
  fstp_d(Address(rsp, 0));
}


void MacroAssembler::pushoop(jobject obj) {
  push_literal32((int32_t)obj, oop_Relocation::spec_for_immediate());
}


void MacroAssembler::pushptr(AddressLiteral src) {
  if (src.is_lval()) {
    push_literal32((int32_t)src.target(), src.rspec());
  } else {
    pushl(as_Address(src));
  }
}

// dst := (byte flags set ? 1 : 0), widened to a full word.
void MacroAssembler::set_word_if_not_zero(Register dst) {
  xorl(dst, dst);
  set_byte_if_not_zero(dst);
}

// 32-bit calling convention helpers: all leaf-call args go on the stack.
static void pass_arg0(MacroAssembler* masm, Register arg) {
  masm->push(arg);
}

static void pass_arg1(MacroAssembler* masm, Register arg) {
  masm->push(arg);
}

static void pass_arg2(MacroAssembler* masm, Register arg) {
  masm->push(arg);
}

static void pass_arg3(MacroAssembler* masm, Register arg) {
  masm->push(arg);
}

#ifndef PRODUCT
extern "C" void findpc(intptr_t x);
#endif

// Runtime target of MacroAssembler::stop(): dumps the register state
// pushed by stop()'s pusha and halts.  Parameters mirror the pusha
// stack layout plus the faked return address (eip) and the message.
void MacroAssembler::debug32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip, char* msg) {
  // In order to get locks to work, we need to fake a in_VM state
  JavaThread* thread = JavaThread::current();
  JavaThreadState saved_state = thread->thread_state();
  thread->set_thread_state(_thread_in_vm);
  if (ShowMessageBoxOnError) {
    // NOTE(review): these locals shadow the identical ones above;
    // redundant but harmless.
    JavaThread* thread = JavaThread::current();
    JavaThreadState saved_state = thread->thread_state();
    thread->set_thread_state(_thread_in_vm);
    if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
      ttyLocker ttyl;
      BytecodeCounter::print();
    }
    // To see where a verify_oop failed, get $ebx+40/X for this frame.
    // This is the value of eip which points to where verify_oop will return.
    if (os::message_box(msg, "Execution stopped, print registers?")) {
      ttyLocker ttyl;
      tty->print_cr("eip = 0x%08x", eip);
#ifndef PRODUCT
      if ((WizardMode || Verbose) && PrintMiscellaneous) {
        tty->cr();
        findpc(eip);
        tty->cr();
      }
#endif
      tty->print_cr("rax = 0x%08x", rax);
      tty->print_cr("rbx = 0x%08x", rbx);
      tty->print_cr("rcx = 0x%08x", rcx);
      tty->print_cr("rdx = 0x%08x", rdx);
      tty->print_cr("rdi = 0x%08x", rdi);
      tty->print_cr("rsi = 0x%08x", rsi);
      tty->print_cr("rbp = 0x%08x", rbp);
      tty->print_cr("rsp = 0x%08x", rsp);
      BREAKPOINT;
      assert(false, "start up GDB");
    }
  } else {
    ttyLocker ttyl;
    ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg);
    assert(false, err_msg("DEBUG MESSAGE: %s", msg));
  }
  ThreadStateTransition::transition(thread, _thread_in_vm, saved_state);
}

// Emit code that dumps all registers plus msg via debug32() and halts.
void MacroAssembler::stop(const char* msg) {
  ExternalAddress message((address)msg);
  // push address of message
  pushptr(message.addr());
  { Label L; call(L, relocInfo::none); bind(L); }     // push eip
  pusha();                                            // push registers
  call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug32)));
  hlt();
}

// Emit code that prints a warning (via the runtime 'warning' function)
// and continues; the full CPU state is preserved around the call.
void MacroAssembler::warn(const char* msg) {
  push_CPU_state();

  ExternalAddress message((address) msg);
  // push address of message
  pushptr(message.addr());

  call(RuntimeAddress(CAST_FROM_FN_PTR(address, warning)));
  addl(rsp, wordSize);       // discard argument
  pop_CPU_state();
}

#else // _LP64

// 64 bit versions

Address MacroAssembler::as_Address(AddressLiteral adr) {
  // amd64 always does this as a pc-rel
  // we can be absolute or disp based on the instruction type
  // jmp/call are displacements others are absolute
  assert(!adr.is_lval(), "must be rval");
  assert(reachable(adr), "must be");
  return Address((int32_t)(intptr_t)(adr.target() - pc()), adr.target(), adr.reloc());

}

// Materialize the array base into rscratch1 and fold the index part
// into a register-indexed Address.
Address MacroAssembler::as_Address(ArrayAddress adr) {
  AddressLiteral base = adr.base();
  lea(rscratch1, base);
  Address index = adr.index();
  assert(index._disp == 0, "must not have disp"); // maybe it can?
  Address array(rscratch1, index._index, index._scale, index._disp);
  return array;
}

// 64-bit biased-lock acquisition.  Same protocol as the 32-bit version
// above, but the current thread lives in r15 so no push/pop juggling is
// needed and tmp_reg is mandatory.  Returns the offset of the
// instruction carrying the implicit null check, or -1 if the caller
// already loaded the mark into swap_reg.
int MacroAssembler::biased_locking_enter(Register lock_reg,
                                         Register obj_reg,
                                         Register swap_reg,
                                         Register tmp_reg,
                                         bool swap_reg_contains_mark,
                                         Label& done,
                                         Label* slow_case,
                                         BiasedLockingCounters* counters) {
  assert(UseBiasedLocking, "why call this otherwise?");
  assert(swap_reg == rax, "swap_reg must be rax for cmpxchgq");
  assert(tmp_reg != noreg, "tmp_reg must be supplied");
  assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg);
  assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
  Address mark_addr      (obj_reg, oopDesc::mark_offset_in_bytes());
  Address saved_mark_addr(lock_reg, 0);

  if (PrintBiasedLockingStatistics && counters == NULL)
    counters = BiasedLocking::counters();

  // Biased locking
  // See whether the lock is currently biased toward our thread and
  // whether the epoch is still valid
  // Note that the runtime guarantees sufficient alignment of JavaThread
  // pointers to allow age to be placed into low bits
  // First check to see whether biasing is even enabled for this object
  Label cas_label;
  int null_check_offset = -1;
  if (!swap_reg_contains_mark) {
    null_check_offset = offset();
    movq(swap_reg, mark_addr);
  }
  movq(tmp_reg, swap_reg);
  andq(tmp_reg, markOopDesc::biased_lock_mask_in_place);
  cmpq(tmp_reg, markOopDesc::biased_lock_pattern);
  jcc(Assembler::notEqual, cas_label);
  // The bias pattern is present in the object's header. Need to check
  // whether the bias owner and the epoch are both still current.
  load_prototype_header(tmp_reg, obj_reg);
  orq(tmp_reg, r15_thread);
  xorq(tmp_reg, swap_reg);
  andq(tmp_reg, ~((int) markOopDesc::age_mask_in_place));
  // NOTE(review): the 32-bit version bumps biased_lock_entry_count here;
  // using the "anonymously" counter for this (owned-bias hit) case looks
  // inconsistent -- verify against the upstream sources.
  if (counters != NULL) {
    cond_inc32(Assembler::zero,
               ExternalAddress((address) counters->anonymously_biased_lock_entry_count_addr()));
  }
  jcc(Assembler::equal, done);

  Label try_revoke_bias;
  Label try_rebias;

  // At this point we know that the header has the bias pattern and
  // that we are not the bias owner in the current epoch. We need to
  // figure out more details about the state of the header in order to
  // know what operations can be legally performed on the object's
  // header.

  // If the low three bits in the xor result aren't clear, that means
  // the prototype header is no longer biased and we have to revoke
  // the bias on this object.
  testq(tmp_reg, markOopDesc::biased_lock_mask_in_place);
  jcc(Assembler::notZero, try_revoke_bias);

  // Biasing is still enabled for this data type. See whether the
  // epoch of the current bias is still valid, meaning that the epoch
  // bits of the mark word are equal to the epoch bits of the
  // prototype header. (Note that the prototype header's epoch bits
  // only change at a safepoint.) If not, attempt to rebias the object
  // toward the current thread. Note that we must be absolutely sure
  // that the current epoch is invalid in order to do this because
  // otherwise the manipulations it performs on the mark word are
  // illegal.
  testq(tmp_reg, markOopDesc::epoch_mask_in_place);
  jcc(Assembler::notZero, try_rebias);

  // The epoch of the current bias is still valid but we know nothing
  // about the owner; it might be set or it might be clear. Try to
  // acquire the bias of the object using an atomic operation. If this
  // fails we will go in to the runtime to revoke the object's bias.
  // Note that we first construct the presumed unbiased header so we
  // don't accidentally blow away another thread's valid bias.
  andq(swap_reg,
       markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);
  movq(tmp_reg, swap_reg);
  orq(tmp_reg, r15_thread);
  if (os::is_MP()) {
    lock();
  }
  cmpxchgq(tmp_reg, Address(obj_reg, 0));
  // If the biasing toward our thread failed, this means that
  // another thread succeeded in biasing it toward itself and we
  // need to revoke that bias. The revocation will occur in the
  // interpreter runtime in the slow case.
  if (counters != NULL) {
    cond_inc32(Assembler::zero,
               ExternalAddress((address) counters->anonymously_biased_lock_entry_count_addr()));
  }
  if (slow_case != NULL) {
    jcc(Assembler::notZero, *slow_case);
  }
  jmp(done);

  bind(try_rebias);
  // At this point we know the epoch has expired, meaning that the
  // current "bias owner", if any, is actually invalid. Under these
  // circumstances _only_, we are allowed to use the current header's
  // value as the comparison value when doing the cas to acquire the
  // bias in the current epoch. In other words, we allow transfer of
  // the bias from one thread to another directly in this situation.
5400 // 5401 // FIXME: due to a lack of registers we currently blow away the age 5402 // bits in this situation. Should attempt to preserve them. 5403 load_prototype_header(tmp_reg, obj_reg); 5404 orq(tmp_reg, r15_thread); 5405 if (os::is_MP()) { 5406 lock(); 5407 } 5408 cmpxchgq(tmp_reg, Address(obj_reg, 0)); 5409 // If the biasing toward our thread failed, then another thread 5410 // succeeded in biasing it toward itself and we need to revoke that 5411 // bias. The revocation will occur in the runtime in the slow case. 5412 if (counters != NULL) { 5413 cond_inc32(Assembler::zero, 5414 ExternalAddress((address) counters->rebiased_lock_entry_count_addr())); 5415 } 5416 if (slow_case != NULL) { 5417 jcc(Assembler::notZero, *slow_case); 5418 } 5419 jmp(done); 5420 5421 bind(try_revoke_bias); 5422 // The prototype mark in the klass doesn't have the bias bit set any 5423 // more, indicating that objects of this data type are not supposed 5424 // to be biased any more. We are going to try to reset the mark of 5425 // this object to the prototype value and fall through to the 5426 // CAS-based locking scheme. Note that if our CAS fails, it means 5427 // that another thread raced us for the privilege of revoking the 5428 // bias of this particular object, so it's okay to continue in the 5429 // normal locking code. 5430 // 5431 // FIXME: due to a lack of registers we currently blow away the age 5432 // bits in this situation. Should attempt to preserve them. 5433 load_prototype_header(tmp_reg, obj_reg); 5434 if (os::is_MP()) { 5435 lock(); 5436 } 5437 cmpxchgq(tmp_reg, Address(obj_reg, 0)); 5438 // Fall through to the normal CAS-based lock, because no matter what 5439 // the result of the above CAS, some thread must have succeeded in 5440 // removing the bias bit from the object's header. 
5441 if (counters != NULL) { 5442 cond_inc32(Assembler::zero, 5443 ExternalAddress((address) counters->revoked_lock_entry_count_addr())); 5444 } 5445 5446 bind(cas_label); 5447 5448 return null_check_offset; 5449 } 5450 5451 void MacroAssembler::call_VM_leaf_base(address entry_point, int num_args) { 5452 Label L, E; 5453 5454 #ifdef _WIN64 5455 // Windows always allocates space for it's register args 5456 assert(num_args <= 4, "only register arguments supported"); 5457 subq(rsp, frame::arg_reg_save_area_bytes); 5458 #endif 5459 5460 // Align stack if necessary 5461 testl(rsp, 15); 5462 jcc(Assembler::zero, L); 5463 5464 subq(rsp, 8); 5465 { 5466 call(RuntimeAddress(entry_point)); 5467 } 5468 addq(rsp, 8); 5469 jmp(E); 5470 5471 bind(L); 5472 { 5473 call(RuntimeAddress(entry_point)); 5474 } 5475 5476 bind(E); 5477 5478 #ifdef _WIN64 5479 // restore stack pointer 5480 addq(rsp, frame::arg_reg_save_area_bytes); 5481 #endif 5482 5483 } 5484 5485 void MacroAssembler::cmp64(Register src1, AddressLiteral src2) { 5486 assert(!src2.is_lval(), "should use cmpptr"); 5487 5488 if (reachable(src2)) { 5489 cmpq(src1, as_Address(src2)); 5490 } else { 5491 lea(rscratch1, src2); 5492 Assembler::cmpq(src1, Address(rscratch1, 0)); 5493 } 5494 } 5495 5496 int MacroAssembler::corrected_idivq(Register reg) { 5497 // Full implementation of Java ldiv and lrem; checks for special 5498 // case as described in JVM spec., p.243 & p.271. The function 5499 // returns the (pc) offset of the idivl instruction - may be needed 5500 // for implicit exceptions. 
5501 // 5502 // normal case special case 5503 // 5504 // input : rax: dividend min_long 5505 // reg: divisor (may not be eax/edx) -1 5506 // 5507 // output: rax: quotient (= rax idiv reg) min_long 5508 // rdx: remainder (= rax irem reg) 0 5509 assert(reg != rax && reg != rdx, "reg cannot be rax or rdx register"); 5510 static const int64_t min_long = 0x8000000000000000; 5511 Label normal_case, special_case; 5512 5513 // check for special case 5514 cmp64(rax, ExternalAddress((address) &min_long)); 5515 jcc(Assembler::notEqual, normal_case); 5516 xorl(rdx, rdx); // prepare rdx for possible special case (where 5517 // remainder = 0) 5518 cmpq(reg, -1); 5519 jcc(Assembler::equal, special_case); 5520 5521 // handle normal case 5522 bind(normal_case); 5523 cdqq(); 5524 int idivq_offset = offset(); 5525 idivq(reg); 5526 5527 // normal and special case exit 5528 bind(special_case); 5529 5530 return idivq_offset; 5531 } 5532 5533 void MacroAssembler::decrementq(Register reg, int value) { 5534 if (value == min_jint) { subq(reg, value); return; } 5535 if (value < 0) { incrementq(reg, -value); return; } 5536 if (value == 0) { ; return; } 5537 if (value == 1 && UseIncDec) { decq(reg) ; return; } 5538 /* else */ { subq(reg, value) ; return; } 5539 } 5540 5541 void MacroAssembler::decrementq(Address dst, int value) { 5542 if (value == min_jint) { subq(dst, value); return; } 5543 if (value < 0) { incrementq(dst, -value); return; } 5544 if (value == 0) { ; return; } 5545 if (value == 1 && UseIncDec) { decq(dst) ; return; } 5546 /* else */ { subq(dst, value) ; return; } 5547 } 5548 5549 void MacroAssembler::fat_nop() { 5550 // A 5 byte nop that is safe for patching (see patch_verified_entry) 5551 // Recommened sequence from 'Software Optimization Guide for the AMD 5552 // Hammer Processor' 5553 emit_byte(0x66); 5554 emit_byte(0x66); 5555 emit_byte(0x90); 5556 emit_byte(0x66); 5557 emit_byte(0x90); 5558 } 5559 5560 void MacroAssembler::incrementq(Register reg, int value) { 5561 if 
(value == min_jint) { addq(reg, value); return; } 5562 if (value < 0) { decrementq(reg, -value); return; } 5563 if (value == 0) { ; return; } 5564 if (value == 1 && UseIncDec) { incq(reg) ; return; } 5565 /* else */ { addq(reg, value) ; return; } 5566 } 5567 5568 void MacroAssembler::incrementq(Address dst, int value) { 5569 if (value == min_jint) { addq(dst, value); return; } 5570 if (value < 0) { decrementq(dst, -value); return; } 5571 if (value == 0) { ; return; } 5572 if (value == 1 && UseIncDec) { incq(dst) ; return; } 5573 /* else */ { addq(dst, value) ; return; } 5574 } 5575 5576 // 32bit can do a case table jump in one instruction but we no longer allow the base 5577 // to be installed in the Address class 5578 void MacroAssembler::jump(ArrayAddress entry) { 5579 lea(rscratch1, entry.base()); 5580 Address dispatch = entry.index(); 5581 assert(dispatch._base == noreg, "must be"); 5582 dispatch._base = rscratch1; 5583 jmp(dispatch); 5584 } 5585 5586 void MacroAssembler::lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo) { 5587 ShouldNotReachHere(); // 64bit doesn't use two regs 5588 cmpq(x_lo, y_lo); 5589 } 5590 5591 void MacroAssembler::lea(Register dst, AddressLiteral src) { 5592 mov_literal64(dst, (intptr_t)src.target(), src.rspec()); 5593 } 5594 5595 void MacroAssembler::lea(Address dst, AddressLiteral adr) { 5596 mov_literal64(rscratch1, (intptr_t)adr.target(), adr.rspec()); 5597 movptr(dst, rscratch1); 5598 } 5599 5600 void MacroAssembler::leave() { 5601 // %%% is this really better? Why not on 32bit too? 
emit_byte(0xC9); // LEAVE
}

void MacroAssembler::lneg(Register hi, Register lo) {
  ShouldNotReachHere(); // 64bit doesn't use two regs
  negq(lo);
}

// Load an oop constant; recorded as an immediate oop relocation so the
// GC can find and update it.
void MacroAssembler::movoop(Register dst, jobject obj) {
  mov_literal64(dst, (intptr_t)obj, oop_Relocation::spec_for_immediate());
}

void MacroAssembler::movoop(Address dst, jobject obj) {
  mov_literal64(rscratch1, (intptr_t)obj, oop_Relocation::spec_for_immediate());
  movq(dst, rscratch1);
}

// Load either the literal address itself (lval) or the value stored at
// it; clobbers rscratch1 when the target is not rip-reachable.
void MacroAssembler::movptr(Register dst, AddressLiteral src) {
  if (src.is_lval()) {
    mov_literal64(dst, (intptr_t)src.target(), src.rspec());
  } else {
    if (reachable(src)) {
      movq(dst, as_Address(src));
    } else {
      lea(rscratch1, src);
      movq(dst, Address(rscratch1,0));
    }
  }
}

void MacroAssembler::movptr(ArrayAddress dst, Register src) {
  movq(as_Address(dst), src);
}

void MacroAssembler::movptr(Register dst, ArrayAddress src) {
  movq(dst, as_Address(src));
}

// src should NEVER be a real pointer. Use AddressLiteral for true pointers
void MacroAssembler::movptr(Address dst, intptr_t src) {
  mov64(rscratch1, src);
  movq(dst, rscratch1);
}

// These are mostly for initializing NULL
void MacroAssembler::movptr(Address dst, int32_t src) {
  movslq(dst, src);
}

void MacroAssembler::movptr(Register dst, int32_t src) {
  mov64(dst, (intptr_t)src);
}

void MacroAssembler::pushoop(jobject obj) {
  movoop(rscratch1, obj);
  push(rscratch1);
}

// Push either the literal address (lval) or the value stored at it;
// clobbers rscratch1.
void MacroAssembler::pushptr(AddressLiteral src) {
  lea(rscratch1, src);
  if (src.is_lval()) {
    push(rscratch1);
  } else {
    pushq(Address(rscratch1, 0));
  }
}

// Clear the thread's last-Java-frame anchor fields.
void MacroAssembler::reset_last_Java_frame(bool clear_fp,
                                           bool clear_pc) {
  // we must set sp to zero to clear frame
  movptr(Address(r15_thread, JavaThread::last_Java_sp_offset()), NULL_WORD);
  // must clear fp, so that compiled frames are not confused; it is
  // possible that we need it only for debugging
  if (clear_fp) {
    movptr(Address(r15_thread, JavaThread::last_Java_fp_offset()), NULL_WORD);
  }

  if (clear_pc) {
    movptr(Address(r15_thread, JavaThread::last_Java_pc_offset()), NULL_WORD);
  }
}

// Record the last Java frame (sp/fp/pc) in the thread's frame anchor.
// sp defaults to rsp when invalid; fp and pc are optional.
void MacroAssembler::set_last_Java_frame(Register last_java_sp,
                                         Register last_java_fp,
                                         address last_java_pc) {
  // determine last_java_sp register
  if (!last_java_sp->is_valid()) {
    last_java_sp = rsp;
  }

  // last_java_fp is optional
  if (last_java_fp->is_valid()) {
    movptr(Address(r15_thread, JavaThread::last_Java_fp_offset()),
           last_java_fp);
  }

  // last_java_pc is optional
  if (last_java_pc != NULL) {
    Address java_pc(r15_thread,
                    JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset());
    lea(rscratch1, InternalAddress(last_java_pc));
    movptr(java_pc, rscratch1);
  }

movptr(Address(r15_thread, JavaThread::last_Java_sp_offset()), last_java_sp);
}

// Move 'arg' into the first C calling-convention argument register,
// unless it is already there.
static void pass_arg0(MacroAssembler* masm, Register arg) {
  if (c_rarg0 != arg ) {
    masm->mov(c_rarg0, arg);
  }
}

static void pass_arg1(MacroAssembler* masm, Register arg) {
  if (c_rarg1 != arg ) {
    masm->mov(c_rarg1, arg);
  }
}

static void pass_arg2(MacroAssembler* masm, Register arg) {
  if (c_rarg2 != arg ) {
    masm->mov(c_rarg2, arg);
  }
}

static void pass_arg3(MacroAssembler* masm, Register arg) {
  if (c_rarg3 != arg ) {
    masm->mov(c_rarg3, arg);
  }
}

// Emit code that halts execution with a diagnostic message: saves all
// registers with pusha and traps into debug64() below, passing the
// message, the faulting rip and a pointer to the saved register array.
void MacroAssembler::stop(const char* msg) {
  address rip = pc();
  pusha(); // get regs on stack
  lea(c_rarg0, ExternalAddress((address) msg));
  lea(c_rarg1, InternalAddress(rip));
  movq(c_rarg2, rsp); // pass pointer to regs array
  andq(rsp, -16); // align stack as required by ABI
  call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug64)));
  hlt();
}

// Emit code that prints a warning and continues; CPU state is preserved.
void MacroAssembler::warn(const char* msg) {
  push(rsp);
  andq(rsp, -16); // align stack as required by push_CPU_state and call

  push_CPU_state(); // keeps alignment at 16 bytes
  lea(c_rarg0, ExternalAddress((address) msg));
  call_VM_leaf(CAST_FROM_FN_PTR(address, warning), c_rarg0);
  pop_CPU_state();
  pop(rsp);
}

#ifndef PRODUCT
extern "C" void findpc(intptr_t x);
#endif

// Runtime entry reached from stop() above: optionally show a message box
// and dump the register array saved by pusha (regs[] is indexed from the
// stack pointer, so regs[15] is rax, regs[0] is r15 — see stop()).
void MacroAssembler::debug64(char* msg, int64_t pc, int64_t regs[]) {
  // In order to get locks to work, we need to fake a in_VM state
  if (ShowMessageBoxOnError ) {
    JavaThread* thread = JavaThread::current();
    JavaThreadState saved_state = thread->thread_state();
    thread->set_thread_state(_thread_in_vm);
#ifndef PRODUCT
    if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
      ttyLocker ttyl;
      BytecodeCounter::print();
    }
#endif
    // To see where a verify_oop failed, get $ebx+40/X for this frame.
    // XXX correct this offset for amd64
    // This is the value of eip which points to where verify_oop will return.
    if (os::message_box(msg, "Execution stopped, print registers?")) {
      ttyLocker ttyl;
      tty->print_cr("rip = 0x%016lx", pc);
#ifndef PRODUCT
      tty->cr();
      findpc(pc);
      tty->cr();
#endif
      tty->print_cr("rax = 0x%016lx", regs[15]);
      tty->print_cr("rbx = 0x%016lx", regs[12]);
      tty->print_cr("rcx = 0x%016lx", regs[14]);
      tty->print_cr("rdx = 0x%016lx", regs[13]);
      tty->print_cr("rdi = 0x%016lx", regs[8]);
      tty->print_cr("rsi = 0x%016lx", regs[9]);
      tty->print_cr("rbp = 0x%016lx", regs[10]);
      tty->print_cr("rsp = 0x%016lx", regs[11]);
      tty->print_cr("r8 = 0x%016lx", regs[7]);
      tty->print_cr("r9 = 0x%016lx", regs[6]);
      tty->print_cr("r10 = 0x%016lx", regs[5]);
      tty->print_cr("r11 = 0x%016lx", regs[4]);
      tty->print_cr("r12 = 0x%016lx", regs[3]);
      tty->print_cr("r13 = 0x%016lx", regs[2]);
      tty->print_cr("r14 = 0x%016lx", regs[1]);
      tty->print_cr("r15 = 0x%016lx", regs[0]);
      BREAKPOINT;
    }
    ThreadStateTransition::transition(thread, _thread_in_vm, saved_state);
  } else {
    ttyLocker ttyl;
    ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n",
                    msg);
    assert(false, err_msg("DEBUG MESSAGE: %s", msg));
  }
}

#endif // _LP64

// Now versions that are common to 32/64 bit

void MacroAssembler::addptr(Register dst, int32_t imm32) {
  LP64_ONLY(addq(dst, imm32)) NOT_LP64(addl(dst, imm32));
}

void MacroAssembler::addptr(Register dst, Register src) {
  LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src));
}

void MacroAssembler::addptr(Address dst, Register src) {
  LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src));
}

void MacroAssembler::addsd(XMMRegister dst,
AddressLiteral src) {
  if (reachable(src)) {
    Assembler::addsd(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::addsd(dst, Address(rscratch1, 0));
  }
}

void MacroAssembler::addss(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    addss(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    addss(dst, Address(rscratch1, 0));
  }
}

// Pad the code stream with nops up to the next multiple of 'modulus'.
void MacroAssembler::align(int modulus) {
  if (offset() % modulus != 0) {
    nop(modulus - (offset() % modulus));
  }
}

void MacroAssembler::andpd(XMMRegister dst, AddressLiteral src) {
  // Used in sign-masking with aligned address.
  assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");
  if (reachable(src)) {
    Assembler::andpd(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::andpd(dst, Address(rscratch1, 0));
  }
}

void MacroAssembler::andps(XMMRegister dst, AddressLiteral src) {
  // Used in sign-masking with aligned address.
  assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");
  if (reachable(src)) {
    Assembler::andps(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::andps(dst, Address(rscratch1, 0));
  }
}

void MacroAssembler::andptr(Register dst, int32_t imm32) {
  LP64_ONLY(andq(dst, imm32)) NOT_LP64(andl(dst, imm32));
}

// Atomically increment the 32-bit counter at counter_addr; the flags
// register is preserved around the locked increment via pushf/popf.
void MacroAssembler::atomic_incl(AddressLiteral counter_addr) {
  pushf();
  if (os::is_MP())
    lock();
  incrementl(counter_addr);
  popf();
}

// Writes to stack successive pages until offset reached to check for
// stack overflow + shadow pages. This clobbers tmp.
void MacroAssembler::bang_stack_size(Register size, Register tmp) {
  movptr(tmp, rsp);
  // Bang stack for total size given plus shadow page size.
  // Bang one page at a time because large size can bang beyond yellow and
  // red zones.
  Label loop;
  bind(loop);
  movl(Address(tmp, (-os::vm_page_size())), size );
  subptr(tmp, os::vm_page_size());
  subl(size, os::vm_page_size());
  jcc(Assembler::greater, loop);

  // Bang down shadow pages too.
  // The -1 because we already subtracted 1 page.
  for (int i = 0; i< StackShadowPages-1; i++) {
    // this could be any sized move but this is can be a debugging crumb
    // so the bigger the better.
    movptr(Address(tmp, (-i*os::vm_page_size())), size );
  }
}

void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) {
  assert(UseBiasedLocking, "why call this otherwise?");

  // Check for biased locking unlock case, which is a no-op
  // Note: we do not have to check the thread ID for two reasons.
  // First, the interpreter checks for IllegalMonitorStateException at
  // a higher level. Second, if the bias was revoked while we held the
  // lock, the object could not be rebiased toward another thread, so
  // the bias bit would be clear.
  movptr(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
  andptr(temp_reg, markOopDesc::biased_lock_mask_in_place);
  cmpptr(temp_reg, markOopDesc::biased_lock_pattern);
  jcc(Assembler::equal, done);
}

void MacroAssembler::c2bool(Register x) {
  // implements x == 0 ? 0 : 1
  // note: must only look at least-significant byte of x
  // since C-style booleans are stored in one byte
// only! (was bug)
  andl(x, 0xFF);
  setb(Assembler::notZero, x);
}

// Wouldn't need if AddressLiteral version had new name
void MacroAssembler::call(Label& L, relocInfo::relocType rtype) {
  Assembler::call(L, rtype);
}

void MacroAssembler::call(Register entry) {
  Assembler::call(entry);
}

// Call a literal target; falls back to an indirect call through
// rscratch1 when the target is not reachable with a 32-bit displacement.
void MacroAssembler::call(AddressLiteral entry) {
  if (reachable(entry)) {
    Assembler::call_literal(entry.target(), entry.rspec());
  } else {
    lea(rscratch1, entry);
    Assembler::call(rscratch1);
  }
}

// Implementation of call_VM versions
//
// Each variant emits an intermediate call to a local stub (label C) so
// that a return address — used by call_VM_helper to compute
// last_Java_sp/pc — is on the stack, then jumps over the stub (label E).

void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             bool check_exceptions) {
  Label C, E;
  call(C, relocInfo::none);
  jmp(E);

  bind(C);
  call_VM_helper(oop_result, entry_point, 0, check_exceptions);
  ret(0);

  bind(E);
}

void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             Register arg_1,
                             bool check_exceptions) {
  Label C, E;
  call(C, relocInfo::none);
  jmp(E);

  bind(C);
  pass_arg1(this, arg_1);
  call_VM_helper(oop_result, entry_point, 1, check_exceptions);
  ret(0);

  bind(E);
}

void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             bool check_exceptions) {
  Label C, E;
  call(C, relocInfo::none);
  jmp(E);

  bind(C);

  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));

  pass_arg2(this, arg_2);
  pass_arg1(this, arg_1);
  call_VM_helper(oop_result, entry_point, 2, check_exceptions);
  ret(0);

  bind(E);
}

void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             Register arg_3,
                             bool check_exceptions) {
  Label C, E;
  call(C, relocInfo::none);
  jmp(E);

  bind(C);

  // Args are passed in reverse order so earlier args can't be smashed.
  LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"));
  LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"));
  pass_arg3(this, arg_3);

  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);

  pass_arg1(this, arg_1);
  call_VM_helper(oop_result, entry_point, 3, check_exceptions);
  ret(0);

  bind(E);
}

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             int number_of_arguments,
                             bool check_exceptions) {
  Register thread = LP64_ONLY(r15_thread) NOT_LP64(noreg);
  call_VM_base(oop_result, thread, last_java_sp, entry_point, number_of_arguments, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             bool check_exceptions) {
  pass_arg1(this, arg_1);
  call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             bool check_exceptions) {

  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);
  pass_arg1(this, arg_1);
  call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             Register arg_3,
                             bool check_exceptions) {
  LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"));
  LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"));
  pass_arg3(this, arg_3);
  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);
  pass_arg1(this, arg_1);
  call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions);
}

void MacroAssembler::super_call_VM(Register
oop_result,
                                   Register last_java_sp,
                                   address entry_point,
                                   int number_of_arguments,
                                   bool check_exceptions) {
  // super_call_VM: like call_VM but always dispatches to
  // MacroAssembler::call_VM_base, bypassing any virtual override.
  Register thread = LP64_ONLY(r15_thread) NOT_LP64(noreg);
  MacroAssembler::call_VM_base(oop_result, thread, last_java_sp, entry_point, number_of_arguments, check_exceptions);
}

void MacroAssembler::super_call_VM(Register oop_result,
                                   Register last_java_sp,
                                   address entry_point,
                                   Register arg_1,
                                   bool check_exceptions) {
  pass_arg1(this, arg_1);
  super_call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions);
}

void MacroAssembler::super_call_VM(Register oop_result,
                                   Register last_java_sp,
                                   address entry_point,
                                   Register arg_1,
                                   Register arg_2,
                                   bool check_exceptions) {

  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);
  pass_arg1(this, arg_1);
  super_call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions);
}

void MacroAssembler::super_call_VM(Register oop_result,
                                   Register last_java_sp,
                                   address entry_point,
                                   Register arg_1,
                                   Register arg_2,
                                   Register arg_3,
                                   bool check_exceptions) {
  LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"));
  LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"));
  pass_arg3(this, arg_3);
  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);
  pass_arg1(this, arg_1);
  super_call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions);
}

// Common bottom layer of every call_VM variant: records the last Java
// frame, performs the runtime call, restores the thread register,
// optionally forwards pending exceptions and fetches the oop result.
void MacroAssembler::call_VM_base(Register oop_result,
                                  Register java_thread,
                                  Register last_java_sp,
                                  address entry_point,
                                  int number_of_arguments,
                                  bool check_exceptions) {
  // determine java_thread register
  if (!java_thread->is_valid()) {
#ifdef _LP64
    java_thread = r15_thread;
#else
    java_thread = rdi;
    get_thread(java_thread);
#endif // LP64
  }
  // determine last_java_sp register
  if (!last_java_sp->is_valid()) {
    last_java_sp = rsp;
  }
  // debugging support
  assert(number_of_arguments >= 0   , "cannot have negative number of arguments");
  LP64_ONLY(assert(java_thread == r15_thread, "unexpected register"));
#ifdef ASSERT
  LP64_ONLY(if (UseCompressedOops) verify_heapbase("call_VM_base");)
#endif // ASSERT

  assert(java_thread != oop_result  , "cannot use the same register for java_thread & oop_result");
  assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp");

  // push java thread (becomes first argument of C function)

  NOT_LP64(push(java_thread); number_of_arguments++);
  LP64_ONLY(mov(c_rarg0, r15_thread));

  // set last Java frame before call
  assert(last_java_sp != rbp, "can't use ebp/rbp");

  // Only interpreter should have to set fp
  set_last_Java_frame(java_thread, last_java_sp, rbp, NULL);

  // do the call, remove parameters
  MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments);

  // restore the thread (cannot use the pushed argument since arguments
  // may be overwritten by C code generated by an optimizing compiler);
  // however can use the register value directly if it is callee saved.
  if (LP64_ONLY(true ||) java_thread == rdi || java_thread == rsi) {
    // rdi & rsi (also r15) are callee saved -> nothing to do
#ifdef ASSERT
    guarantee(java_thread != rax, "change this code");
    push(rax);
    { Label L;
      get_thread(rax);
      cmpptr(java_thread, rax);
      jcc(Assembler::equal, L);
      stop("MacroAssembler::call_VM_base: rdi not callee saved?");
      bind(L);
    }
    pop(rax);
#endif
  } else {
    get_thread(java_thread);
  }
  // reset last Java frame
  // Only interpreter should have to clear fp
  reset_last_Java_frame(java_thread, true, false);

#ifndef CC_INTERP
  // C++ interp handles this in the interpreter
  check_and_handle_popframe(java_thread);
  check_and_handle_earlyret(java_thread);
#endif /* CC_INTERP */

  if (check_exceptions) {
    // check for pending exceptions (java_thread is set upon return)
    cmpptr(Address(java_thread, Thread::pending_exception_offset()), (int32_t) NULL_WORD);
#ifndef _LP64
    jump_cc(Assembler::notEqual,
            RuntimeAddress(StubRoutines::forward_exception_entry()));
#else
    // This used to conditionally jump to forward_exception however it is
    // possible if we relocate that the branch will not reach. So we must jump
    // around so we can always reach

    Label ok;
    jcc(Assembler::equal, ok);
    jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
    bind(ok);
#endif // LP64
  }

  // get oop result if there is one and reset the value in the thread
  if (oop_result->is_valid()) {
    movptr(oop_result, Address(java_thread, JavaThread::vm_result_offset()));
    movptr(Address(java_thread, JavaThread::vm_result_offset()), NULL_WORD);
    verify_oop(oop_result, "broken oop in call_VM_base");
  }
}

void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) {

  // Calculate the value for last_Java_sp
  // somewhat subtle. call_VM does an intermediate call
  // which places a return address on the stack just under the
  // stack pointer as the user finished with it. This allows
  // use to retrieve last_Java_pc from last_Java_sp[-1].
  // On 32bit we then have to push additional args on the stack to accomplish
  // the actual requested call. On 64bit call_VM only can use register args
  // so the only extra space is the return address that call_VM created.
  // This hopefully explains the calculations here.
#ifdef _LP64
  // We've pushed one address, correct last_Java_sp
  lea(rax, Address(rsp, wordSize));
#else
  lea(rax, Address(rsp, (1 + number_of_arguments) * wordSize));
#endif // LP64

  call_VM_base(oop_result, noreg, rax, entry_point, number_of_arguments, check_exceptions);

}

// Leaf calls: no oop map, no last_Java_frame bookkeeping beyond call_VM_leaf_base.
void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) {
  call_VM_leaf_base(entry_point, number_of_arguments);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) {
  pass_arg0(this, arg_0);
  call_VM_leaf(entry_point, 1);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {

  // Arguments are passed in reverse order so an earlier pass_arg cannot
  // clobber a register still holding a later argument; the asserts check
  // for that kind of smash on 64-bit where c_rargN registers are used.
  LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
  pass_arg1(this, arg_1);
  pass_arg0(this, arg_0);
  call_VM_leaf(entry_point, 2);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) {
  LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg"));
  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);
  LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
  pass_arg1(this, arg_1);
  pass_arg0(this, arg_0);
  call_VM_leaf(entry_point, 3);
}

// super_call_VM_leaf variants call MacroAssembler::call_VM_leaf_base directly,
// bypassing any subclass override of the leaf-call machinery.
void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0) {
  pass_arg0(this, arg_0);
  MacroAssembler::call_VM_leaf_base(entry_point, 1);
}

void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {

  LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
  pass_arg1(this, arg_1);
  pass_arg0(this, arg_0);
  MacroAssembler::call_VM_leaf_base(entry_point, 2);
}

void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) {
  LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg"));
  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);
  LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
  pass_arg1(this, arg_1);
  pass_arg0(this, arg_0);
  MacroAssembler::call_VM_leaf_base(entry_point, 3);
}

void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3) {
  LP64_ONLY(assert(arg_0 != c_rarg3, "smashed arg"));
  LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"));
  LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"));
  pass_arg3(this, arg_3);
  LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg"));
  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);
  LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
  pass_arg1(this, arg_1);
  pass_arg0(this, arg_0);
  MacroAssembler::call_VM_leaf_base(entry_point, 4);
}

// Intentionally empty here; overridden where early-return handling is needed.
void MacroAssembler::check_and_handle_earlyret(Register java_thread) {
}

// Intentionally empty here; overridden where popframe handling is needed.
void MacroAssembler::check_and_handle_popframe(Register java_thread) {
}

// 32-bit compare of a literal-addressed memory word against an immediate;
// falls back to lea+indirect when the literal is not rip-reachable.
void MacroAssembler::cmp32(AddressLiteral src1, int32_t imm) {
  if (reachable(src1)) {
    cmpl(as_Address(src1), imm);
  } else {
    lea(rscratch1, src1);
    cmpl(Address(rscratch1, 0), imm);
  }
}

void MacroAssembler::cmp32(Register src1, AddressLiteral src2) {
  assert(!src2.is_lval(), "use cmpptr");
  if (reachable(src2)) {
    cmpl(src1, as_Address(src2));
  } else {
    lea(rscratch1, src2);
    cmpl(src1, Address(rscratch1, 0));
  }
}

void MacroAssembler::cmp32(Register src1, int32_t imm) {
  Assembler::cmpl(src1, imm);
}

void MacroAssembler::cmp32(Register src1, Address src2) {
  Assembler::cmpl(src1, src2);
}

// Materializes the three-way result of a double compare (-1/0/1) in dst,
// mapping the unordered (NaN) case to -1 or +1 as requested.
void MacroAssembler::cmpsd2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less) {
  ucomisd(opr1, opr2);

  Label L;
  if (unordered_is_less) {
    movl(dst,
-1);
    jcc(Assembler::parity, L);
    jcc(Assembler::below , L);
    movl(dst, 0);
    jcc(Assembler::equal , L);
    increment(dst);
  } else { // unordered is greater
    movl(dst, 1);
    jcc(Assembler::parity, L);
    jcc(Assembler::above , L);
    movl(dst, 0);
    jcc(Assembler::equal , L);
    decrementl(dst);
  }
  bind(L);
}

// Float analogue of cmpsd2int: three-way compare result (-1/0/1) in dst,
// unordered mapped to -1 or +1 as requested.
void MacroAssembler::cmpss2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less) {
  ucomiss(opr1, opr2);

  Label L;
  if (unordered_is_less) {
    movl(dst, -1);
    jcc(Assembler::parity, L);
    jcc(Assembler::below , L);
    movl(dst, 0);
    jcc(Assembler::equal , L);
    increment(dst);
  } else { // unordered is greater
    movl(dst, 1);
    jcc(Assembler::parity, L);
    jcc(Assembler::above , L);
    movl(dst, 0);
    jcc(Assembler::equal , L);
    decrementl(dst);
  }
  bind(L);
}


// Byte compare of a literal-addressed memory location against an immediate.
void MacroAssembler::cmp8(AddressLiteral src1, int imm) {
  if (reachable(src1)) {
    cmpb(as_Address(src1), imm);
  } else {
    lea(rscratch1, src1);
    cmpb(Address(rscratch1, 0), imm);
  }
}

// Pointer-sized compare against a literal: either the literal's address
// itself (is_lval) or the value stored at that address.
void MacroAssembler::cmpptr(Register src1, AddressLiteral src2) {
#ifdef _LP64
  if (src2.is_lval()) {
    movptr(rscratch1, src2);
    Assembler::cmpq(src1, rscratch1);
  } else if (reachable(src2)) {
    cmpq(src1, as_Address(src2));
  } else {
    lea(rscratch1, src2);
    Assembler::cmpq(src1, Address(rscratch1, 0));
  }
#else
  if (src2.is_lval()) {
    cmp_literal32(src1, (int32_t) src2.target(), src2.rspec());
  } else {
    cmpl(src1, as_Address(src2));
  }
#endif // _LP64
}

void MacroAssembler::cmpptr(Address src1, AddressLiteral src2) {
  assert(src2.is_lval(), "not a mem-mem compare");
#ifdef _LP64
  // moves src2's literal address
  movptr(rscratch1, src2);
  Assembler::cmpq(src1, rscratch1);
#else
  cmp_literal32(src1, (int32_t) src2.target(), src2.rspec());
#endif // _LP64
}

// Atomic (lock-prefixed on MP) compare-and-exchange against a literal
// address, via rscratch1 when the address is not directly reachable.
void MacroAssembler::locked_cmpxchgptr(Register reg, AddressLiteral adr) {
  if (reachable(adr)) {
    if (os::is_MP())
      lock();
    cmpxchgptr(reg, as_Address(adr));
  } else {
    lea(rscratch1, adr);
    if (os::is_MP())
      lock();
    cmpxchgptr(reg, Address(rscratch1, 0));
  }
}

void MacroAssembler::cmpxchgptr(Register reg, Address adr) {
  LP64_ONLY(cmpxchgq(reg, adr)) NOT_LP64(cmpxchgl(reg, adr));
}

void MacroAssembler::comisd(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    Assembler::comisd(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::comisd(dst, Address(rscratch1, 0));
  }
}

void MacroAssembler::comiss(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    Assembler::comiss(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::comiss(dst, Address(rscratch1, 0));
  }
}


// Increments the 32-bit counter at counter_addr iff condition cond holds
// (branches around the increment on the negated condition).
void MacroAssembler::cond_inc32(Condition cond, AddressLiteral counter_addr) {
  Condition negated_cond = negate_condition(cond);
  Label L;
  jcc(negated_cond, L);
  atomic_incl(counter_addr);
  bind(L);
}

int MacroAssembler::corrected_idivl(Register reg) {
  // Full implementation of Java idiv and irem; checks for
  // special case as described in JVM spec., p.243 & p.271.
  // The function returns the (pc) offset of the idivl
  // instruction - may be needed for implicit exceptions.
  //
  //         normal case                           special case
  //
  // input : rax,: dividend                         min_int
  //         reg: divisor   (may not be rax,/rdx)   -1
  //
  // output: rax,: quotient  (= rax, idiv reg)       min_int
  //         rdx: remainder (= rax, irem reg)       0
  assert(reg != rax && reg != rdx, "reg cannot be rax, or rdx register");
  const int min_int = 0x80000000;
  Label normal_case, special_case;

  // check for special case: min_int / -1 would trap on idivl, so it is
  // handled by falling through to special_case with rax/rdx preset.
  cmpl(rax, min_int);
  jcc(Assembler::notEqual, normal_case);
  xorl(rdx, rdx); // prepare rdx for possible special case (where remainder = 0)
  cmpl(reg, -1);
  jcc(Assembler::equal, special_case);

  // handle normal case
  bind(normal_case);
  cdql();
  int idivl_offset = offset();
  idivl(reg);

  // normal and special case exit
  bind(special_case);

  return idivl_offset;
}



// Subtracts value from reg using the shortest useful encoding (dec/sub);
// min_jint is handled first because it cannot be negated.
void MacroAssembler::decrementl(Register reg, int value) {
  if (value == min_jint) {subl(reg, value) ; return; }
  if (value <  0) { incrementl(reg, -value); return; }
  if (value == 0) {                        ; return; }
  if (value == 1 && UseIncDec)     { decl(reg) ; return; }
  /* else */      { subl(reg, value)       ; return; }
}

void MacroAssembler::decrementl(Address dst, int value) {
  if (value == min_jint) {subl(dst, value) ; return; }
  if (value <  0) { incrementl(dst, -value); return; }
  if (value == 0) {                        ; return; }
  if (value == 1 && UseIncDec)     { decl(dst) ; return; }
  /* else */      { subl(dst, value)       ; return; }
}

// Signed division by 2^shift_value: adds (2^shift - 1) to negative values
// before the arithmetic shift so the quotient rounds toward zero.
void MacroAssembler::division_with_shift (Register reg, int shift_value) {
  assert (shift_value > 0, "illegal shift value");
  Label _is_positive;
  testl (reg, reg);
  jcc (Assembler::positive, _is_positive);
  int offset = (1 << shift_value) - 1 ;

  if (offset == 1) {
    incrementl(reg);
  } else {
    addl(reg, offset);
  }

  bind (_is_positive);
  sarl(reg, shift_value);
}

void MacroAssembler::divsd(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    Assembler::divsd(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::divsd(dst, Address(rscratch1, 0));
  }
}

void MacroAssembler::divss(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    Assembler::divss(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::divss(dst, Address(rscratch1, 0));
  }
}

// !defined(COMPILER2) is because of stupid core builds
#if !defined(_LP64) || defined(COMPILER1) || !defined(COMPILER2)
// Clears the x87 register stack (fast path via emms when MMX is available).
void MacroAssembler::empty_FPU_stack() {
  if (VM_Version::supports_mmx()) {
    emms();
  } else {
    for (int i = 8; i-- > 0; ) ffree(i);
  }
}
#endif // !LP64 || C1 || !C2


// Defines obj, preserves var_size_in_bytes
// CAS-based bump allocation in eden: retries until the heap top is
// advanced atomically, or jumps to slow_case on overflow/contention policy.
void MacroAssembler::eden_allocate(Register obj,
                                   Register var_size_in_bytes,
                                   int con_size_in_bytes,
                                   Register t1,
                                   Label& slow_case) {
  assert(obj == rax, "obj must be in rax, for cmpxchg");
  assert_different_registers(obj, var_size_in_bytes, t1);
  if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) {
    jmp(slow_case);
  } else {
    Register end = t1;
    Label retry;
    bind(retry);
    ExternalAddress heap_top((address) Universe::heap()->top_addr());
    movptr(obj, heap_top);
    if (var_size_in_bytes == noreg) {
      lea(end, Address(obj, con_size_in_bytes));
    } else {
      lea(end, Address(obj, var_size_in_bytes, Address::times_1));
    }
    // if end < obj then we wrapped around => object too long => slow case
    cmpptr(end, obj);
    jcc(Assembler::below, slow_case);
    cmpptr(end, ExternalAddress((address) Universe::heap()->end_addr()));
    jcc(Assembler::above, slow_case);
    // Compare obj with the top addr, and if still equal, store the new top addr in
    // end at the address of the top addr
pointer. Sets ZF if was equal, and clears
    // it otherwise. Use lock prefix for atomicity on MPs.
    locked_cmpxchgptr(end, heap_top);
    jcc(Assembler::notEqual, retry);
  }
}

// Standard frame prologue: save caller's rbp and establish the new frame pointer.
void MacroAssembler::enter() {
  push(rbp);
  mov(rbp, rsp);
}

void MacroAssembler::fcmp(Register tmp) {
  fcmp(tmp, 1, true, true);
}

// Compares ST0 against ST(index) and transfers the x87 condition codes into
// eflags: directly via fucomi/fucomip when cmov is supported, otherwise via
// the classic fnstsw/sahf sequence (which needs tmp to preserve rax).
void MacroAssembler::fcmp(Register tmp, int index, bool pop_left, bool pop_right) {
  assert(!pop_right || pop_left, "usage error");
  if (VM_Version::supports_cmov()) {
    assert(tmp == noreg, "unneeded temp");
    if (pop_left) {
      fucomip(index);
    } else {
      fucomi(index);
    }
    if (pop_right) {
      fpop();
    }
  } else {
    assert(tmp != noreg, "need temp");
    if (pop_left) {
      if (pop_right) {
        fcompp();
      } else {
        fcomp(index);
      }
    } else {
      fcom(index);
    }
    // convert FPU condition into eflags condition via rax,
    save_rax(tmp);
    fwait(); fnstsw_ax();
    sahf();
    restore_rax(tmp);
  }
  // condition codes set as follows:
  //
  // CF (corresponds to C0) if x < y
  // PF (corresponds to C2) if unordered
  // ZF (corresponds to C3) if x = y
}

void MacroAssembler::fcmp2int(Register dst, bool unordered_is_less) {
  fcmp2int(dst, unordered_is_less, 1, true, true);
}

// Three-way x87 compare result (-1/0/1) in dst, analogous to cmpsd2int for SSE.
void MacroAssembler::fcmp2int(Register dst, bool unordered_is_less, int index, bool pop_left, bool pop_right) {
  fcmp(VM_Version::supports_cmov() ? noreg : dst, index, pop_left, pop_right);
  Label L;
  if (unordered_is_less) {
    movl(dst, -1);
    jcc(Assembler::parity, L);
    jcc(Assembler::below , L);
    movl(dst, 0);
    jcc(Assembler::equal , L);
    increment(dst);
  } else { // unordered is greater
    movl(dst, 1);
    jcc(Assembler::parity, L);
    jcc(Assembler::above , L);
    movl(dst, 0);
    jcc(Assembler::equal , L);
    decrementl(dst);
  }
  bind(L);
}

void MacroAssembler::fld_d(AddressLiteral src) {
  fld_d(as_Address(src));
}

void MacroAssembler::fld_s(AddressLiteral src) {
  fld_s(as_Address(src));
}

void MacroAssembler::fld_x(AddressLiteral src) {
  Assembler::fld_x(as_Address(src));
}

void MacroAssembler::fldcw(AddressLiteral src) {
  Assembler::fldcw(as_Address(src));
}

// Pops the x87 stack top (free the register, then bump the stack pointer).
void MacroAssembler::fpop() {
  ffree();
  fincstp();
}

// Computes the IEEE remainder of ST0 by ST1, looping until fprem reports
// completion (status-word bit 0x400 / C2 clear); preserves rax via tmp.
void MacroAssembler::fremr(Register tmp) {
  save_rax(tmp);
  { Label L;
    bind(L);
    fprem();
    fwait(); fnstsw_ax();
#ifdef _LP64
    testl(rax, 0x400);
    jcc(Assembler::notEqual, L);
#else
    sahf();
    jcc(Assembler::parity, L);
#endif // _LP64
  }
  restore_rax(tmp);
  // Result is in ST0.
  // Note: fxch & fpop to get rid of ST1
  // (otherwise FPU stack could overflow eventually)
  fxch(1);
  fpop();
}


void MacroAssembler::incrementl(AddressLiteral dst) {
  if (reachable(dst)) {
    incrementl(as_Address(dst));
  } else {
    lea(rscratch1, dst);
    incrementl(Address(rscratch1, 0));
  }
}

void MacroAssembler::incrementl(ArrayAddress dst) {
  incrementl(as_Address(dst));
}

// Adds value to reg using the shortest useful encoding (inc/add);
// min_jint is handled first because it cannot be negated.
void MacroAssembler::incrementl(Register reg, int value) {
  if (value == min_jint) {addl(reg, value) ; return; }
  if (value <  0) { decrementl(reg, -value); return; }
  if (value == 0) {                        ; return; }
  if (value == 1 && UseIncDec)     { incl(reg) ; return; }
  /* else */      { addl(reg, value)       ; return; }
}

void MacroAssembler::incrementl(Address dst, int value) {
  if (value == min_jint) {addl(dst, value) ; return; }
  if (value <  0) { decrementl(dst, -value); return; }
  if (value == 0) {                        ; return; }
  if (value == 1 && UseIncDec)     { incl(dst) ; return; }
  /* else */      { addl(dst, value)       ; return; }
}

// Unconditional jump to a literal destination, indirect through rscratch1
// when the target is not reachable with a 32-bit displacement.
void MacroAssembler::jump(AddressLiteral dst) {
  if (reachable(dst)) {
    jmp_literal(dst.target(), dst.rspec());
  } else {
    lea(rscratch1, dst);
    jmp(rscratch1);
  }
}

// Conditional jump to a literal destination. If the target is unreachable,
// emits a short branch on the reversed condition around an indirect jump.
void MacroAssembler::jump_cc(Condition cc, AddressLiteral dst) {
  if (reachable(dst)) {
    InstructionMark im(this);
    relocate(dst.reloc());
    const int short_size = 2;
    const int long_size = 6;
    int offs = (intptr_t)dst.target() - ((intptr_t)_code_pos);
    if (dst.reloc() == relocInfo::none && is8bit(offs - short_size)) {
      // 0111 tttn #8-bit disp
      emit_byte(0x70 | cc);
      emit_byte((offs - short_size) & 0xFF);
    } else {
      // 0000 1111 1000 tttn #32-bit disp
      emit_byte(0x0F);
      emit_byte(0x80 | cc);
      emit_long(offs - long_size);
    }
  } else {
#ifdef ASSERT
    warning("reversing conditional branch");
#endif /* ASSERT */
    Label skip;
    jccb(reverse[cc], skip);
    lea(rscratch1, dst);
    Assembler::jmp(rscratch1);
    bind(skip);
  }
}

void MacroAssembler::ldmxcsr(AddressLiteral src) {
  if (reachable(src)) {
    Assembler::ldmxcsr(as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::ldmxcsr(Address(rscratch1, 0));
  }
}

// Sign-extending byte load; returns the instruction's (pc) offset.
int MacroAssembler::load_signed_byte(Register dst, Address src) {
  int off;
  if (LP64_ONLY(true ||) VM_Version::is_P6()) {
    off = offset();
    movsbl(dst, src); // movsxb
  } else {
    // pre-P6 fallback: zero-extend then shift to propagate the sign bit
    off = load_unsigned_byte(dst, src);
    shll(dst, 24);
    sarl(dst, 24);
  }
  return off;
}

// Note: load_signed_short used to be called load_signed_word.
// Although the 'w' in x86 opcodes refers to the term "word" in the assembler
// manual, which means 16 bits, that usage is found nowhere in HotSpot code.
// The term "word" in HotSpot means a 32- or 64-bit machine word.
int MacroAssembler::load_signed_short(Register dst, Address src) {
  int off;
  if (LP64_ONLY(true ||) VM_Version::is_P6()) {
    // This is dubious to me since it seems safe to do a signed 16 => 64 bit
    // version but this is what 64bit has always done. This seems to imply
    // that users are only using 32bits worth.
    off = offset();
    movswl(dst, src); // movsxw
  } else {
    off = load_unsigned_short(dst, src);
    shll(dst, 16);
    sarl(dst, 16);
  }
  return off;
}

int MacroAssembler::load_unsigned_byte(Register dst, Address src) {
  // According to Intel Doc. AP-526, "Zero-Extension of Short", p.16,
  // and "3.9 Partial Register Penalties", p. 22).
  int off;
  if (LP64_ONLY(true || ) VM_Version::is_P6() || src.uses(dst)) {
    off = offset();
    movzbl(dst, src); // movzxb
  } else {
    // pre-P6 fallback: clear dst first, then do a plain byte move
    xorl(dst, dst);
    off = offset();
    movb(dst, src);
  }
  return off;
}

// Note: load_unsigned_short used to be called load_unsigned_word.
int MacroAssembler::load_unsigned_short(Register dst, Address src) {
  // According to Intel Doc. AP-526, "Zero-Extension of Short", p.16,
  // and "3.9 Partial Register Penalties", p. 22).
  int off;
  if (LP64_ONLY(true ||) VM_Version::is_P6() || src.uses(dst)) {
    off = offset();
    movzwl(dst, src); // movzxw
  } else {
    xorl(dst, dst);
    off = offset();
    movw(dst, src);
  }
  return off;
}

// Loads a 1/2/4/8-byte value; on 32-bit, an 8-byte load uses dst2 for the
// second (high) 32-bit half.
void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2) {
  switch (size_in_bytes) {
#ifndef _LP64
  case  8:
    assert(dst2 != noreg, "second dest register required");
    movl(dst,  src);
    movl(dst2, src.plus_disp(BytesPerInt));
    break;
#else
  case  8:  movq(dst, src); break;
#endif
  case  4:  movl(dst, src); break;
  case  2:  is_signed ? load_signed_short(dst, src) : load_unsigned_short(dst, src); break;
  case  1:  is_signed ? load_signed_byte( dst, src) : load_unsigned_byte( dst, src); break;
  default:  ShouldNotReachHere();
  }
}

// Stores a 1/2/4/8-byte value; on 32-bit, an 8-byte store takes the high
// half from src2.
void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2) {
  switch (size_in_bytes) {
#ifndef _LP64
  case  8:
    assert(src2 != noreg, "second source register required");
    movl(dst,                        src);
    movl(dst.plus_disp(BytesPerInt), src2);
    break;
#else
  case  8:  movq(dst, src); break;
#endif
  case  4:  movl(dst, src); break;
  case  2:  movw(dst, src); break;
  case  1:  movb(dst, src); break;
  default:  ShouldNotReachHere();
  }
}

void MacroAssembler::mov32(AddressLiteral dst, Register src) {
  if (reachable(dst)) {
    movl(as_Address(dst), src);
  } else {
    lea(rscratch1, dst);
    movl(Address(rscratch1, 0), src);
  }
}

void MacroAssembler::mov32(Register dst, AddressLiteral src) {
  if (reachable(src)) {
    movl(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    movl(dst, Address(rscratch1, 0));
  }
}

// C++ bool manipulation
// sizeof(bool) is compiler-dependent, so the move width is chosen at
// compile time to match it.

void MacroAssembler::movbool(Register dst, Address src) {
  if(sizeof(bool) == 1)
    movb(dst, src);
  else if(sizeof(bool) == 2)
    movw(dst, src);
  else if(sizeof(bool) == 4)
    movl(dst, src);
  else
    // unsupported
    ShouldNotReachHere();
}

void MacroAssembler::movbool(Address dst, bool boolconst) {
  if(sizeof(bool) == 1)
    movb(dst, (int) boolconst);
  else if(sizeof(bool) == 2)
    movw(dst, (int) boolconst);
  else if(sizeof(bool) == 4)
    movl(dst, (int) boolconst);
  else
    // unsupported
    ShouldNotReachHere();
}

void MacroAssembler::movbool(Address dst, Register src) {
  if(sizeof(bool) == 1)
    movb(dst, src);
  else if(sizeof(bool) == 2)
    movw(dst, src);
  else if(sizeof(bool) == 4)
    movl(dst, src);
  else
    //
unsupported
    ShouldNotReachHere();
}

void MacroAssembler::movbyte(ArrayAddress dst, int src) {
  movb(as_Address(dst), src);
}

// Loads a double from a literal address; UseXmmLoadAndClearUpper selects
// movsd (clears the upper half of the XMM register) vs movlpd (preserves it).
void MacroAssembler::movdbl(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    if (UseXmmLoadAndClearUpper) {
      movsd (dst, as_Address(src));
    } else {
      movlpd(dst, as_Address(src));
    }
  } else {
    lea(rscratch1, src);
    if (UseXmmLoadAndClearUpper) {
      movsd (dst, Address(rscratch1, 0));
    } else {
      movlpd(dst, Address(rscratch1, 0));
    }
  }
}

void MacroAssembler::movflt(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    movss(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    movss(dst, Address(rscratch1, 0));
  }
}

// Pointer-width moves: movq on 64-bit, movl on 32-bit.
void MacroAssembler::movptr(Register dst, Register src) {
  LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
}

void MacroAssembler::movptr(Register dst, Address src) {
  LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
}

// src should NEVER be a real pointer. Use AddressLiteral for true pointers
void MacroAssembler::movptr(Register dst, intptr_t src) {
  LP64_ONLY(mov64(dst, src)) NOT_LP64(movl(dst, src));
}

void MacroAssembler::movptr(Address dst, Register src) {
  LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
}

void MacroAssembler::movsd(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    Assembler::movsd(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::movsd(dst, Address(rscratch1, 0));
  }
}

void MacroAssembler::movss(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    Assembler::movss(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::movss(dst, Address(rscratch1, 0));
  }
}

void MacroAssembler::mulsd(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    Assembler::mulsd(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::mulsd(dst, Address(rscratch1, 0));
  }
}

void MacroAssembler::mulss(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    Assembler::mulss(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::mulss(dst, Address(rscratch1, 0));
  }
}

// Emits an explicit null check only when the offset is too large for the
// eventual memory access itself to fault inside the guard page.
void MacroAssembler::null_check(Register reg, int offset) {
  if (needs_explicit_null_check(offset)) {
    // provoke OS NULL exception if reg = NULL by
    // accessing M[reg] w/o changing any (non-CC) registers
    // NOTE: cmpl is plenty here to provoke a segv
    cmpptr(rax, Address(reg, 0));
    // Note: should probably use testl(rax, Address(reg, 0));
    //       may be shorter code (however, this version of
    //       testl needs to be implemented first)
  } else {
    // nothing to do, (later) access of M[reg + offset]
    // will provoke OS NULL exception if reg = NULL
  }
}

void MacroAssembler::os_breakpoint() {
  // instead of directly
emitting a breakpoint, call os:breakpoint for better debugability
  // (e.g., MSVC can't call ps() otherwise)
  call(RuntimeAddress(CAST_FROM_FN_PTR(address, os::breakpoint)));
}

void MacroAssembler::pop_CPU_state() {
  pop_FPU_state();
  pop_IU_state();
}

void MacroAssembler::pop_FPU_state() {
  NOT_LP64(frstor(Address(rsp, 0));)
  LP64_ONLY(fxrstor(Address(rsp, 0));)
  addptr(rsp, FPUStateSizeInWords * wordSize);
}

void MacroAssembler::pop_IU_state() {
  popa();
  // undo the 16-byte alignment padding pushed by push_IU_state (64-bit only)
  LP64_ONLY(addq(rsp, 8));
  popf();
}

// Save Integer and Float state
// Warning: Stack must be 16 byte aligned (64bit)
void MacroAssembler::push_CPU_state() {
  push_IU_state();
  push_FPU_state();
}

void MacroAssembler::push_FPU_state() {
  subptr(rsp, FPUStateSizeInWords * wordSize);
#ifndef _LP64
  fnsave(Address(rsp, 0));
  fwait();
#else
  fxsave(Address(rsp, 0));
#endif // LP64
}

void MacroAssembler::push_IU_state() {
  // Push flags first because pusha kills them
  pushf();
  // Make sure rsp stays 16-byte aligned
  LP64_ONLY(subq(rsp, 8));
  pusha();
}

// Clears the JavaThread's last_Java_sp anchor (and optionally fp/pc) so the
// frame no longer appears walkable.
void MacroAssembler::reset_last_Java_frame(Register java_thread, bool clear_fp, bool clear_pc) {
  // determine java_thread register
  if (!java_thread->is_valid()) {
    java_thread = rdi;
    get_thread(java_thread);
  }
  // we must set sp to zero to clear frame
  movptr(Address(java_thread, JavaThread::last_Java_sp_offset()), NULL_WORD);
  if (clear_fp) {
    movptr(Address(java_thread, JavaThread::last_Java_fp_offset()), NULL_WORD);
  }

  if (clear_pc)
    movptr(Address(java_thread, JavaThread::last_Java_pc_offset()), NULL_WORD);

}

void MacroAssembler::restore_rax(Register tmp) {
  if (tmp == noreg) pop(rax);
  else if (tmp != rax) mov(rax, tmp);
}

// Rounds reg up to a multiple of modulus (add-then-mask; expects a power of two).
void MacroAssembler::round_to(Register reg, int modulus) {
  addptr(reg, modulus - 1);
  andptr(reg, -modulus);
}

void MacroAssembler::save_rax(Register tmp) {
  if (tmp == noreg) push(rax);
  else if (tmp != rax) mov(tmp, rax);
}

// Write serialization page so VM thread can do a pseudo remote membar.
// We use the current thread pointer to calculate a thread specific
// offset to write to within the page. This minimizes bus traffic
// due to cache line collision.
void MacroAssembler::serialize_memory(Register thread, Register tmp) {
  movl(tmp, thread);
  shrl(tmp, os::get_serialize_page_shift_count());
  andl(tmp, (os::vm_page_size() - sizeof(int)));

  Address index(noreg, tmp, Address::times_1);
  ExternalAddress page(os::get_memory_serialize_page());

  // Size of store must match masking code above
  movl(as_Address(ArrayAddress(page, index)), tmp);
}

// Calls to C land
//
// When entering C land, the rbp, & rsp of the last Java frame have to be recorded
// in the (thread-local) JavaThread object. When leaving C land, the last Java fp
// has to be reset to 0. This is required to allow proper stack traversal.
void MacroAssembler::set_last_Java_frame(Register java_thread,
                                         Register last_java_sp,
                                         Register last_java_fp,
                                         address last_java_pc) {
  // determine java_thread register
  if (!java_thread->is_valid()) {
    java_thread = rdi;
    get_thread(java_thread);
  }
  // determine last_java_sp register
  if (!last_java_sp->is_valid()) {
    last_java_sp = rsp;
  }

  // last_java_fp is optional

  if (last_java_fp->is_valid()) {
    movptr(Address(java_thread, JavaThread::last_Java_fp_offset()), last_java_fp);
  }

  // last_java_pc is optional

  if (last_java_pc != NULL) {
    lea(Address(java_thread,
                JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()),
        InternalAddress(last_java_pc));

  }
  // sp is stored last: the anchor only reads as walkable once sp is set
  movptr(Address(java_thread, JavaThread::last_Java_sp_offset()), last_java_sp);
}

void MacroAssembler::shlptr(Register dst, int imm8) {
  LP64_ONLY(shlq(dst, imm8)) NOT_LP64(shll(dst, imm8));
}

void MacroAssembler::shrptr(Register dst, int imm8) {
  LP64_ONLY(shrq(dst, imm8)) NOT_LP64(shrl(dst, imm8));
}

// In-place sign extension of the low byte of reg to 32 bits.
void MacroAssembler::sign_extend_byte(Register reg) {
  if (LP64_ONLY(true ||) (VM_Version::is_P6() && reg->has_byte_register())) {
    movsbl(reg, reg); // movsxb
  } else {
    shll(reg, 24);
    sarl(reg, 24);
  }
}

// In-place sign extension of the low 16 bits of reg to 32 bits.
void MacroAssembler::sign_extend_short(Register reg) {
  if (LP64_ONLY(true ||) VM_Version::is_P6()) {
    movswl(reg, reg); // movsxw
  } else {
    shll(reg, 16);
    sarl(reg, 16);
  }
}

void MacroAssembler::testl(Register dst, AddressLiteral src) {
  assert(reachable(src), "Address should be reachable");
  testl(dst, as_Address(src));
}

void MacroAssembler::sqrtsd(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    Assembler::sqrtsd(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::sqrtsd(dst, Address(rscratch1, 0));
  }
}

void MacroAssembler::sqrtss(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    Assembler::sqrtss(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::sqrtss(dst, Address(rscratch1, 0));
  }
}

void MacroAssembler::subsd(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    Assembler::subsd(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::subsd(dst, Address(rscratch1, 0));
  }
}

void MacroAssembler::subss(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    Assembler::subss(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::subss(dst, Address(rscratch1, 0));
  }
}

//////////////////////////////////////////////////////////////////////////////////
#ifndef SERIALGC

// G1 SATB pre-barrier: records the previous value of a field (pre_val) in
// the thread's SATB mark queue while concurrent marking is active.
void MacroAssembler::g1_write_barrier_pre(Register obj,
                                          Register pre_val,
                                          Register thread,
                                          Register tmp,
                                          bool tosca_live,
                                          bool expand_call) {

  // If expand_call is true then we expand the call_VM_leaf macro
  // directly to skip generating the check by
  // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp.
#ifdef _LP64
  assert(thread == r15_thread, "must be");
#endif // _LP64

  Label done;
  Label runtime;

  assert(pre_val != noreg, "check this code");

  if (obj != noreg) {
    assert_different_registers(obj, pre_val, tmp);
    assert(pre_val != rax, "check this code");
  }

  Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
                                       PtrQueue::byte_offset_of_active()));
  Address index(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
                                       PtrQueue::byte_offset_of_index()));
  Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
                                       PtrQueue::byte_offset_of_buf()));


  // Is marking active?
  if (in_bytes(PtrQueue::byte_width_of_active()) == 4) {
    cmpl(in_progress, 0);
  } else {
    assert(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption");
    cmpb(in_progress, 0);
  }
  jcc(Assembler::equal, done);

  // Do we need to load the previous value?
  if (obj != noreg) {
    load_heap_oop(pre_val, Address(obj, 0));
  }

  // Is the previous value null?
  cmpptr(pre_val, (int32_t) NULL_WORD);
  jcc(Assembler::equal, done);

  // Can we store original value in the thread's buffer?
  // Is index == 0?
  // (The index field is typed as size_t.)

  movptr(tmp, index);                   // tmp := *index_adr
  cmpptr(tmp, 0);                       // tmp == 0?
  jcc(Assembler::equal, runtime);       // If yes, goto runtime

  subptr(tmp, wordSize);                // tmp := tmp - wordSize
  movptr(index, tmp);                   // *index_adr := tmp
  addptr(tmp, buffer);                  // tmp := tmp + *buffer_adr

  // Record the previous value
  movptr(Address(tmp, 0), pre_val);
  jmp(done);

  bind(runtime);
  // save the live input values
  if(tosca_live) push(rax);

  if (obj != noreg && obj != rax)
    push(obj);

  if (pre_val != rax)
    push(pre_val);

  // Calling the runtime using the regular call_VM_leaf mechanism generates
  // code (generated by InterpreterMacroAssember::call_VM_leaf_base)
  // that checks that the *(ebp+frame::interpreter_frame_last_sp) == NULL.
  //
  // If we care generating the pre-barrier without a frame (e.g. in the
  // intrinsified Reference.get() routine) then ebp might be pointing to
  // the caller frame and so this check will most likely fail at runtime.
  //
  // Expanding the call directly bypasses the generation of the check.
  // So when we do not have have a full interpreter frame on the stack
  // expand_call should be passed true.

  NOT_LP64( push(thread); )

  if (expand_call) {
    LP64_ONLY( assert(pre_val != c_rarg1, "smashed arg"); )
    pass_arg1(this, thread);
    pass_arg0(this, pre_val);
    MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), 2);
  } else {
    call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), pre_val, thread);
  }

  NOT_LP64( pop(thread); )

  // save the live input values
  if (pre_val != rax)
    pop(pre_val);

  if (obj != noreg && obj != rax)
    pop(obj);

  if(tosca_live) pop(rax);

  bind(done);
}

// G1 post-barrier: if the store crosses heap regions and stores a non-NULL
// oop, dirty the card for store_addr and enqueue it on the thread's dirty
// card queue, calling into the runtime when the queue is full.
void MacroAssembler::g1_write_barrier_post(Register store_addr,
                                           Register new_val,
                                           Register thread,
                                           Register tmp,
                                           Register tmp2) {
#ifdef _LP64
  assert(thread == r15_thread, "must be");
#endif // _LP64

  Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
                                       PtrQueue::byte_offset_of_index()));
  Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
                                       PtrQueue::byte_offset_of_buf()));

  BarrierSet* bs = Universe::heap()->barrier_set();
  CardTableModRefBS* ct = (CardTableModRefBS*)bs;
  Label done;
  Label runtime;

  // Does store cross heap regions?

  movptr(tmp, store_addr);
  xorptr(tmp, new_val);
  shrptr(tmp, HeapRegion::LogOfHRGrainBytes);
  jcc(Assembler::equal, done);

  // crosses regions, storing NULL?

  cmpptr(new_val, (int32_t) NULL_WORD);
  jcc(Assembler::equal, done);

  // storing region crossing non-NULL, is card already dirty?

  ExternalAddress cardtable((address) ct->byte_map_base);
  assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
#ifdef _LP64
  const Register card_addr = tmp;

  movq(card_addr, store_addr);
  shrq(card_addr, CardTableModRefBS::card_shift);

  lea(tmp2, cardtable);

  // get the address of the card
  addq(card_addr, tmp2);
#else
  const Register card_index = tmp;

  movl(card_index, store_addr);
  shrl(card_index, CardTableModRefBS::card_shift);

  Address index(noreg, card_index, Address::times_1);
  const Register card_addr = tmp;
  lea(card_addr, as_Address(ArrayAddress(cardtable, index)));
#endif
  cmpb(Address(card_addr, 0), 0);
  jcc(Assembler::equal, done);

  // storing a region crossing, non-NULL oop, card is clean.
  // dirty card and log.

  movb(Address(card_addr, 0), 0);

  cmpl(queue_index, 0);
  jcc(Assembler::equal, runtime);
  subl(queue_index, wordSize);
  movptr(tmp2, buffer);
#ifdef _LP64
  movslq(rscratch1, queue_index);
  addq(tmp2, rscratch1);
  movq(Address(tmp2, 0), card_addr);
#else
  addl(tmp2, queue_index);
  movl(Address(tmp2, 0), card_index);
#endif
  jmp(done);

  bind(runtime);
  // save the live input values
  push(store_addr);
  push(new_val);
#ifdef _LP64
  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, r15_thread);
#else
  push(thread);
  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, thread);
  pop(thread);
#endif
  pop(new_val);
  pop(store_addr);

  bind(done);
}

#endif // SERIALGC
//////////////////////////////////////////////////////////////////////////////////


void MacroAssembler::store_check(Register obj) {
  // Does a store check for the oop in register obj.
The content of 7464 // register obj is destroyed afterwards. 7465 store_check_part_1(obj); 7466 store_check_part_2(obj); 7467 } 7468 7469 void MacroAssembler::store_check(Register obj, Address dst) { 7470 store_check(obj); 7471 } 7472 7473 7474 // split the store check operation so that other instructions can be scheduled inbetween 7475 void MacroAssembler::store_check_part_1(Register obj) { 7476 BarrierSet* bs = Universe::heap()->barrier_set(); 7477 assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind"); 7478 shrptr(obj, CardTableModRefBS::card_shift); 7479 } 7480 7481 void MacroAssembler::store_check_part_2(Register obj) { 7482 BarrierSet* bs = Universe::heap()->barrier_set(); 7483 assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind"); 7484 CardTableModRefBS* ct = (CardTableModRefBS*)bs; 7485 assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code"); 7486 7487 // The calculation for byte_map_base is as follows: 7488 // byte_map_base = _byte_map - (uintptr_t(low_bound) >> card_shift); 7489 // So this essentially converts an address to a displacement and 7490 // it will never need to be relocated. On 64bit however the value may be too 7491 // large for a 32bit displacement 7492 7493 intptr_t disp = (intptr_t) ct->byte_map_base; 7494 if (is_simm32(disp)) { 7495 Address cardtable(noreg, obj, Address::times_1, disp); 7496 movb(cardtable, 0); 7497 } else { 7498 // By doing it as an ExternalAddress disp could be converted to a rip-relative 7499 // displacement and done in a single instruction given favorable mapping and 7500 // a smarter version of as_Address. Worst case it is two instructions which 7501 // is no worse off then loading disp into a register and doing as a simple 7502 // Address() as above. 7503 // We can't do as ExternalAddress as the only style since if disp == 0 we'll 7504 // assert since NULL isn't acceptable in a reloci (see 6644928). 
In any case 7505 // in some cases we'll get a single instruction version. 7506 7507 ExternalAddress cardtable((address)disp); 7508 Address index(noreg, obj, Address::times_1); 7509 movb(as_Address(ArrayAddress(cardtable, index)), 0); 7510 } 7511 } 7512 7513 void MacroAssembler::subptr(Register dst, int32_t imm32) { 7514 LP64_ONLY(subq(dst, imm32)) NOT_LP64(subl(dst, imm32)); 7515 } 7516 7517 void MacroAssembler::subptr(Register dst, Register src) { 7518 LP64_ONLY(subq(dst, src)) NOT_LP64(subl(dst, src)); 7519 } 7520 7521 // C++ bool manipulation 7522 void MacroAssembler::testbool(Register dst) { 7523 if(sizeof(bool) == 1) 7524 testb(dst, 0xff); 7525 else if(sizeof(bool) == 2) { 7526 // testw implementation needed for two byte bools 7527 ShouldNotReachHere(); 7528 } else if(sizeof(bool) == 4) 7529 testl(dst, dst); 7530 else 7531 // unsupported 7532 ShouldNotReachHere(); 7533 } 7534 7535 void MacroAssembler::testptr(Register dst, Register src) { 7536 LP64_ONLY(testq(dst, src)) NOT_LP64(testl(dst, src)); 7537 } 7538 7539 // Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes. 
// Bump-allocate an object of con_size_in_bytes (or var_size_in_bytes, if that
// register is valid) from the current thread's TLAB.  obj := old tlab_top on
// success; jumps to slow_case if the TLAB does not have enough room.
void MacroAssembler::tlab_allocate(Register obj,
                                   Register var_size_in_bytes,
                                   int con_size_in_bytes,
                                   Register t1,
                                   Register t2,
                                   Label& slow_case) {
  assert_different_registers(obj, t1, t2);
  assert_different_registers(obj, var_size_in_bytes, t1);
  Register end = t2;
  Register thread = NOT_LP64(t1) LP64_ONLY(r15_thread);

  verify_tlab();

  NOT_LP64(get_thread(thread));

  movptr(obj, Address(thread, JavaThread::tlab_top_offset()));
  if (var_size_in_bytes == noreg) {
    lea(end, Address(obj, con_size_in_bytes));
  } else {
    lea(end, Address(obj, var_size_in_bytes, Address::times_1));
  }
  cmpptr(end, Address(thread, JavaThread::tlab_end_offset()));
  jcc(Assembler::above, slow_case);

  // update the tlab top pointer
  movptr(Address(thread, JavaThread::tlab_top_offset()), end);

  // recover var_size_in_bytes if necessary
  if (var_size_in_bytes == end) {
    subptr(var_size_in_bytes, obj);
  }
  verify_tlab();
}

// Preserves rbx, and rdx.
// Discards or retains the current TLAB (filling any discarded remainder with a
// dummy int[] so the heap stays parseable) and either refills it from eden
// (jumping back to retry) or routes to try_eden/slow_case.  Returns the
// register holding the JavaThread* for use by the caller.
Register MacroAssembler::tlab_refill(Label& retry,
                                     Label& try_eden,
                                     Label& slow_case) {
  Register top = rax;
  Register t1  = rcx;
  Register t2  = rsi;
  Register thread_reg = NOT_LP64(rdi) LP64_ONLY(r15_thread);
  assert_different_registers(top, thread_reg, t1, t2, /* preserve: */ rbx, rdx);
  Label do_refill, discard_tlab;

  if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) {
    // No allocation in the shared eden.
    jmp(slow_case);
  }

  NOT_LP64(get_thread(thread_reg));

  movptr(top, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())));
  movptr(t1,  Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())));

  // calculate amount of free space
  subptr(t1, top);
  shrptr(t1, LogHeapWordSize);

  // Retain tlab and allocate object in shared space if
  // the amount free in the tlab is too large to discard.
  cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset())));
  jcc(Assembler::lessEqual, discard_tlab);

  // Retain
  // %%% yuck as movptr...
  movptr(t2, (int32_t) ThreadLocalAllocBuffer::refill_waste_limit_increment());
  addptr(Address(thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset())), t2);
  if (TLABStats) {
    // increment number of slow_allocations
    addl(Address(thread_reg, in_bytes(JavaThread::tlab_slow_allocations_offset())), 1);
  }
  jmp(try_eden);

  bind(discard_tlab);
  if (TLABStats) {
    // increment number of refills
    addl(Address(thread_reg, in_bytes(JavaThread::tlab_number_of_refills_offset())), 1);
    // accumulate wastage -- t1 is amount free in tlab
    addl(Address(thread_reg, in_bytes(JavaThread::tlab_fast_refill_waste_offset())), t1);
  }

  // if tlab is currently allocated (top or end != null) then
  // fill [top, end + alignment_reserve) with array object
  testptr(top, top);
  jcc(Assembler::zero, do_refill);

  // set up the mark word
  movptr(Address(top, oopDesc::mark_offset_in_bytes()), (intptr_t)markOopDesc::prototype()->copy_set_hash(0x2));
  // set the length to the remaining space
  subptr(t1, typeArrayOopDesc::header_size(T_INT));
  addptr(t1, (int32_t)ThreadLocalAllocBuffer::alignment_reserve());
  // convert remaining heap words to a jint element count for the filler array
  shlptr(t1, log2_intptr(HeapWordSize/sizeof(jint)));
  movl(Address(top, arrayOopDesc::length_offset_in_bytes()), t1);
  // set klass to intArrayKlass
  // dubious reloc why not an oop reloc?
  movptr(t1, ExternalAddress((address)Universe::intArrayKlassObj_addr()));
  // store klass last.  concurrent gcs assumes klass length is valid if
  // klass field is not null.
  store_klass(top, t1);

  movptr(t1, top);
  subptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_start_offset())));
  incr_allocated_bytes(thread_reg, t1, 0);

  // refill the tlab with an eden allocation
  bind(do_refill);
  movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_size_offset())));
  shlptr(t1, LogHeapWordSize);
  // allocate new tlab, address returned in top
  eden_allocate(top, t1, 0, t2, slow_case);

  // Check that t1 was preserved in eden_allocate.
#ifdef ASSERT
  if (UseTLAB) {
    Label ok;
    Register tsize = rsi;
    assert_different_registers(tsize, thread_reg, t1);
    push(tsize);
    movptr(tsize, Address(thread_reg, in_bytes(JavaThread::tlab_size_offset())));
    shlptr(tsize, LogHeapWordSize);
    cmpptr(t1, tsize);
    jcc(Assembler::equal, ok);
    stop("assert(t1 != tlab size)");
    should_not_reach_here();

    bind(ok);
    pop(tsize);
  }
#endif
  movptr(Address(thread_reg, in_bytes(JavaThread::tlab_start_offset())), top);
  movptr(Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())), top);
  addptr(top, t1);
  subptr(top, (int32_t)ThreadLocalAllocBuffer::alignment_reserve_in_bytes());
  movptr(Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())), top);
  verify_tlab();
  jmp(retry);

  return thread_reg; // for use by caller
}

// Adds var_size_in_bytes (if a valid register) or else con_size_in_bytes to
// the per-thread allocated-bytes counter.  On 32-bit the counter is wider than
// a machine word, hence the addl/adcl carry-propagation pair.
void MacroAssembler::incr_allocated_bytes(Register thread,
                                          Register var_size_in_bytes,
                                          int con_size_in_bytes,
                                          Register t1) {
#ifdef _LP64
  if (var_size_in_bytes->is_valid()) {
    addq(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), var_size_in_bytes);
  } else {
    addq(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), con_size_in_bytes);
  }
#else
  if (!thread->is_valid()) {
    assert(t1->is_valid(), "need temp reg");
    thread = t1;
    get_thread(thread);
  }

  if (var_size_in_bytes->is_valid()) {
    addl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), var_size_in_bytes);
  } else {
    addl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), con_size_in_bytes);
  }
  // carry into the high half of the 64-bit counter
  adcl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())+4), 0);
#endif
}

static const double pi_4 = 0.7853981633974483;

// Emits sin/cos/tan (selected by trig = 's'/'c'/'t') of the value on the x87
// stack top, in place.  Uses the hardware instruction when |x| <= pi/4 and
// otherwise calls into SharedRuntime, preserving the caller's registers and
// the other num_fpu_regs_in_use x87 stack slots across the call.
void MacroAssembler::trigfunc(char trig, int num_fpu_regs_in_use) {
  // A hand-coded argument reduction for values in fabs(pi/4, pi/2)
  // was attempted in this code; unfortunately it appears that the
  // switch to 80-bit precision and back causes this to be
  // unprofitable compared with simply performing a runtime call if
  // the argument is out of the (-pi/4, pi/4) range.

  Register tmp = noreg;
  if (!VM_Version::supports_cmov()) {
    // fcmp needs a temporary so preserve rbx,
    tmp = rbx;
    push(tmp);
  }

  Label slow_case, done;

  ExternalAddress pi4_adr = (address)&pi_4;
  if (reachable(pi4_adr)) {
    // x ?<= pi/4
    fld_d(pi4_adr);
    fld_s(1);                // Stack:  X  PI/4  X
    fabs();                  // Stack: |X| PI/4  X
    fcmp(tmp);
    jcc(Assembler::above, slow_case);

    // fastest case: -pi/4 <= x <= pi/4
    switch(trig) {
    case 's':
      fsin();
      break;
    case 'c':
      fcos();
      break;
    case 't':
      ftan();
      break;
    default:
      assert(false, "bad intrinsic");
      break;
    }
    jmp(done);
  }

  // slow case: runtime call
  bind(slow_case);
  // Preserve registers across runtime call
  pusha();
  int incoming_argument_and_return_value_offset = -1;
  if (num_fpu_regs_in_use > 1) {
    // Must preserve all other FPU regs (could alternatively convert
    // SharedRuntime::dsin and dcos into assembly routines known not to trash
    // FPU state, but can not trust C compiler)
    NEEDS_CLEANUP;
    // NOTE that in this case we also push the incoming argument to
    // the stack and restore it later; we also use this stack slot to
    // hold the return value from dsin or dcos.
    for (int i = 0; i < num_fpu_regs_in_use; i++) {
      subptr(rsp, sizeof(jdouble));
      fstp_d(Address(rsp, 0));
    }
    incoming_argument_and_return_value_offset = sizeof(jdouble)*(num_fpu_regs_in_use-1);
    fld_d(Address(rsp, incoming_argument_and_return_value_offset));
  }
  subptr(rsp, sizeof(jdouble));
  fstp_d(Address(rsp, 0));
#ifdef _LP64
  // 64-bit ABI passes the argument in xmm0, not on the x87 stack
  movdbl(xmm0, Address(rsp, 0));
#endif // _LP64

  // NOTE: we must not use call_VM_leaf here because that requires a
  // complete interpreter frame in debug mode -- same bug as 4387334
  // MacroAssembler::call_VM_leaf_base is perfectly safe and will
  // do proper 64bit abi

  NEEDS_CLEANUP;
  // Need to add stack banging before this runtime call if it needs to
  // be taken; however, there is no generic stack banging routine at
  // the MacroAssembler level
  switch(trig) {
  case 's':
    {
      MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::dsin), 0);
    }
    break;
  case 'c':
    {
      MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::dcos), 0);
    }
    break;
  case 't':
    {
      MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::dtan), 0);
    }
    break;
  default:
    assert(false, "bad intrinsic");
    break;
  }
#ifdef _LP64
  // move the xmm0 result back onto the x87 stack for the caller
  movsd(Address(rsp, 0), xmm0);
  fld_d(Address(rsp, 0));
#endif // _LP64
  addptr(rsp, sizeof(jdouble));
  if (num_fpu_regs_in_use > 1) {
    // Must save return value to stack and then restore entire FPU stack
    fstp_d(Address(rsp, incoming_argument_and_return_value_offset));
    for (int i = 0; i < num_fpu_regs_in_use; i++) {
      fld_d(Address(rsp, 0));
      addptr(rsp, sizeof(jdouble));
    }
  }
  popa();

  // Come here with result in F-TOS
  bind(done);

  if (tmp != noreg) {
    pop(tmp);
  }
}


// Look up the method for a megamorphic invokeinterface call.
// The target method is determined by <intf_klass, itable_index>.
// The receiver klass is in recv_klass.
// On success, the result will be in method_result, and execution falls through.
// On failure, execution transfers to the given label.
void MacroAssembler::lookup_interface_method(Register recv_klass,
                                             Register intf_klass,
                                             RegisterOrConstant itable_index,
                                             Register method_result,
                                             Register scan_temp,
                                             Label& L_no_such_interface) {
  assert_different_registers(recv_klass, intf_klass, method_result, scan_temp);
  assert(itable_index.is_constant() || itable_index.as_register() == method_result,
         "caller must use same register for non-constant itable index as for method");

  // Compute start of first itableOffsetEntry (which is at the end of the vtable)
  int vtable_base = instanceKlass::vtable_start_offset() * wordSize;
  int itentry_off = itableMethodEntry::method_offset_in_bytes();
  int scan_step = itableOffsetEntry::size() * wordSize;
  int vte_size = vtableEntry::size() * wordSize;
  Address::ScaleFactor times_vte_scale = Address::times_ptr;
  assert(vte_size == wordSize, "else adjust times_vte_scale");

  movl(scan_temp, Address(recv_klass, instanceKlass::vtable_length_offset() * wordSize));

  // %%% Could store the aligned, prescaled offset in the klassoop.
  lea(scan_temp, Address(recv_klass, scan_temp, times_vte_scale, vtable_base));
  if (HeapWordsPerLong > 1) {
    // Round up to align_object_offset boundary
    // see code for instanceKlass::start_of_itable!
    round_to(scan_temp, BytesPerLong);
  }

  // Adjust recv_klass by scaled itable_index, so we can free itable_index.
  assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below");
  lea(recv_klass, Address(recv_klass, itable_index, Address::times_ptr, itentry_off));

  // for (scan = klass->itable(); scan->interface() != NULL; scan += scan_step) {
  //   if (scan->interface() == intf) {
  //     result = (klass + scan->offset() + itable_index);
  //   }
  // }
  Label search, found_method;

  // peel == 1 is the unrolled first iteration (common hit); peel == 0 is the
  // loop proper with the null-entry (no-such-interface) check.
  for (int peel = 1; peel >= 0; peel--) {
    movptr(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes()));
    cmpptr(intf_klass, method_result);

    if (peel) {
      jccb(Assembler::equal, found_method);
    } else {
      jccb(Assembler::notEqual, search);
      // (invert the test to fall through to found_method...)
    }

    if (!peel)  break;

    bind(search);

    // Check that the previous entry is non-null.  A null entry means that
    // the receiver class doesn't implement the interface, and wasn't the
    // same as when the caller was compiled.
    testptr(method_result, method_result);
    jcc(Assembler::zero, L_no_such_interface);
    addptr(scan_temp, scan_step);
  }

  bind(found_method);

  // Got a hit.
  movl(scan_temp, Address(scan_temp, itableOffsetEntry::offset_offset_in_bytes()));
  movptr(method_result, Address(recv_klass, scan_temp, Address::times_1));
}


// Unconditional subtype check: falls into L_failure's bind point when
// sub_klass is not a subtype of super_klass, otherwise jumps to L_success.
void MacroAssembler::check_klass_subtype(Register sub_klass,
                                         Register super_klass,
                                         Register temp_reg,
                                         Label& L_success) {
  Label L_failure;
  check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg,        &L_success, &L_failure, NULL);
  check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, noreg, &L_success, NULL);
  bind(L_failure);
}


void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
                                                   Register super_klass,
                                                   Register temp_reg,
                                                   Label* L_success,
                                                   Label* L_failure,
                                                   Label* L_slow_path,
                                                   RegisterOrConstant super_check_offset) {
  assert_different_registers(sub_klass, super_klass, temp_reg);
  bool must_load_sco = (super_check_offset.constant_or_zero() == -1);
  if (super_check_offset.is_register()) {
    assert_different_registers(sub_klass, super_klass,
                               super_check_offset.as_register());
  } else if (must_load_sco) {
    assert(temp_reg != noreg, "supply either a temp or a register offset");
  }

  Label L_fallthrough;
  int label_nulls = 0;
  if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
  if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
  if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; }
  assert(label_nulls <= 1, "at most one NULL in the batch");

  int sc_offset = (klassOopDesc::header_size() * HeapWordSize +
                   Klass::secondary_super_cache_offset_in_bytes());
  int sco_offset = (klassOopDesc::header_size() * HeapWordSize +
                    Klass::super_check_offset_offset_in_bytes());
  Address super_check_offset_addr(super_klass, sco_offset);

  // Hacked jcc, which "knows" that L_fallthrough, at least, is in
  // range of a jccb.  If this routine grows larger, reconsider at
  // least some of these.
#define local_jcc(assembler_cond, label)                                \
  if (&(label) == &L_fallthrough)  jccb(assembler_cond, label);         \
  else                             jcc( assembler_cond, label) /*omit semi*/

  // Hacked jmp, which may only be used just before L_fallthrough.
#define final_jmp(label)                                                \
  if (&(label) == &L_fallthrough) { /*do nothing*/ }                    \
  else                            jmp(label)                /*omit semi*/

  // If the pointers are equal, we are done (e.g., String[] elements).
  // This self-check enables sharing of secondary supertype arrays among
  // non-primary types such as array-of-interface.  Otherwise, each such
  // type would need its own customized SSA.
  // We move this check to the front of the fast path because many
  // type checks are in fact trivially successful in this manner,
  // so we get a nicely predicted branch right at the start of the check.
  cmpptr(sub_klass, super_klass);
  local_jcc(Assembler::equal, *L_success);

  // Check the supertype display:
  if (must_load_sco) {
    // Positive movl does right thing on LP64.
    movl(temp_reg, super_check_offset_addr);
    super_check_offset = RegisterOrConstant(temp_reg);
  }
  Address super_check_addr(sub_klass, super_check_offset, Address::times_1, 0);
  cmpptr(super_klass, super_check_addr); // load displayed supertype

  // This check has worked decisively for primary supers.
  // Secondary supers are sought in the super_cache ('super_cache_addr').
  // (Secondary supers are interfaces and very deeply nested subtypes.)
  // This works in the same check above because of a tricky aliasing
  // between the super_cache and the primary super display elements.
  // (The 'super_check_addr' can address either, as the case requires.)
  // Note that the cache is updated below if it does not help us find
  // what we need immediately.
  // So if it was a primary super, we can just fail immediately.
  // Otherwise, it's the slow path for us (no success at this point).

  if (super_check_offset.is_register()) {
    local_jcc(Assembler::equal, *L_success);
    cmpl(super_check_offset.as_register(), sc_offset);
    if (L_failure == &L_fallthrough) {
      local_jcc(Assembler::equal, *L_slow_path);
    } else {
      local_jcc(Assembler::notEqual, *L_failure);
      final_jmp(*L_slow_path);
    }
  } else if (super_check_offset.as_constant() == sc_offset) {
    // Need a slow path; fast failure is impossible.
    if (L_slow_path == &L_fallthrough) {
      local_jcc(Assembler::equal, *L_success);
    } else {
      local_jcc(Assembler::notEqual, *L_slow_path);
      final_jmp(*L_success);
    }
  } else {
    // No slow path; it's a fast decision.
    if (L_failure == &L_fallthrough) {
      local_jcc(Assembler::equal, *L_success);
    } else {
      local_jcc(Assembler::notEqual, *L_failure);
      final_jmp(*L_success);
    }
  }

  bind(L_fallthrough);

#undef local_jcc
#undef final_jmp
}


void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
                                                   Register super_klass,
                                                   Register temp_reg,
                                                   Register temp2_reg,
                                                   Label* L_success,
                                                   Label* L_failure,
                                                   bool set_cond_codes) {
  assert_different_registers(sub_klass, super_klass, temp_reg);
  if (temp2_reg != noreg)
    assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg);
#define IS_A_TEMP(reg) ((reg) == temp_reg || (reg) == temp2_reg)

  Label L_fallthrough;
  int label_nulls = 0;
  if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
  if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
  assert(label_nulls <= 1, "at most one NULL in the batch");

  // a couple of useful fields in sub_klass:
  int ss_offset = (klassOopDesc::header_size() * HeapWordSize +
                   Klass::secondary_supers_offset_in_bytes());
  int sc_offset = (klassOopDesc::header_size() * HeapWordSize +
                   Klass::secondary_super_cache_offset_in_bytes());
  Address secondary_supers_addr(sub_klass, ss_offset);
  Address super_cache_addr(     sub_klass, sc_offset);

  // Do a linear scan of the secondary super-klass chain.
  // This code is rarely used, so simplicity is a virtue here.
  // The repne_scan instruction uses fixed registers, which we must spill.
  // Don't worry too much about pre-existing connections with the input regs.

  assert(sub_klass != rax, "killed reg"); // killed by mov(rax, super)
  assert(sub_klass != rcx, "killed reg"); // killed by lea(rcx, &pst_counter)

  // Get super_klass value into rax (even if it was in rdi or rcx).
  bool pushed_rax = false, pushed_rcx = false, pushed_rdi = false;
  if (super_klass != rax || UseCompressedOops) {
    if (!IS_A_TEMP(rax)) { push(rax); pushed_rax = true; }
    mov(rax, super_klass);
  }
  if (!IS_A_TEMP(rcx)) { push(rcx); pushed_rcx = true; }
  if (!IS_A_TEMP(rdi)) { push(rdi); pushed_rdi = true; }

#ifndef PRODUCT
  int* pst_counter = &SharedRuntime::_partial_subtype_ctr;
  ExternalAddress pst_counter_addr((address) pst_counter);
  NOT_LP64(  incrementl(pst_counter_addr) );
  LP64_ONLY( lea(rcx, pst_counter_addr) );
  LP64_ONLY( incrementl(Address(rcx, 0)) );
#endif //PRODUCT

  // We will consult the secondary-super array.
  movptr(rdi, secondary_supers_addr);
  // Load the array length.  (Positive movl does right thing on LP64.)
  movl(rcx, Address(rdi, arrayOopDesc::length_offset_in_bytes()));
  // Skip to start of data.
  addptr(rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT));

  // Scan RCX words at [RDI] for an occurrence of RAX.
  // Set NZ/Z based on last compare.
  // Z flag value will not be set by 'repne' if RCX == 0 since 'repne' does
  // not change flags (only scas instruction which is repeated sets flags).
  // Set Z = 0 (not equal) before 'repne' to indicate that class was not found.
#ifdef _LP64
  // This part is tricky, as values in supers array could be 32 or 64 bit wide
  // and we store values in objArrays always encoded, thus we need to encode
  // the value of rax before repne.  Note that rax is dead after the repne.
  if (UseCompressedOops) {
    encode_heap_oop_not_null(rax); // Changes flags.
    // The superclass is never null; it would be a basic system error if a null
    // pointer were to sneak in here.  Note that we have already loaded the
    // Klass::super_check_offset from the super_klass in the fast path,
    // so if there is a null in that register, we are already in the afterlife.
    testl(rax,rax); // Set Z = 0
    repne_scanl();
  } else
#endif // _LP64
  {
    testptr(rax,rax); // Set Z = 0
    repne_scan();
  }
  // Unspill the temp. registers:
  if (pushed_rdi)  pop(rdi);
  if (pushed_rcx)  pop(rcx);
  if (pushed_rax)  pop(rax);

  if (set_cond_codes) {
    // Special hack for the AD files:  rdi is guaranteed non-zero.
    assert(!pushed_rdi, "rdi must be left non-NULL");
    // Also, the condition codes are properly set Z/NZ on succeed/failure.
  }

  if (L_failure == &L_fallthrough)
        jccb(Assembler::notEqual, *L_failure);
  else  jcc(Assembler::notEqual, *L_failure);

  // Success.  Cache the super we found and proceed in triumph.
  movptr(super_cache_addr, super_klass);

  if (L_success != &L_fallthrough) {
    jmp(*L_success);
  }

#undef IS_A_TEMP

  bind(L_fallthrough);
}


// Unordered double compare against a literal address; goes through rscratch1
// when the literal is out of rip-relative reach.
void MacroAssembler::ucomisd(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    Assembler::ucomisd(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::ucomisd(dst, Address(rscratch1, 0));
  }
}

// Unordered float compare against a literal address; goes through rscratch1
// when the literal is out of rip-relative reach.
void MacroAssembler::ucomiss(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    Assembler::ucomiss(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::ucomiss(dst, Address(rscratch1, 0));
  }
}

void MacroAssembler::xorpd(XMMRegister dst, AddressLiteral src) {
  // Used in sign-bit flipping with aligned address.
  assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");
  if (reachable(src)) {
    Assembler::xorpd(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::xorpd(dst, Address(rscratch1, 0));
  }
}

void MacroAssembler::xorps(XMMRegister dst, AddressLiteral src) {
  // Used in sign-bit flipping with aligned address.
  assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");
  if (reachable(src)) {
    Assembler::xorps(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::xorps(dst, Address(rscratch1, 0));
  }
}

// Conditional 32-bit move from memory; emits a branch-around + movl on CPUs
// without CMOV support.
void MacroAssembler::cmov32(Condition cc, Register dst, Address src) {
  if (VM_Version::supports_cmov()) {
    cmovl(cc, dst, src);
  } else {
    Label L;
    jccb(negate_condition(cc), L);
    movl(dst, src);
    bind(L);
  }
}

// Conditional 32-bit register move; emits a branch-around + movl on CPUs
// without CMOV support.
void MacroAssembler::cmov32(Condition cc, Register dst, Register src) {
  if (VM_Version::supports_cmov()) {
    cmovl(cc, dst, src);
  } else {
    Label L;
    jccb(negate_condition(cc), L);
    movl(dst, src);
    bind(L);
  }
}

// Emits a call to the verify_oop stub for the oop in 'reg', tagged with
// message 's'.  No-op unless -XX:+VerifyOops.
void MacroAssembler::verify_oop(Register reg, const char* s) {
  if (!VerifyOops) return;

  // Pass register number to verify_oop_subroutine
  // NOTE: 'b' is intentionally never freed -- its address is baked into the
  // generated code below.
  char* b = new char[strlen(s) + 50];
  sprintf(b, "verify_oop: %s: %s", reg->name(), s);
#ifdef _LP64
  push(rscratch1);                    // save r10, trashed by movptr()
#endif
  push(rax);                          // save rax,
  push(reg);                          // pass register argument
  ExternalAddress buffer((address) b);
  // avoid using pushptr, as it modifies scratch registers
  // and our contract is not to modify anything
  movptr(rax, buffer.addr());
  push(rax);
  // call indirectly to solve generation ordering problem
  movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));
  call(rax);
  // Caller pops the arguments (oop, message) and restores rax, r10
}


// Returns *delayed_value_addr + offset as a constant if the delayed value is
// already known; otherwise emits code to load it into tmp at runtime (with an
// ASSERT check that it is non-zero by then) and returns tmp.
RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr,
                                                      Register tmp,
                                                      int offset) {
  intptr_t value = *delayed_value_addr;
  if (value != 0)
    return RegisterOrConstant(value + offset);

  // load indirectly to solve generation ordering problem
  movptr(tmp, ExternalAddress((address) delayed_value_addr));

#ifdef ASSERT
  { Label L;
    testptr(tmp, tmp);
    if (WizardMode) {
      jcc(Assembler::notZero, L);
      char* buf = new char[40];
      sprintf(buf, "DelayedValue="INTPTR_FORMAT, delayed_value_addr[1]);
      stop(buf);
    } else {
      jccb(Assembler::notZero, L);
      hlt();
    }
    bind(L);
  }
#endif

  if (offset != 0)
    addptr(tmp, offset);

  return RegisterOrConstant(tmp);
}


// registers on entry:
//  - rax ('check' register): required MethodType
//  - rcx: method handle
//  - rdx, rsi, or ?: killable temp
void MacroAssembler::check_method_handle_type(Register mtype_reg, Register mh_reg,
                                              Register temp_reg,
                                              Label& wrong_method_type) {
  Address type_addr(mh_reg, delayed_value(java_lang_invoke_MethodHandle::type_offset_in_bytes, temp_reg));
  // compare method type against that of the receiver
  if (UseCompressedOops) {
    // compressed oops cannot be compared against memory directly; decode via temp
    load_heap_oop(temp_reg, type_addr);
    cmpptr(mtype_reg, temp_reg);
  } else {
    cmpptr(mtype_reg, type_addr);
  }
  jcc(Assembler::notEqual, wrong_method_type);
}


// A method handle has a "vmslots" field which gives the size of its
// argument list in JVM stack slots.  This field is either located directly
// in every method handle, or else is indirectly accessed through the
// method handle's MethodType.  This macro hides the distinction.
8272 void MacroAssembler::load_method_handle_vmslots(Register vmslots_reg, Register mh_reg, 8273 Register temp_reg) { 8274 assert_different_registers(vmslots_reg, mh_reg, temp_reg); 8275 // load mh.type.form.vmslots 8276 Register temp2_reg = vmslots_reg; 8277 load_heap_oop(temp2_reg, Address(mh_reg, delayed_value(java_lang_invoke_MethodHandle::type_offset_in_bytes, temp_reg))); 8278 load_heap_oop(temp2_reg, Address(temp2_reg, delayed_value(java_lang_invoke_MethodType::form_offset_in_bytes, temp_reg))); 8279 movl(vmslots_reg, Address(temp2_reg, delayed_value(java_lang_invoke_MethodTypeForm::vmslots_offset_in_bytes, temp_reg))); 8280 } 8281 8282 8283 // registers on entry: 8284 // - rcx: method handle 8285 // - rdx: killable temp (interpreted only) 8286 // - rax: killable temp (compiled only) 8287 void MacroAssembler::jump_to_method_handle_entry(Register mh_reg, Register temp_reg) { 8288 assert(mh_reg == rcx, "caller must put MH object in rcx"); 8289 assert_different_registers(mh_reg, temp_reg); 8290 8291 // pick out the interpreted side of the handler 8292 // NOTE: vmentry is not an oop! 8293 movptr(temp_reg, Address(mh_reg, delayed_value(java_lang_invoke_MethodHandle::vmentry_offset_in_bytes, temp_reg))); 8294 8295 // off we go... 8296 jmp(Address(temp_reg, MethodHandleEntry::from_interpreted_entry_offset_in_bytes())); 8297 8298 // for the various stubs which take control at this point, 8299 // see MethodHandles::generate_method_handle_stub 8300 } 8301 8302 8303 Address MacroAssembler::argument_address(RegisterOrConstant arg_slot, 8304 int extra_slot_offset) { 8305 // cf. TemplateTable::prepare_invoke(), if (load_receiver). 
8306 int stackElementSize = Interpreter::stackElementSize; 8307 int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0); 8308 #ifdef ASSERT 8309 int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1); 8310 assert(offset1 - offset == stackElementSize, "correct arithmetic"); 8311 #endif 8312 Register scale_reg = noreg; 8313 Address::ScaleFactor scale_factor = Address::no_scale; 8314 if (arg_slot.is_constant()) { 8315 offset += arg_slot.as_constant() * stackElementSize; 8316 } else { 8317 scale_reg = arg_slot.as_register(); 8318 scale_factor = Address::times(stackElementSize); 8319 } 8320 offset += wordSize; // return PC is on stack 8321 return Address(rsp, scale_reg, scale_factor, offset); 8322 } 8323 8324 8325 void MacroAssembler::verify_oop_addr(Address addr, const char* s) { 8326 if (!VerifyOops) return; 8327 8328 // Address adjust(addr.base(), addr.index(), addr.scale(), addr.disp() + BytesPerWord); 8329 // Pass register number to verify_oop_subroutine 8330 char* b = new char[strlen(s) + 50]; 8331 sprintf(b, "verify_oop_addr: %s", s); 8332 8333 #ifdef _LP64 8334 push(rscratch1); // save r10, trashed by movptr() 8335 #endif 8336 push(rax); // save rax, 8337 // addr may contain rsp so we will have to adjust it based on the push 8338 // we just did (and on 64 bit we do two pushes) 8339 // NOTE: 64bit seemed to have had a bug in that it did movq(addr, rax); which 8340 // stores rax into addr which is backwards of what was intended. 
8341 if (addr.uses(rsp)) { 8342 lea(rax, addr); 8343 pushptr(Address(rax, LP64_ONLY(2 *) BytesPerWord)); 8344 } else { 8345 pushptr(addr); 8346 } 8347 8348 ExternalAddress buffer((address) b); 8349 // pass msg argument 8350 // avoid using pushptr, as it modifies scratch registers 8351 // and our contract is not to modify anything 8352 movptr(rax, buffer.addr()); 8353 push(rax); 8354 8355 // call indirectly to solve generation ordering problem 8356 movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address())); 8357 call(rax); 8358 // Caller pops the arguments (addr, message) and restores rax, r10. 8359 } 8360 8361 void MacroAssembler::verify_tlab() { 8362 #ifdef ASSERT 8363 if (UseTLAB && VerifyOops) { 8364 Label next, ok; 8365 Register t1 = rsi; 8366 Register thread_reg = NOT_LP64(rbx) LP64_ONLY(r15_thread); 8367 8368 push(t1); 8369 NOT_LP64(push(thread_reg)); 8370 NOT_LP64(get_thread(thread_reg)); 8371 8372 movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset()))); 8373 cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_start_offset()))); 8374 jcc(Assembler::aboveEqual, next); 8375 stop("assert(top >= start)"); 8376 should_not_reach_here(); 8377 8378 bind(next); 8379 movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_end_offset()))); 8380 cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset()))); 8381 jcc(Assembler::aboveEqual, ok); 8382 stop("assert(top <= end)"); 8383 should_not_reach_here(); 8384 8385 bind(ok); 8386 NOT_LP64(pop(thread_reg)); 8387 pop(t1); 8388 } 8389 #endif 8390 } 8391 8392 class ControlWord { 8393 public: 8394 int32_t _value; 8395 8396 int rounding_control() const { return (_value >> 10) & 3 ; } 8397 int precision_control() const { return (_value >> 8) & 3 ; } 8398 bool precision() const { return ((_value >> 5) & 1) != 0; } 8399 bool underflow() const { return ((_value >> 4) & 1) != 0; } 8400 bool overflow() const { return ((_value >> 3) & 1) != 0; } 8401 bool zero_divide() const { 
return ((_value >> 2) & 1) != 0; } 8402 bool denormalized() const { return ((_value >> 1) & 1) != 0; } 8403 bool invalid() const { return ((_value >> 0) & 1) != 0; } 8404 8405 void print() const { 8406 // rounding control 8407 const char* rc; 8408 switch (rounding_control()) { 8409 case 0: rc = "round near"; break; 8410 case 1: rc = "round down"; break; 8411 case 2: rc = "round up "; break; 8412 case 3: rc = "chop "; break; 8413 }; 8414 // precision control 8415 const char* pc; 8416 switch (precision_control()) { 8417 case 0: pc = "24 bits "; break; 8418 case 1: pc = "reserved"; break; 8419 case 2: pc = "53 bits "; break; 8420 case 3: pc = "64 bits "; break; 8421 }; 8422 // flags 8423 char f[9]; 8424 f[0] = ' '; 8425 f[1] = ' '; 8426 f[2] = (precision ()) ? 'P' : 'p'; 8427 f[3] = (underflow ()) ? 'U' : 'u'; 8428 f[4] = (overflow ()) ? 'O' : 'o'; 8429 f[5] = (zero_divide ()) ? 'Z' : 'z'; 8430 f[6] = (denormalized()) ? 'D' : 'd'; 8431 f[7] = (invalid ()) ? 'I' : 'i'; 8432 f[8] = '\x0'; 8433 // output 8434 printf("%04x masks = %s, %s, %s", _value & 0xFFFF, f, rc, pc); 8435 } 8436 8437 }; 8438 8439 class StatusWord { 8440 public: 8441 int32_t _value; 8442 8443 bool busy() const { return ((_value >> 15) & 1) != 0; } 8444 bool C3() const { return ((_value >> 14) & 1) != 0; } 8445 bool C2() const { return ((_value >> 10) & 1) != 0; } 8446 bool C1() const { return ((_value >> 9) & 1) != 0; } 8447 bool C0() const { return ((_value >> 8) & 1) != 0; } 8448 int top() const { return (_value >> 11) & 7 ; } 8449 bool error_status() const { return ((_value >> 7) & 1) != 0; } 8450 bool stack_fault() const { return ((_value >> 6) & 1) != 0; } 8451 bool precision() const { return ((_value >> 5) & 1) != 0; } 8452 bool underflow() const { return ((_value >> 4) & 1) != 0; } 8453 bool overflow() const { return ((_value >> 3) & 1) != 0; } 8454 bool zero_divide() const { return ((_value >> 2) & 1) != 0; } 8455 bool denormalized() const { return ((_value >> 1) & 1) != 0; } 8456 bool 
invalid() const { return ((_value >> 0) & 1) != 0; } 8457 8458 void print() const { 8459 // condition codes 8460 char c[5]; 8461 c[0] = (C3()) ? '3' : '-'; 8462 c[1] = (C2()) ? '2' : '-'; 8463 c[2] = (C1()) ? '1' : '-'; 8464 c[3] = (C0()) ? '0' : '-'; 8465 c[4] = '\x0'; 8466 // flags 8467 char f[9]; 8468 f[0] = (error_status()) ? 'E' : '-'; 8469 f[1] = (stack_fault ()) ? 'S' : '-'; 8470 f[2] = (precision ()) ? 'P' : '-'; 8471 f[3] = (underflow ()) ? 'U' : '-'; 8472 f[4] = (overflow ()) ? 'O' : '-'; 8473 f[5] = (zero_divide ()) ? 'Z' : '-'; 8474 f[6] = (denormalized()) ? 'D' : '-'; 8475 f[7] = (invalid ()) ? 'I' : '-'; 8476 f[8] = '\x0'; 8477 // output 8478 printf("%04x flags = %s, cc = %s, top = %d", _value & 0xFFFF, f, c, top()); 8479 } 8480 8481 }; 8482 8483 class TagWord { 8484 public: 8485 int32_t _value; 8486 8487 int tag_at(int i) const { return (_value >> (i*2)) & 3; } 8488 8489 void print() const { 8490 printf("%04x", _value & 0xFFFF); 8491 } 8492 8493 }; 8494 8495 class FPU_Register { 8496 public: 8497 int32_t _m0; 8498 int32_t _m1; 8499 int16_t _ex; 8500 8501 bool is_indefinite() const { 8502 return _ex == -1 && _m1 == (int32_t)0xC0000000 && _m0 == 0; 8503 } 8504 8505 void print() const { 8506 char sign = (_ex < 0) ? '-' : '+'; 8507 const char* kind = (_ex == 0x7FFF || _ex == (int16_t)-1) ? 
"NaN" : " "; 8508 printf("%c%04hx.%08x%08x %s", sign, _ex, _m1, _m0, kind); 8509 }; 8510 8511 }; 8512 8513 class FPU_State { 8514 public: 8515 enum { 8516 register_size = 10, 8517 number_of_registers = 8, 8518 register_mask = 7 8519 }; 8520 8521 ControlWord _control_word; 8522 StatusWord _status_word; 8523 TagWord _tag_word; 8524 int32_t _error_offset; 8525 int32_t _error_selector; 8526 int32_t _data_offset; 8527 int32_t _data_selector; 8528 int8_t _register[register_size * number_of_registers]; 8529 8530 int tag_for_st(int i) const { return _tag_word.tag_at((_status_word.top() + i) & register_mask); } 8531 FPU_Register* st(int i) const { return (FPU_Register*)&_register[register_size * i]; } 8532 8533 const char* tag_as_string(int tag) const { 8534 switch (tag) { 8535 case 0: return "valid"; 8536 case 1: return "zero"; 8537 case 2: return "special"; 8538 case 3: return "empty"; 8539 } 8540 ShouldNotReachHere(); 8541 return NULL; 8542 } 8543 8544 void print() const { 8545 // print computation registers 8546 { int t = _status_word.top(); 8547 for (int i = 0; i < number_of_registers; i++) { 8548 int j = (i - t) & register_mask; 8549 printf("%c r%d = ST%d = ", (j == 0 ? 
'*' : ' '), i, j); 8550 st(j)->print(); 8551 printf(" %s\n", tag_as_string(_tag_word.tag_at(i))); 8552 } 8553 } 8554 printf("\n"); 8555 // print control registers 8556 printf("ctrl = "); _control_word.print(); printf("\n"); 8557 printf("stat = "); _status_word .print(); printf("\n"); 8558 printf("tags = "); _tag_word .print(); printf("\n"); 8559 } 8560 8561 }; 8562 8563 class Flag_Register { 8564 public: 8565 int32_t _value; 8566 8567 bool overflow() const { return ((_value >> 11) & 1) != 0; } 8568 bool direction() const { return ((_value >> 10) & 1) != 0; } 8569 bool sign() const { return ((_value >> 7) & 1) != 0; } 8570 bool zero() const { return ((_value >> 6) & 1) != 0; } 8571 bool auxiliary_carry() const { return ((_value >> 4) & 1) != 0; } 8572 bool parity() const { return ((_value >> 2) & 1) != 0; } 8573 bool carry() const { return ((_value >> 0) & 1) != 0; } 8574 8575 void print() const { 8576 // flags 8577 char f[8]; 8578 f[0] = (overflow ()) ? 'O' : '-'; 8579 f[1] = (direction ()) ? 'D' : '-'; 8580 f[2] = (sign ()) ? 'S' : '-'; 8581 f[3] = (zero ()) ? 'Z' : '-'; 8582 f[4] = (auxiliary_carry()) ? 'A' : '-'; 8583 f[5] = (parity ()) ? 'P' : '-'; 8584 f[6] = (carry ()) ? 
'C' : '-'; 8585 f[7] = '\x0'; 8586 // output 8587 printf("%08x flags = %s", _value, f); 8588 } 8589 8590 }; 8591 8592 class IU_Register { 8593 public: 8594 int32_t _value; 8595 8596 void print() const { 8597 printf("%08x %11d", _value, _value); 8598 } 8599 8600 }; 8601 8602 class IU_State { 8603 public: 8604 Flag_Register _eflags; 8605 IU_Register _rdi; 8606 IU_Register _rsi; 8607 IU_Register _rbp; 8608 IU_Register _rsp; 8609 IU_Register _rbx; 8610 IU_Register _rdx; 8611 IU_Register _rcx; 8612 IU_Register _rax; 8613 8614 void print() const { 8615 // computation registers 8616 printf("rax, = "); _rax.print(); printf("\n"); 8617 printf("rbx, = "); _rbx.print(); printf("\n"); 8618 printf("rcx = "); _rcx.print(); printf("\n"); 8619 printf("rdx = "); _rdx.print(); printf("\n"); 8620 printf("rdi = "); _rdi.print(); printf("\n"); 8621 printf("rsi = "); _rsi.print(); printf("\n"); 8622 printf("rbp, = "); _rbp.print(); printf("\n"); 8623 printf("rsp = "); _rsp.print(); printf("\n"); 8624 printf("\n"); 8625 // control registers 8626 printf("flgs = "); _eflags.print(); printf("\n"); 8627 } 8628 }; 8629 8630 8631 class CPU_State { 8632 public: 8633 FPU_State _fpu_state; 8634 IU_State _iu_state; 8635 8636 void print() const { 8637 printf("--------------------------------------------------\n"); 8638 _iu_state .print(); 8639 printf("\n"); 8640 _fpu_state.print(); 8641 printf("--------------------------------------------------\n"); 8642 } 8643 8644 }; 8645 8646 8647 static void _print_CPU_state(CPU_State* state) { 8648 state->print(); 8649 }; 8650 8651 8652 void MacroAssembler::print_CPU_state() { 8653 push_CPU_state(); 8654 push(rsp); // pass CPU state 8655 call(RuntimeAddress(CAST_FROM_FN_PTR(address, _print_CPU_state))); 8656 addptr(rsp, wordSize); // discard argument 8657 pop_CPU_state(); 8658 } 8659 8660 8661 static bool _verify_FPU(int stack_depth, char* s, CPU_State* state) { 8662 static int counter = 0; 8663 FPU_State* fs = &state->_fpu_state; 8664 counter++; 8665 // For 
leaf calls, only verify that the top few elements remain empty. 8666 // We only need 1 empty at the top for C2 code. 8667 if( stack_depth < 0 ) { 8668 if( fs->tag_for_st(7) != 3 ) { 8669 printf("FPR7 not empty\n"); 8670 state->print(); 8671 assert(false, "error"); 8672 return false; 8673 } 8674 return true; // All other stack states do not matter 8675 } 8676 8677 assert((fs->_control_word._value & 0xffff) == StubRoutines::_fpu_cntrl_wrd_std, 8678 "bad FPU control word"); 8679 8680 // compute stack depth 8681 int i = 0; 8682 while (i < FPU_State::number_of_registers && fs->tag_for_st(i) < 3) i++; 8683 int d = i; 8684 while (i < FPU_State::number_of_registers && fs->tag_for_st(i) == 3) i++; 8685 // verify findings 8686 if (i != FPU_State::number_of_registers) { 8687 // stack not contiguous 8688 printf("%s: stack not contiguous at ST%d\n", s, i); 8689 state->print(); 8690 assert(false, "error"); 8691 return false; 8692 } 8693 // check if computed stack depth corresponds to expected stack depth 8694 if (stack_depth < 0) { 8695 // expected stack depth is -stack_depth or less 8696 if (d > -stack_depth) { 8697 // too many elements on the stack 8698 printf("%s: <= %d stack elements expected but found %d\n", s, -stack_depth, d); 8699 state->print(); 8700 assert(false, "error"); 8701 return false; 8702 } 8703 } else { 8704 // expected stack depth is stack_depth 8705 if (d != stack_depth) { 8706 // wrong stack depth 8707 printf("%s: %d stack elements expected but found %d\n", s, stack_depth, d); 8708 state->print(); 8709 assert(false, "error"); 8710 return false; 8711 } 8712 } 8713 // everything is cool 8714 return true; 8715 } 8716 8717 8718 void MacroAssembler::verify_FPU(int stack_depth, const char* s) { 8719 if (!VerifyFPU) return; 8720 push_CPU_state(); 8721 push(rsp); // pass CPU state 8722 ExternalAddress msg((address) s); 8723 // pass message string s 8724 pushptr(msg.addr()); 8725 push(stack_depth); // pass stack depth 8726 
call(RuntimeAddress(CAST_FROM_FN_PTR(address, _verify_FPU))); 8727 addptr(rsp, 3 * wordSize); // discard arguments 8728 // check for error 8729 { Label L; 8730 testl(rax, rax); 8731 jcc(Assembler::notZero, L); 8732 int3(); // break if error condition 8733 bind(L); 8734 } 8735 pop_CPU_state(); 8736 } 8737 8738 void MacroAssembler::load_klass(Register dst, Register src) { 8739 #ifdef _LP64 8740 if (UseCompressedOops) { 8741 movl(dst, Address(src, oopDesc::klass_offset_in_bytes())); 8742 decode_heap_oop_not_null(dst); 8743 } else 8744 #endif 8745 movptr(dst, Address(src, oopDesc::klass_offset_in_bytes())); 8746 } 8747 8748 void MacroAssembler::load_prototype_header(Register dst, Register src) { 8749 #ifdef _LP64 8750 if (UseCompressedOops) { 8751 assert (Universe::heap() != NULL, "java heap should be initialized"); 8752 movl(dst, Address(src, oopDesc::klass_offset_in_bytes())); 8753 if (Universe::narrow_oop_shift() != 0) { 8754 assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); 8755 if (LogMinObjAlignmentInBytes == Address::times_8) { 8756 movq(dst, Address(r12_heapbase, dst, Address::times_8, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes())); 8757 } else { 8758 // OK to use shift since we don't need to preserve flags. 
8759 shlq(dst, LogMinObjAlignmentInBytes); 8760 movq(dst, Address(r12_heapbase, dst, Address::times_1, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes())); 8761 } 8762 } else { 8763 movq(dst, Address(dst, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes())); 8764 } 8765 } else 8766 #endif 8767 { 8768 movptr(dst, Address(src, oopDesc::klass_offset_in_bytes())); 8769 movptr(dst, Address(dst, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes())); 8770 } 8771 } 8772 8773 void MacroAssembler::store_klass(Register dst, Register src) { 8774 #ifdef _LP64 8775 if (UseCompressedOops) { 8776 encode_heap_oop_not_null(src); 8777 movl(Address(dst, oopDesc::klass_offset_in_bytes()), src); 8778 } else 8779 #endif 8780 movptr(Address(dst, oopDesc::klass_offset_in_bytes()), src); 8781 } 8782 8783 void MacroAssembler::load_heap_oop(Register dst, Address src) { 8784 #ifdef _LP64 8785 if (UseCompressedOops) { 8786 movl(dst, src); 8787 decode_heap_oop(dst); 8788 } else 8789 #endif 8790 movptr(dst, src); 8791 } 8792 8793 // Doesn't do verfication, generates fixed size code 8794 void MacroAssembler::load_heap_oop_not_null(Register dst, Address src) { 8795 #ifdef _LP64 8796 if (UseCompressedOops) { 8797 movl(dst, src); 8798 decode_heap_oop_not_null(dst); 8799 } else 8800 #endif 8801 movptr(dst, src); 8802 } 8803 8804 void MacroAssembler::store_heap_oop(Address dst, Register src) { 8805 #ifdef _LP64 8806 if (UseCompressedOops) { 8807 assert(!dst.uses(src), "not enough registers"); 8808 encode_heap_oop(src); 8809 movl(dst, src); 8810 } else 8811 #endif 8812 movptr(dst, src); 8813 } 8814 8815 // Used for storing NULLs. 
8816 void MacroAssembler::store_heap_oop_null(Address dst) { 8817 #ifdef _LP64 8818 if (UseCompressedOops) { 8819 movl(dst, (int32_t)NULL_WORD); 8820 } else { 8821 movslq(dst, (int32_t)NULL_WORD); 8822 } 8823 #else 8824 movl(dst, (int32_t)NULL_WORD); 8825 #endif 8826 } 8827 8828 #ifdef _LP64 8829 void MacroAssembler::store_klass_gap(Register dst, Register src) { 8830 if (UseCompressedOops) { 8831 // Store to klass gap in destination 8832 movl(Address(dst, oopDesc::klass_gap_offset_in_bytes()), src); 8833 } 8834 } 8835 8836 #ifdef ASSERT 8837 void MacroAssembler::verify_heapbase(const char* msg) { 8838 assert (UseCompressedOops, "should be compressed"); 8839 assert (Universe::heap() != NULL, "java heap should be initialized"); 8840 if (CheckCompressedOops) { 8841 Label ok; 8842 push(rscratch1); // cmpptr trashes rscratch1 8843 cmpptr(r12_heapbase, ExternalAddress((address)Universe::narrow_oop_base_addr())); 8844 jcc(Assembler::equal, ok); 8845 stop(msg); 8846 bind(ok); 8847 pop(rscratch1); 8848 } 8849 } 8850 #endif 8851 8852 // Algorithm must match oop.inline.hpp encode_heap_oop. 
8853 void MacroAssembler::encode_heap_oop(Register r) { 8854 #ifdef ASSERT 8855 verify_heapbase("MacroAssembler::encode_heap_oop: heap base corrupted?"); 8856 #endif 8857 verify_oop(r, "broken oop in encode_heap_oop"); 8858 if (Universe::narrow_oop_base() == NULL) { 8859 if (Universe::narrow_oop_shift() != 0) { 8860 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); 8861 shrq(r, LogMinObjAlignmentInBytes); 8862 } 8863 return; 8864 } 8865 testq(r, r); 8866 cmovq(Assembler::equal, r, r12_heapbase); 8867 subq(r, r12_heapbase); 8868 shrq(r, LogMinObjAlignmentInBytes); 8869 } 8870 8871 void MacroAssembler::encode_heap_oop_not_null(Register r) { 8872 #ifdef ASSERT 8873 verify_heapbase("MacroAssembler::encode_heap_oop_not_null: heap base corrupted?"); 8874 if (CheckCompressedOops) { 8875 Label ok; 8876 testq(r, r); 8877 jcc(Assembler::notEqual, ok); 8878 stop("null oop passed to encode_heap_oop_not_null"); 8879 bind(ok); 8880 } 8881 #endif 8882 verify_oop(r, "broken oop in encode_heap_oop_not_null"); 8883 if (Universe::narrow_oop_base() != NULL) { 8884 subq(r, r12_heapbase); 8885 } 8886 if (Universe::narrow_oop_shift() != 0) { 8887 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); 8888 shrq(r, LogMinObjAlignmentInBytes); 8889 } 8890 } 8891 8892 void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) { 8893 #ifdef ASSERT 8894 verify_heapbase("MacroAssembler::encode_heap_oop_not_null2: heap base corrupted?"); 8895 if (CheckCompressedOops) { 8896 Label ok; 8897 testq(src, src); 8898 jcc(Assembler::notEqual, ok); 8899 stop("null oop passed to encode_heap_oop_not_null2"); 8900 bind(ok); 8901 } 8902 #endif 8903 verify_oop(src, "broken oop in encode_heap_oop_not_null2"); 8904 if (dst != src) { 8905 movq(dst, src); 8906 } 8907 if (Universe::narrow_oop_base() != NULL) { 8908 subq(dst, r12_heapbase); 8909 } 8910 if (Universe::narrow_oop_shift() != 0) { 8911 assert (LogMinObjAlignmentInBytes 
== Universe::narrow_oop_shift(), "decode alg wrong"); 8912 shrq(dst, LogMinObjAlignmentInBytes); 8913 } 8914 } 8915 8916 void MacroAssembler::decode_heap_oop(Register r) { 8917 #ifdef ASSERT 8918 verify_heapbase("MacroAssembler::decode_heap_oop: heap base corrupted?"); 8919 #endif 8920 if (Universe::narrow_oop_base() == NULL) { 8921 if (Universe::narrow_oop_shift() != 0) { 8922 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); 8923 shlq(r, LogMinObjAlignmentInBytes); 8924 } 8925 } else { 8926 Label done; 8927 shlq(r, LogMinObjAlignmentInBytes); 8928 jccb(Assembler::equal, done); 8929 addq(r, r12_heapbase); 8930 bind(done); 8931 } 8932 verify_oop(r, "broken oop in decode_heap_oop"); 8933 } 8934 8935 void MacroAssembler::decode_heap_oop_not_null(Register r) { 8936 // Note: it will change flags 8937 assert (UseCompressedOops, "should only be used for compressed headers"); 8938 assert (Universe::heap() != NULL, "java heap should be initialized"); 8939 // Cannot assert, unverified entry point counts instructions (see .ad file) 8940 // vtableStubs also counts instructions in pd_code_size_limit. 8941 // Also do not verify_oop as this is called by verify_oop. 8942 if (Universe::narrow_oop_shift() != 0) { 8943 assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); 8944 shlq(r, LogMinObjAlignmentInBytes); 8945 if (Universe::narrow_oop_base() != NULL) { 8946 addq(r, r12_heapbase); 8947 } 8948 } else { 8949 assert (Universe::narrow_oop_base() == NULL, "sanity"); 8950 } 8951 } 8952 8953 void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) { 8954 // Note: it will change flags 8955 assert (UseCompressedOops, "should only be used for compressed headers"); 8956 assert (Universe::heap() != NULL, "java heap should be initialized"); 8957 // Cannot assert, unverified entry point counts instructions (see .ad file) 8958 // vtableStubs also counts instructions in pd_code_size_limit. 
8959 // Also do not verify_oop as this is called by verify_oop. 8960 if (Universe::narrow_oop_shift() != 0) { 8961 assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); 8962 if (LogMinObjAlignmentInBytes == Address::times_8) { 8963 leaq(dst, Address(r12_heapbase, src, Address::times_8, 0)); 8964 } else { 8965 if (dst != src) { 8966 movq(dst, src); 8967 } 8968 shlq(dst, LogMinObjAlignmentInBytes); 8969 if (Universe::narrow_oop_base() != NULL) { 8970 addq(dst, r12_heapbase); 8971 } 8972 } 8973 } else { 8974 assert (Universe::narrow_oop_base() == NULL, "sanity"); 8975 if (dst != src) { 8976 movq(dst, src); 8977 } 8978 } 8979 } 8980 8981 void MacroAssembler::set_narrow_oop(Register dst, jobject obj) { 8982 assert (UseCompressedOops, "should only be used for compressed headers"); 8983 assert (Universe::heap() != NULL, "java heap should be initialized"); 8984 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); 8985 int oop_index = oop_recorder()->find_index(obj); 8986 RelocationHolder rspec = oop_Relocation::spec(oop_index); 8987 mov_narrow_oop(dst, oop_index, rspec); 8988 } 8989 8990 void MacroAssembler::set_narrow_oop(Address dst, jobject obj) { 8991 assert (UseCompressedOops, "should only be used for compressed headers"); 8992 assert (Universe::heap() != NULL, "java heap should be initialized"); 8993 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); 8994 int oop_index = oop_recorder()->find_index(obj); 8995 RelocationHolder rspec = oop_Relocation::spec(oop_index); 8996 mov_narrow_oop(dst, oop_index, rspec); 8997 } 8998 8999 void MacroAssembler::cmp_narrow_oop(Register dst, jobject obj) { 9000 assert (UseCompressedOops, "should only be used for compressed headers"); 9001 assert (Universe::heap() != NULL, "java heap should be initialized"); 9002 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); 9003 int oop_index = oop_recorder()->find_index(obj); 9004 RelocationHolder rspec 
= oop_Relocation::spec(oop_index); 9005 Assembler::cmp_narrow_oop(dst, oop_index, rspec); 9006 } 9007 9008 void MacroAssembler::cmp_narrow_oop(Address dst, jobject obj) { 9009 assert (UseCompressedOops, "should only be used for compressed headers"); 9010 assert (Universe::heap() != NULL, "java heap should be initialized"); 9011 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); 9012 int oop_index = oop_recorder()->find_index(obj); 9013 RelocationHolder rspec = oop_Relocation::spec(oop_index); 9014 Assembler::cmp_narrow_oop(dst, oop_index, rspec); 9015 } 9016 9017 void MacroAssembler::reinit_heapbase() { 9018 if (UseCompressedOops) { 9019 movptr(r12_heapbase, ExternalAddress((address)Universe::narrow_oop_base_addr())); 9020 } 9021 } 9022 #endif // _LP64 9023 9024 // IndexOf for constant substrings with size >= 8 chars 9025 // which don't need to be loaded through stack. 9026 void MacroAssembler::string_indexofC8(Register str1, Register str2, 9027 Register cnt1, Register cnt2, 9028 int int_cnt2, Register result, 9029 XMMRegister vec, Register tmp) { 9030 assert(UseSSE42Intrinsics, "SSE4.2 is required"); 9031 9032 // This method uses pcmpestri inxtruction with bound registers 9033 // inputs: 9034 // xmm - substring 9035 // rax - substring length (elements count) 9036 // mem - scanned string 9037 // rdx - string length (elements count) 9038 // 0xd - mode: 1100 (substring search) + 01 (unsigned shorts) 9039 // outputs: 9040 // rcx - matched index in string 9041 assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri"); 9042 9043 Label RELOAD_SUBSTR, SCAN_TO_SUBSTR, SCAN_SUBSTR, 9044 RET_FOUND, RET_NOT_FOUND, EXIT, FOUND_SUBSTR, 9045 MATCH_SUBSTR_HEAD, RELOAD_STR, FOUND_CANDIDATE; 9046 9047 // Note, inline_string_indexOf() generates checks: 9048 // if (substr.count > string.count) return -1; 9049 // if (substr.count == 0) return 0; 9050 assert(int_cnt2 >= 8, "this code isused only for cnt2 >= 8 chars"); 9051 9052 // Load substring. 
9053 movdqu(vec, Address(str2, 0)); 9054 movl(cnt2, int_cnt2); 9055 movptr(result, str1); // string addr 9056 9057 if (int_cnt2 > 8) { 9058 jmpb(SCAN_TO_SUBSTR); 9059 9060 // Reload substr for rescan, this code 9061 // is executed only for large substrings (> 8 chars) 9062 bind(RELOAD_SUBSTR); 9063 movdqu(vec, Address(str2, 0)); 9064 negptr(cnt2); // Jumped here with negative cnt2, convert to positive 9065 9066 bind(RELOAD_STR); 9067 // We came here after the beginning of the substring was 9068 // matched but the rest of it was not so we need to search 9069 // again. Start from the next element after the previous match. 9070 9071 // cnt2 is number of substring reminding elements and 9072 // cnt1 is number of string reminding elements when cmp failed. 9073 // Restored cnt1 = cnt1 - cnt2 + int_cnt2 9074 subl(cnt1, cnt2); 9075 addl(cnt1, int_cnt2); 9076 movl(cnt2, int_cnt2); // Now restore cnt2 9077 9078 decrementl(cnt1); // Shift to next element 9079 cmpl(cnt1, cnt2); 9080 jccb(Assembler::negative, RET_NOT_FOUND); // Left less then substring 9081 9082 addptr(result, 2); 9083 9084 } // (int_cnt2 > 8) 9085 9086 // Scan string for start of substr in 16-byte vectors 9087 bind(SCAN_TO_SUBSTR); 9088 pcmpestri(vec, Address(result, 0), 0x0d); 9089 jccb(Assembler::below, FOUND_CANDIDATE); // CF == 1 9090 subl(cnt1, 8); 9091 jccb(Assembler::lessEqual, RET_NOT_FOUND); // Scanned full string 9092 cmpl(cnt1, cnt2); 9093 jccb(Assembler::negative, RET_NOT_FOUND); // Left less then substring 9094 addptr(result, 16); 9095 jmpb(SCAN_TO_SUBSTR); 9096 9097 // Found a potential substr 9098 bind(FOUND_CANDIDATE); 9099 // Matched whole vector if first element matched (tmp(rcx) == 0). 
9100 if (int_cnt2 == 8) { 9101 jccb(Assembler::overflow, RET_FOUND); // OF == 1 9102 } else { // int_cnt2 > 8 9103 jccb(Assembler::overflow, FOUND_SUBSTR); 9104 } 9105 // After pcmpestri tmp(rcx) contains matched element index 9106 // Compute start addr of substr 9107 lea(result, Address(result, tmp, Address::times_2)); 9108 9109 // Make sure string is still long enough 9110 subl(cnt1, tmp); 9111 cmpl(cnt1, cnt2); 9112 if (int_cnt2 == 8) { 9113 jccb(Assembler::greaterEqual, SCAN_TO_SUBSTR); 9114 } else { // int_cnt2 > 8 9115 jccb(Assembler::greaterEqual, MATCH_SUBSTR_HEAD); 9116 } 9117 // Left less then substring. 9118 9119 bind(RET_NOT_FOUND); 9120 movl(result, -1); 9121 jmpb(EXIT); 9122 9123 if (int_cnt2 > 8) { 9124 // This code is optimized for the case when whole substring 9125 // is matched if its head is matched. 9126 bind(MATCH_SUBSTR_HEAD); 9127 pcmpestri(vec, Address(result, 0), 0x0d); 9128 // Reload only string if does not match 9129 jccb(Assembler::noOverflow, RELOAD_STR); // OF == 0 9130 9131 Label CONT_SCAN_SUBSTR; 9132 // Compare the rest of substring (> 8 chars). 9133 bind(FOUND_SUBSTR); 9134 // First 8 chars are already matched. 
    // --- continuation of the scan loop for long (> 8 chars) constant substrings ---
    // cnt2 becomes a negative index counting up toward zero.
    negptr(cnt2);
    addptr(cnt2, 8);

    bind(SCAN_SUBSTR);
    subl(cnt1, 8);
    cmpl(cnt2, -8); // Do not read beyond substring
    jccb(Assembler::lessEqual, CONT_SCAN_SUBSTR);
    // Back-up strings to avoid reading beyond substring:
    // cnt1 = cnt1 - cnt2 + 8
    addl(cnt1, cnt2); // cnt2 is negative
    addl(cnt1, 8);
    movl(cnt2, 8); negptr(cnt2);
    bind(CONT_SCAN_SUBSTR);
    if (int_cnt2 < (int)G) {
      movdqu(vec, Address(str2, cnt2, Address::times_2, int_cnt2*2));
      pcmpestri(vec, Address(result, cnt2, Address::times_2, int_cnt2*2), 0x0d);
    } else {
      // calculate index in register to avoid integer overflow (int_cnt2*2)
      movl(tmp, int_cnt2);
      addptr(tmp, cnt2);
      movdqu(vec, Address(str2, tmp, Address::times_2, 0));
      pcmpestri(vec, Address(result, tmp, Address::times_2, 0), 0x0d);
    }
    // Need to reload strings pointers if not matched whole vector
    jccb(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0
    addptr(cnt2, 8);
    jccb(Assembler::negative, SCAN_SUBSTR);
    // Fall through if found full substring

  } // (int_cnt2 > 8)

  bind(RET_FOUND);
  // Found result if we matched full small substring.
  // Compute substr offset
  subptr(result, str1);
  shrl(result, 1); // byte offset -> char index (chars are 2 bytes)
  bind(EXIT);

} // string_indexofC8

// SSE4.2 String.indexOf for substrings of unknown length (int_cnt2 == -1,
// actual length in cnt2) or small constant substrings (0 < int_cnt2 < 8).
// Small strings are loaded through stack if they cross page boundary, so we
// never read beyond either array.  Leaves the match index (or -1) in 'result'.
// cnt1/cnt2/tmp must be rdx/rax/rcx respectively: pcmpestri has fixed
// register operands.
void MacroAssembler::string_indexof(Register str1, Register str2,
                                    Register cnt1, Register cnt2,
                                    int int_cnt2,  Register result,
                                    XMMRegister vec, Register tmp) {
  assert(UseSSE42Intrinsics, "SSE4.2 is required");
  //
  // int_cnt2 is length of small (< 8 chars) constant substring
  // or (-1) for non constant substring in which case its length
  // is in cnt2 register.
  //
  // Note, inline_string_indexOf() generates checks:
  // if (substr.count > string.count) return -1;
  // if (substr.count == 0) return 0;
  //
  assert(int_cnt2 == -1 || (0 < int_cnt2 && int_cnt2 < 8), "should be != 0");

  // This method uses pcmpestri instruction with bound registers
  //   inputs:
  //     xmm - substring
  //     rax - substring length (elements count)
  //     mem - scanned string
  //     rdx - string length (elements count)
  //     0xd - mode: 1100 (substring search) + 01 (unsigned shorts)
  //   outputs:
  //     rcx - matched index in string
  assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri");

  Label RELOAD_SUBSTR, SCAN_TO_SUBSTR, SCAN_SUBSTR, ADJUST_STR,
        RET_FOUND, RET_NOT_FOUND, CLEANUP, FOUND_SUBSTR,
        FOUND_CANDIDATE;

  { //========================================================
    // We don't know where these strings are located
    // and we can't read beyond them. Load them through stack.
    Label BIG_STRINGS, CHECK_STR, COPY_SUBSTR, COPY_STR;

    movptr(tmp, rsp); // save old SP

    if (int_cnt2 > 0) {     // small (< 8 chars) constant substring
      if (int_cnt2 == 1) {  // One char
        load_unsigned_short(result, Address(str2, 0));
        movdl(vec, result); // move 32 bits
      } else if (int_cnt2 == 2) { // Two chars
        movdl(vec, Address(str2, 0)); // move 32 bits
      } else if (int_cnt2 == 4) { // Four chars
        movq(vec, Address(str2, 0)); // move 64 bits
      } else { // cnt2 = { 3, 5, 6, 7 }
        // Array header size is 12 bytes in 32-bit VM
        // + 6 bytes for 3 chars == 18 bytes,
        // enough space to load vec and shift.
        assert(HeapWordSize*typeArrayKlass::header_size() >= 12,"sanity");
        // Read 16 bytes ending exactly at the last char, then shift the
        // unwanted low bytes out of the vector.
        movdqu(vec, Address(str2, (int_cnt2*2)-16));
        psrldq(vec, 16-(int_cnt2*2));
      }
    } else { // not constant substring
      cmpl(cnt2, 8);
      jccb(Assembler::aboveEqual, BIG_STRINGS); // Both strings are big enough

      // We can read beyond string if str+16 does not cross page boundary
      // since heaps are aligned and mapped by pages.
      assert(os::vm_page_size() < (int)G, "default page should be small");
      movl(result, str2); // We need only low 32 bits
      andl(result, (os::vm_page_size()-1));
      cmpl(result, (os::vm_page_size()-16));
      jccb(Assembler::belowEqual, CHECK_STR);

      // Move small strings to stack to allow load 16 bytes into vec.
      subptr(rsp, 16);
      int stk_offset = wordSize-2;
      push(cnt2);

      // Copy the substring char-by-char onto the stack (backwards).
      bind(COPY_SUBSTR);
      load_unsigned_short(result, Address(str2, cnt2, Address::times_2, -2));
      movw(Address(rsp, cnt2, Address::times_2, stk_offset), result);
      decrement(cnt2);
      jccb(Assembler::notZero, COPY_SUBSTR);

      pop(cnt2);
      movptr(str2, rsp); // New substring address
    } // non constant

    bind(CHECK_STR);
    cmpl(cnt1, 8);
    jccb(Assembler::aboveEqual, BIG_STRINGS);

    // Check cross page boundary.
    movl(result, str1); // We need only low 32 bits
    andl(result, (os::vm_page_size()-1));
    cmpl(result, (os::vm_page_size()-16));
    jccb(Assembler::belowEqual, BIG_STRINGS);

    subptr(rsp, 16);
    int stk_offset = -2;
    if (int_cnt2 < 0) { // not constant
      push(cnt2);
      stk_offset += wordSize;
    }
    movl(cnt2, cnt1);

    // Copy the string char-by-char onto the stack (backwards).
    bind(COPY_STR);
    load_unsigned_short(result, Address(str1, cnt2, Address::times_2, -2));
    movw(Address(rsp, cnt2, Address::times_2, stk_offset), result);
    decrement(cnt2);
    jccb(Assembler::notZero, COPY_STR);

    if (int_cnt2 < 0) { // not constant
      pop(cnt2);
    }
    movptr(str1, rsp); // New string address

    bind(BIG_STRINGS);
    // Load substring.
    if (int_cnt2 < 0) { // -1
      movdqu(vec, Address(str2, 0));
      push(cnt2); // substr count
      push(str2); // substr addr
      push(str1); // string addr
    } else {
      // Small (< 8 chars) constant substrings are loaded already.
      movl(cnt2, int_cnt2);
    }
    push(tmp); // original SP

  } // Finished loading

  //========================================================
  // Start search
  //

  movptr(result, str1); // string addr

  if (int_cnt2  < 0) {  // Only for non constant substring
    jmpb(SCAN_TO_SUBSTR);

    // SP saved at sp+0
    // String saved at sp+1*wordSize
    // Substr saved at sp+2*wordSize
    // Substr count saved at sp+3*wordSize

    // Reload substr for rescan, this code
    // is executed only for large substrings (> 8 chars)
    bind(RELOAD_SUBSTR);
    movptr(str2, Address(rsp, 2*wordSize));
    movl(cnt2, Address(rsp, 3*wordSize));
    movdqu(vec, Address(str2, 0));
    // We came here after the beginning of the substring was
    // matched but the rest of it was not so we need to search
    // again. Start from the next element after the previous match.
    subptr(str1, result); // Restore counter
    shrl(str1, 1);
    addl(cnt1, str1);
    decrementl(cnt1);   // Shift to next element
    cmpl(cnt1, cnt2);
    jccb(Assembler::negative, RET_NOT_FOUND); // Fewer chars left than substring length

    addptr(result, 2);
  } // non constant

  // Scan string for start of substr in 16-byte vectors
  bind(SCAN_TO_SUBSTR);
  assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri");
  pcmpestri(vec, Address(result, 0), 0x0d);
  jccb(Assembler::below, FOUND_CANDIDATE);   // CF == 1
  subl(cnt1, 8);
  jccb(Assembler::lessEqual, RET_NOT_FOUND); // Scanned full string
  cmpl(cnt1, cnt2);
  jccb(Assembler::negative, RET_NOT_FOUND);  // Fewer chars left than substring length
  addptr(result, 16);

  bind(ADJUST_STR);
  cmpl(cnt1, 8); // Do not read beyond string
  jccb(Assembler::greaterEqual, SCAN_TO_SUBSTR);
  // Back-up string to avoid reading beyond string.
  lea(result, Address(result, cnt1, Address::times_2, -16));
  movl(cnt1, 8);
  jmpb(SCAN_TO_SUBSTR);

  // Found a potential substr
  bind(FOUND_CANDIDATE);
  // After pcmpestri tmp(rcx) contains matched element index

  // Make sure string is still long enough
  subl(cnt1, tmp);
  cmpl(cnt1, cnt2);
  jccb(Assembler::greaterEqual, FOUND_SUBSTR);
  // Fewer chars left than substring length.

  bind(RET_NOT_FOUND);
  movl(result, -1);
  jmpb(CLEANUP);

  bind(FOUND_SUBSTR);
  // Compute start addr of substr
  lea(result, Address(result, tmp, Address::times_2));

  if (int_cnt2 > 0) { // Constant substring
    // Repeat search for small substring (< 8 chars)
    // from new point without reloading substring.
    // Have to check that we don't read beyond string.
    cmpl(tmp, 8-int_cnt2);
    jccb(Assembler::greater, ADJUST_STR);
    // Fall through if matched whole substring.
  } else { // non constant
    assert(int_cnt2 == -1, "should be != 0");

    addl(tmp, cnt2);
    // Found result if we matched whole substring.
    cmpl(tmp, 8);
    jccb(Assembler::lessEqual, RET_FOUND);

    // Repeat search for small substring (<= 8 chars)
    // from new point 'str1' without reloading substring.
    cmpl(cnt2, 8);
    // Have to check that we don't read beyond string.
    jccb(Assembler::lessEqual, ADJUST_STR);

    Label CHECK_NEXT, CONT_SCAN_SUBSTR, RET_FOUND_LONG;
    // Compare the rest of substring (> 8 chars).
    movptr(str1, result);

    cmpl(tmp, cnt2);
    // First 8 chars are already matched.
    jccb(Assembler::equal, CHECK_NEXT);

    bind(SCAN_SUBSTR);
    pcmpestri(vec, Address(str1, 0), 0x0d);
    // Need to reload strings pointers if not matched whole vector
    jcc(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0

    bind(CHECK_NEXT);
    subl(cnt2, 8);
    jccb(Assembler::lessEqual, RET_FOUND_LONG); // Found full substring
    addptr(str1, 16);
    addptr(str2, 16);
    subl(cnt1, 8);
    cmpl(cnt2, 8); // Do not read beyond substring
    jccb(Assembler::greaterEqual, CONT_SCAN_SUBSTR);
    // Back-up strings to avoid reading beyond substring.
    lea(str2, Address(str2, cnt2, Address::times_2, -16));
    lea(str1, Address(str1, cnt2, Address::times_2, -16));
    subl(cnt1, cnt2);
    movl(cnt2, 8);
    addl(cnt1, 8);
    bind(CONT_SCAN_SUBSTR);
    movdqu(vec, Address(str2, 0));
    jmpb(SCAN_SUBSTR);

    bind(RET_FOUND_LONG);
    movptr(str1, Address(rsp, wordSize));
  } // non constant

  bind(RET_FOUND);
  // Compute substr offset
  subptr(result, str1);
  shrl(result, 1); // byte offset -> char index

  bind(CLEANUP);
  pop(rsp); // restore SP

} // string_indexof

// Compare strings.
// String.compareTo intrinsic: compares the two char sequences and leaves
// in 'result' the signed difference of the first mismatching chars, or the
// signed length difference when one string is a prefix of the other.
// The length difference is parked on the stack during the scan.
// When UseSSE42Intrinsics is on, cnt1/cnt2/result must be rcx/rdx/rax
// because pcmpestri has fixed register operands.
void MacroAssembler::string_compare(Register str1, Register str2,
                                    Register cnt1, Register cnt2, Register result,
                                    XMMRegister vec1) {
  Label LENGTH_DIFF_LABEL, POP_LABEL, DONE_LABEL, WHILE_HEAD_LABEL;

  // Compute the minimum of the string lengths and the
  // difference of the string lengths (stack).
  // Do the conditional move stuff
  movl(result, cnt1);
  subl(cnt1, cnt2);
  push(cnt1);  // length difference, consumed at LENGTH_DIFF_LABEL/POP_LABEL
  cmov32(Assembler::lessEqual, cnt2, result);  // cnt2 = min(cnt1, cnt2)

  // Is the minimum length zero?
  testl(cnt2, cnt2);
  jcc(Assembler::zero, LENGTH_DIFF_LABEL);

  // Load first characters
  load_unsigned_short(result, Address(str1, 0));
  load_unsigned_short(cnt1, Address(str2, 0));

  // Compare first characters
  subl(result, cnt1);
  jcc(Assembler::notZero, POP_LABEL);
  decrementl(cnt2);
  jcc(Assembler::zero, LENGTH_DIFF_LABEL);

  {
    // Check after comparing first character to see if strings are equivalent
    Label LSkip2;
    // Check if the strings start at same location
    cmpptr(str1, str2);
    jccb(Assembler::notEqual, LSkip2);

    // Check if the length difference is zero (from stack)
    cmpl(Address(rsp, 0), 0x0);
    jcc(Assembler::equal, LENGTH_DIFF_LABEL);

    // Strings might not be equivalent
    bind(LSkip2);
  }

  Address::ScaleFactor scale = Address::times_2;
  int stride = 8;  // chars per 16-byte vector

  // Advance to next element
  addptr(str1, 16/stride);
  addptr(str2, 16/stride);

  if (UseSSE42Intrinsics) {
    Label COMPARE_WIDE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_TAIL;
    int pcmpmask = 0x19;
    // Setup to compare 16-byte vectors
    movl(result, cnt2);
    andl(cnt2, ~(stride - 1));   // cnt2 holds the vector count
    jccb(Assembler::zero, COMPARE_TAIL);

    // Point both strings at their ends and index with a negative counter.
    lea(str1, Address(str1, result, scale));
    lea(str2, Address(str2, result, scale));
    negptr(result);

    // pcmpestri
    //   inputs:
    //     vec1 - substring
    //     rax  - negative string length (elements count)
    //     mem  - scanned string
    //     rdx  - string length (elements count)
    //     pcmpmask - cmp mode: 11000 (string compare with negated result)
    //                + 00 (unsigned bytes) or + 01 (unsigned shorts)
    //   outputs:
    //     rcx - first mismatched element index
    assert(result == rax && cnt2 == rdx && cnt1 == rcx, "pcmpestri");

    bind(COMPARE_WIDE_VECTORS);
    movdqu(vec1, Address(str1, result, scale));
    pcmpestri(vec1, Address(str2, result, scale), pcmpmask);
    // After pcmpestri cnt1(rcx) contains mismatched element index

    jccb(Assembler::below, VECTOR_NOT_EQUAL);  // CF==1
    addptr(result, stride);
    subptr(cnt2, stride);
    jccb(Assembler::notZero, COMPARE_WIDE_VECTORS);

    // compare wide vectors tail
    testl(result, result);
    jccb(Assembler::zero, LENGTH_DIFF_LABEL);

    // Re-compare the last (possibly overlapping) full vector.
    movl(cnt2, stride);
    movl(result, stride);
    negptr(result);
    movdqu(vec1, Address(str1, result, scale));
    pcmpestri(vec1, Address(str2, result, scale), pcmpmask);
    jccb(Assembler::aboveEqual, LENGTH_DIFF_LABEL);

    // Mismatched characters in the vectors
    bind(VECTOR_NOT_EQUAL);
    addptr(result, cnt1);
    movptr(cnt2, result);
    load_unsigned_short(result, Address(str1, cnt2, scale));
    load_unsigned_short(cnt1, Address(str2, cnt2, scale));
    subl(result, cnt1);
    jmpb(POP_LABEL);

    bind(COMPARE_TAIL); // limit is zero
    movl(cnt2, result);
    // Fallthru to tail compare
  }

  // Shift str2 and str1 to the end of the arrays, negate min
  lea(str1, Address(str1, cnt2, scale, 0));
  lea(str2, Address(str2, cnt2, scale, 0));
  negptr(cnt2);

  // Compare the rest of the elements
  bind(WHILE_HEAD_LABEL);
  load_unsigned_short(result, Address(str1, cnt2, scale, 0));
  load_unsigned_short(cnt1, Address(str2, cnt2, scale, 0));
  subl(result, cnt1);
  jccb(Assembler::notZero, POP_LABEL);
  increment(cnt2);
  jccb(Assembler::notZero, WHILE_HEAD_LABEL);

  // Strings are equal up to min length. Return the length difference.
  bind(LENGTH_DIFF_LABEL);
  pop(result);
  jmpb(DONE_LABEL);

  // Discard the stored length difference
  bind(POP_LABEL);
  pop(cnt1);

  // That's it
  bind(DONE_LABEL);
}

// Compare char[] arrays aligned to 4 bytes or substrings.
// Leaves 1 in 'result' if the ranges are equal, 0 otherwise.
// When is_array_equ the arguments are array oops (null checks, length
// checks and header skipping are emitted); otherwise ary1/ary2 already
// point at char data and 'limit' holds the char count.
void MacroAssembler::char_arrays_equals(bool is_array_equ, Register ary1, Register ary2,
                                        Register limit, Register result, Register chr,
                                        XMMRegister vec1, XMMRegister vec2) {
  Label TRUE_LABEL, FALSE_LABEL, DONE, COMPARE_VECTORS, COMPARE_CHAR;

  int length_offset = arrayOopDesc::length_offset_in_bytes();
  int base_offset = arrayOopDesc::base_offset_in_bytes(T_CHAR);

  // Check the input args: same oop (or both null) is trivially equal.
  cmpptr(ary1, ary2);
  jcc(Assembler::equal, TRUE_LABEL);

  if (is_array_equ) {
    // Need additional checks for arrays_equals.
    // A single null array is never equal (both-null was caught by the
    // identity check above).
    testptr(ary1, ary1);
    jcc(Assembler::zero, FALSE_LABEL);
    testptr(ary2, ary2);
    jcc(Assembler::zero, FALSE_LABEL);

    // Check the lengths
    movl(limit, Address(ary1, length_offset));
    cmpl(limit, Address(ary2, length_offset));
    jcc(Assembler::notEqual, FALSE_LABEL);
  }

  // count == 0
  testl(limit, limit);
  jcc(Assembler::zero, TRUE_LABEL);

  if (is_array_equ) {
    // Load array address
    lea(ary1, Address(ary1, base_offset));
    lea(ary2, Address(ary2, base_offset));
  }

  shll(limit, 1);      // char count -> byte count != 0
  movl(result, limit); // copy

  if (UseSSE42Intrinsics) {
    // With SSE4.2, use double quad vector compare
    Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;

    // Compare 16-byte vectors
    andl(result, 0x0000000e); //   tail count (in bytes)
    andl(limit, 0xfffffff0);  // vector count (in bytes)
    jccb(Assembler::zero, COMPARE_TAIL);

    // Point at the end of the vectorizable region; index negatively.
    lea(ary1, Address(ary1, limit, Address::times_1));
    lea(ary2, Address(ary2, limit, Address::times_1));
    negptr(limit);

    bind(COMPARE_WIDE_VECTORS);
    movdqu(vec1, Address(ary1, limit, Address::times_1));
    movdqu(vec2, Address(ary2, limit, Address::times_1));
    pxor(vec1, vec2);  // xor is zero iff the 16-byte chunks are identical

    ptest(vec1, vec1);
    jccb(Assembler::notZero, FALSE_LABEL);
    addptr(limit, 16);
    jcc(Assembler::notZero, COMPARE_WIDE_VECTORS);

    testl(result, result);
    jccb(Assembler::zero, TRUE_LABEL);

    // Compare the tail with one final (overlapping) 16-byte load ending
    // exactly at the last byte.
    movdqu(vec1, Address(ary1, result, Address::times_1, -16));
    movdqu(vec2, Address(ary2, result, Address::times_1, -16));
    pxor(vec1, vec2);

    ptest(vec1, vec1);
    jccb(Assembler::notZero, FALSE_LABEL);
    jmpb(TRUE_LABEL);

    bind(COMPARE_TAIL); // limit is zero
    movl(limit, result);
    // Fallthru to tail compare
  }

  // Compare 4-byte vectors
  andl(limit, 0xfffffffc); // vector count (in bytes)
  jccb(Assembler::zero, COMPARE_CHAR);

  lea(ary1, Address(ary1, limit, Address::times_1));
  lea(ary2, Address(ary2, limit, Address::times_1));
  negptr(limit);

  bind(COMPARE_VECTORS);
  movl(chr, Address(ary1, limit, Address::times_1));
  cmpl(chr, Address(ary2, limit, Address::times_1));
  jccb(Assembler::notEqual, FALSE_LABEL);
  addptr(limit, 4);
  jcc(Assembler::notZero, COMPARE_VECTORS);

  // Compare trailing char (final 2 bytes), if any
  bind(COMPARE_CHAR);
  testl(result, 0x2); // tail char
  jccb(Assembler::zero, TRUE_LABEL);
  load_unsigned_short(chr, Address(ary1, 0));
  load_unsigned_short(limit, Address(ary2, 0));
  cmpl(chr, limit);
  jccb(Assembler::notEqual, FALSE_LABEL);

  bind(TRUE_LABEL);
  movl(result, 1);   // return true
  jmpb(DONE);

  bind(FALSE_LABEL);
  xorl(result, result); // return false

  // That's it
  bind(DONE);
}

#ifdef PRODUCT
#define BLOCK_COMMENT(str) /* nothing */
#else
#define BLOCK_COMMENT(str) block_comment(str)
#endif

#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")

// Fill a primitive array with a replicated 'value'.  't' selects the
// element type (T_BYTE/T_SHORT/T_INT), 'count' is the element count and
// 'to' the destination.  The value is first widened so every 32-bit word
// holds the repeated pattern; the emitted code then stores 32-byte chunks
// with scalar fix-ups for alignment and the tail.
// Clobbers value, count, rtmp and xtmp.
void MacroAssembler::generate_fill(BasicType t, bool aligned,
                                   Register to, Register value, Register count,
                                   Register rtmp, XMMRegister xtmp) {
  assert_different_registers(to, value, count, rtmp);
  Label L_exit, L_skip_align1, L_skip_align2, L_fill_byte;
  Label L_fill_2_bytes, L_fill_4_bytes;

  // 1 << shift == elements per 4 bytes, used to convert between byte
  // counts and element counts below.
  int shift = -1;
  switch (t) {
    case T_BYTE:
      shift = 2;
      break;
    case T_SHORT:
      shift = 1;
      break;
    case T_INT:
      shift = 0;
      break;
    default: ShouldNotReachHere();
  }

  // Replicate 'value' into every byte/short lane of a 32-bit word.
  if (t == T_BYTE) {
    andl(value, 0xff);
    movl(rtmp, value);
    shll(rtmp, 8);
    orl(value, rtmp);
  }
  if (t == T_SHORT) {
    andl(value, 0xffff);
  }
  if (t == T_BYTE || t == T_SHORT) {
    movl(rtmp, value);
    shll(rtmp, 16);
    orl(value, rtmp);
  }

  cmpl(count, 2<<shift); // Short arrays (< 8 bytes) fill by element
  jcc(Assembler::below, L_fill_4_bytes); // use unsigned cmp
  if (!UseUnalignedLoadStores && !aligned && (t == T_BYTE || t == T_SHORT)) {
    // align source address at 4 bytes address boundary
    if (t == T_BYTE) {
      // One byte misalignment happens only for byte arrays
      testptr(to, 1);
      jccb(Assembler::zero, L_skip_align1);
      movb(Address(to, 0), value);
      increment(to);
      decrement(count);
      BIND(L_skip_align1);
    }
    // Two bytes misalignment happens only for byte and short (char) arrays
    testptr(to, 2);
    jccb(Assembler::zero, L_skip_align2);
    movw(Address(to, 0), value);
    addptr(to, 2);
    subl(count, 1<<(shift-1));
    BIND(L_skip_align2);
  }
  if (UseSSE < 2) {
    // Scalar path: fill with 32-bit stores only.
    Label L_fill_32_bytes_loop, L_check_fill_8_bytes, L_fill_8_bytes_loop, L_fill_8_bytes;
    // Fill 32-byte chunks
    subl(count, 8 << shift);
    jcc(Assembler::less, L_check_fill_8_bytes);
    align(16); // align the loop head for the front-end

    BIND(L_fill_32_bytes_loop);

    for (int i = 0; i < 32; i += 4) {
      movl(Address(to, i), value);
    }

    addptr(to, 32);
    subl(count, 8 << shift);
    jcc(Assembler::greaterEqual, L_fill_32_bytes_loop);
    BIND(L_check_fill_8_bytes);
    addl(count, 8 << shift);
    jccb(Assembler::zero, L_exit);
    jmpb(L_fill_8_bytes);

    //
    // length is too short, just fill qwords
    //
    BIND(L_fill_8_bytes_loop);
    movl(Address(to, 0), value);
    movl(Address(to, 4), value);
    addptr(to, 8);
    BIND(L_fill_8_bytes);
    subl(count, 1 << (shift + 1));
    jcc(Assembler::greaterEqual, L_fill_8_bytes_loop);
    // fall through to fill 4 bytes
  } else {
    Label L_fill_32_bytes;
    if (!UseUnalignedLoadStores) {
      // align to 8 bytes, we know we are 4 byte aligned to start
      testptr(to, 4);
      jccb(Assembler::zero, L_fill_32_bytes);
      movl(Address(to, 0), value);
      addptr(to, 4);
      subl(count, 1<<shift);
    }
    BIND(L_fill_32_bytes);
    {
      assert( UseSSE >= 2, "supported cpu only" );
      Label L_fill_32_bytes_loop, L_check_fill_8_bytes, L_fill_8_bytes_loop, L_fill_8_bytes;
      // Fill 32-byte chunks
      movdl(xtmp, value);
      pshufd(xtmp, xtmp, 0); // broadcast the 32-bit pattern across xtmp

      subl(count, 8 << shift);
      jcc(Assembler::less, L_check_fill_8_bytes);
      align(16);

      BIND(L_fill_32_bytes_loop);

      if (UseUnalignedLoadStores) {
        movdqu(Address(to, 0), xtmp);
        movdqu(Address(to, 16), xtmp);
      } else {
        movq(Address(to, 0), xtmp);
        movq(Address(to, 8), xtmp);
        movq(Address(to, 16), xtmp);
        movq(Address(to, 24), xtmp);
      }

      addptr(to, 32);
      subl(count, 8 << shift);
      jcc(Assembler::greaterEqual, L_fill_32_bytes_loop);
      BIND(L_check_fill_8_bytes);
      addl(count, 8 << shift);
      jccb(Assembler::zero, L_exit);
      jmpb(L_fill_8_bytes);

      //
      // length is too short, just fill qwords
      //
      BIND(L_fill_8_bytes_loop);
      movq(Address(to, 0), xtmp);
      addptr(to, 8);
      BIND(L_fill_8_bytes);
      subl(count, 1 << (shift + 1));
      jcc(Assembler::greaterEqual, L_fill_8_bytes_loop);
    }
  }
  // fill trailing 4 bytes
  BIND(L_fill_4_bytes);
  testl(count, 1<<shift);
  jccb(Assembler::zero, L_fill_2_bytes);
  movl(Address(to, 0), value);
  if (t == T_BYTE || t == T_SHORT) {
    addptr(to, 4);
    BIND(L_fill_2_bytes);
    // fill trailing 2 bytes
    testl(count, 1<<(shift-1));
    jccb(Assembler::zero, L_fill_byte);
    movw(Address(to, 0), value);
    if (t == T_BYTE) {
      addptr(to, 2);
      BIND(L_fill_byte);
      // fill trailing byte
      testl(count, 1);
      jccb(Assembler::zero, L_exit);
      movb(Address(to, 0), value);
    } else {
      BIND(L_fill_byte);
    }
  } else {
    BIND(L_fill_2_bytes);
  }
  BIND(L_exit);
}
#undef BIND
#undef BLOCK_COMMENT


Assembler::Condition
MacroAssembler::negate_condition(Assembler::Condition cond) { 9867 switch (cond) { 9868 // Note some conditions are synonyms for others 9869 case Assembler::zero: return Assembler::notZero; 9870 case Assembler::notZero: return Assembler::zero; 9871 case Assembler::less: return Assembler::greaterEqual; 9872 case Assembler::lessEqual: return Assembler::greater; 9873 case Assembler::greater: return Assembler::lessEqual; 9874 case Assembler::greaterEqual: return Assembler::less; 9875 case Assembler::below: return Assembler::aboveEqual; 9876 case Assembler::belowEqual: return Assembler::above; 9877 case Assembler::above: return Assembler::belowEqual; 9878 case Assembler::aboveEqual: return Assembler::below; 9879 case Assembler::overflow: return Assembler::noOverflow; 9880 case Assembler::noOverflow: return Assembler::overflow; 9881 case Assembler::negative: return Assembler::positive; 9882 case Assembler::positive: return Assembler::negative; 9883 case Assembler::parity: return Assembler::noParity; 9884 case Assembler::noParity: return Assembler::parity; 9885 } 9886 ShouldNotReachHere(); return Assembler::overflow; 9887 } 9888 9889 SkipIfEqual::SkipIfEqual( 9890 MacroAssembler* masm, const bool* flag_addr, bool value) { 9891 _masm = masm; 9892 _masm->cmp8(ExternalAddress((address)flag_addr), value); 9893 _masm->jcc(Assembler::equal, _label); 9894 } 9895 9896 SkipIfEqual::~SkipIfEqual() { 9897 _masm->bind(_label); 9898 }