1 /* 2 * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 
22 * 23 */ 24 25 #include "precompiled.hpp" 26 #include "assembler_x86.inline.hpp" 27 #include "gc_interface/collectedHeap.inline.hpp" 28 #include "interpreter/interpreter.hpp" 29 #include "memory/cardTableModRefBS.hpp" 30 #include "memory/resourceArea.hpp" 31 #include "prims/methodHandles.hpp" 32 #include "runtime/biasedLocking.hpp" 33 #include "runtime/interfaceSupport.hpp" 34 #include "runtime/objectMonitor.hpp" 35 #include "runtime/os.hpp" 36 #include "runtime/sharedRuntime.hpp" 37 #include "runtime/stubRoutines.hpp" 38 #ifndef SERIALGC 39 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp" 40 #include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp" 41 #include "gc_implementation/g1/heapRegion.hpp" 42 #endif 43 44 // Implementation of AddressLiteral 45 46 AddressLiteral::AddressLiteral(address target, relocInfo::relocType rtype) { 47 _is_lval = false; 48 _target = target; 49 switch (rtype) { 50 case relocInfo::oop_type: 51 // Oops are a special case. Normally they would be their own section 52 // but in cases like icBuffer they are literals in the code stream that 53 // we don't have a section for. We use none so that we get a literal address 54 // which is always patchable. 
55 break; 56 case relocInfo::external_word_type: 57 _rspec = external_word_Relocation::spec(target); 58 break; 59 case relocInfo::internal_word_type: 60 _rspec = internal_word_Relocation::spec(target); 61 break; 62 case relocInfo::opt_virtual_call_type: 63 _rspec = opt_virtual_call_Relocation::spec(); 64 break; 65 case relocInfo::static_call_type: 66 _rspec = static_call_Relocation::spec(); 67 break; 68 case relocInfo::runtime_call_type: 69 _rspec = runtime_call_Relocation::spec(); 70 break; 71 case relocInfo::poll_type: 72 case relocInfo::poll_return_type: 73 _rspec = Relocation::spec_simple(rtype); 74 break; 75 case relocInfo::none: 76 break; 77 default: 78 ShouldNotReachHere(); 79 break; 80 } 81 } 82 83 // Implementation of Address 84 85 #ifdef _LP64 86 87 Address Address::make_array(ArrayAddress adr) { 88 // Not implementable on 64bit machines 89 // Should have been handled higher up the call chain. 90 ShouldNotReachHere(); 91 return Address(); 92 } 93 94 // exceedingly dangerous constructor 95 Address::Address(int disp, address loc, relocInfo::relocType rtype) { 96 _base = noreg; 97 _index = noreg; 98 _scale = no_scale; 99 _disp = disp; 100 switch (rtype) { 101 case relocInfo::external_word_type: 102 _rspec = external_word_Relocation::spec(loc); 103 break; 104 case relocInfo::internal_word_type: 105 _rspec = internal_word_Relocation::spec(loc); 106 break; 107 case relocInfo::runtime_call_type: 108 // HMM 109 _rspec = runtime_call_Relocation::spec(); 110 break; 111 case relocInfo::poll_type: 112 case relocInfo::poll_return_type: 113 _rspec = Relocation::spec_simple(rtype); 114 break; 115 case relocInfo::none: 116 break; 117 default: 118 ShouldNotReachHere(); 119 } 120 } 121 #else // LP64 122 123 Address Address::make_array(ArrayAddress adr) { 124 AddressLiteral base = adr.base(); 125 Address index = adr.index(); 126 assert(index._disp == 0, "must not have disp"); // maybe it can? 
127 Address array(index._base, index._index, index._scale, (intptr_t) base.target()); 128 array._rspec = base._rspec; 129 return array; 130 } 131 132 // exceedingly dangerous constructor 133 Address::Address(address loc, RelocationHolder spec) { 134 _base = noreg; 135 _index = noreg; 136 _scale = no_scale; 137 _disp = (intptr_t) loc; 138 _rspec = spec; 139 } 140 141 #endif // _LP64 142 143 144 145 // Convert the raw encoding form into the form expected by the constructor for 146 // Address. An index of 4 (rsp) corresponds to having no index, so convert 147 // that to noreg for the Address constructor. 148 Address Address::make_raw(int base, int index, int scale, int disp, bool disp_is_oop) { 149 RelocationHolder rspec; 150 if (disp_is_oop) { 151 rspec = Relocation::spec_simple(relocInfo::oop_type); 152 } 153 bool valid_index = index != rsp->encoding(); 154 if (valid_index) { 155 Address madr(as_Register(base), as_Register(index), (Address::ScaleFactor)scale, in_ByteSize(disp)); 156 madr._rspec = rspec; 157 return madr; 158 } else { 159 Address madr(as_Register(base), noreg, Address::no_scale, in_ByteSize(disp)); 160 madr._rspec = rspec; 161 return madr; 162 } 163 } 164 165 // Implementation of Assembler 166 167 int AbstractAssembler::code_fill_byte() { 168 return (u_char)'\xF4'; // hlt 169 } 170 171 // make this go away someday 172 void Assembler::emit_data(jint data, relocInfo::relocType rtype, int format) { 173 if (rtype == relocInfo::none) 174 emit_long(data); 175 else emit_data(data, Relocation::spec_simple(rtype), format); 176 } 177 178 void Assembler::emit_data(jint data, RelocationHolder const& rspec, int format) { 179 assert(imm_operand == 0, "default format must be immediate in this file"); 180 assert(inst_mark() != NULL, "must be inside InstructionMark"); 181 if (rspec.type() != relocInfo::none) { 182 #ifdef ASSERT 183 check_relocation(rspec, format); 184 #endif 185 // Do not use AbstractAssembler::relocate, which is not intended for 186 // embedded 
words. Instead, relocate to the enclosing instruction. 187 188 // hack. call32 is too wide for mask so use disp32 189 if (format == call32_operand) 190 code_section()->relocate(inst_mark(), rspec, disp32_operand); 191 else 192 code_section()->relocate(inst_mark(), rspec, format); 193 } 194 emit_long(data); 195 } 196 197 static int encode(Register r) { 198 int enc = r->encoding(); 199 if (enc >= 8) { 200 enc -= 8; 201 } 202 return enc; 203 } 204 205 static int encode(XMMRegister r) { 206 int enc = r->encoding(); 207 if (enc >= 8) { 208 enc -= 8; 209 } 210 return enc; 211 } 212 213 void Assembler::emit_arith_b(int op1, int op2, Register dst, int imm8) { 214 assert(dst->has_byte_register(), "must have byte register"); 215 assert(isByte(op1) && isByte(op2), "wrong opcode"); 216 assert(isByte(imm8), "not a byte"); 217 assert((op1 & 0x01) == 0, "should be 8bit operation"); 218 emit_byte(op1); 219 emit_byte(op2 | encode(dst)); 220 emit_byte(imm8); 221 } 222 223 224 void Assembler::emit_arith(int op1, int op2, Register dst, int32_t imm32) { 225 assert(isByte(op1) && isByte(op2), "wrong opcode"); 226 assert((op1 & 0x01) == 1, "should be 32bit operation"); 227 assert((op1 & 0x02) == 0, "sign-extension bit should not be set"); 228 if (is8bit(imm32)) { 229 emit_byte(op1 | 0x02); // set sign bit 230 emit_byte(op2 | encode(dst)); 231 emit_byte(imm32 & 0xFF); 232 } else { 233 emit_byte(op1); 234 emit_byte(op2 | encode(dst)); 235 emit_long(imm32); 236 } 237 } 238 239 // immediate-to-memory forms 240 void Assembler::emit_arith_operand(int op1, Register rm, Address adr, int32_t imm32) { 241 assert((op1 & 0x01) == 1, "should be 32bit operation"); 242 assert((op1 & 0x02) == 0, "sign-extension bit should not be set"); 243 if (is8bit(imm32)) { 244 emit_byte(op1 | 0x02); // set sign bit 245 emit_operand(rm, adr, 1); 246 emit_byte(imm32 & 0xFF); 247 } else { 248 emit_byte(op1); 249 emit_operand(rm, adr, 4); 250 emit_long(imm32); 251 } 252 } 253 254 void Assembler::emit_arith(int op1, int 
op2, Register dst, jobject obj) { 255 LP64_ONLY(ShouldNotReachHere()); 256 assert(isByte(op1) && isByte(op2), "wrong opcode"); 257 assert((op1 & 0x01) == 1, "should be 32bit operation"); 258 assert((op1 & 0x02) == 0, "sign-extension bit should not be set"); 259 InstructionMark im(this); 260 emit_byte(op1); 261 emit_byte(op2 | encode(dst)); 262 emit_data((intptr_t)obj, relocInfo::oop_type, 0); 263 } 264 265 266 void Assembler::emit_arith(int op1, int op2, Register dst, Register src) { 267 assert(isByte(op1) && isByte(op2), "wrong opcode"); 268 emit_byte(op1); 269 emit_byte(op2 | encode(dst) << 3 | encode(src)); 270 } 271 272 273 void Assembler::emit_operand(Register reg, Register base, Register index, 274 Address::ScaleFactor scale, int disp, 275 RelocationHolder const& rspec, 276 int rip_relative_correction) { 277 relocInfo::relocType rtype = (relocInfo::relocType) rspec.type(); 278 279 // Encode the registers as needed in the fields they are used in 280 281 int regenc = encode(reg) << 3; 282 int indexenc = index->is_valid() ? encode(index) << 3 : 0; 283 int baseenc = base->is_valid() ? 
encode(base) : 0; 284 285 if (base->is_valid()) { 286 if (index->is_valid()) { 287 assert(scale != Address::no_scale, "inconsistent address"); 288 // [base + index*scale + disp] 289 if (disp == 0 && rtype == relocInfo::none && 290 base != rbp LP64_ONLY(&& base != r13)) { 291 // [base + index*scale] 292 // [00 reg 100][ss index base] 293 assert(index != rsp, "illegal addressing mode"); 294 emit_byte(0x04 | regenc); 295 emit_byte(scale << 6 | indexenc | baseenc); 296 } else if (is8bit(disp) && rtype == relocInfo::none) { 297 // [base + index*scale + imm8] 298 // [01 reg 100][ss index base] imm8 299 assert(index != rsp, "illegal addressing mode"); 300 emit_byte(0x44 | regenc); 301 emit_byte(scale << 6 | indexenc | baseenc); 302 emit_byte(disp & 0xFF); 303 } else { 304 // [base + index*scale + disp32] 305 // [10 reg 100][ss index base] disp32 306 assert(index != rsp, "illegal addressing mode"); 307 emit_byte(0x84 | regenc); 308 emit_byte(scale << 6 | indexenc | baseenc); 309 emit_data(disp, rspec, disp32_operand); 310 } 311 } else if (base == rsp LP64_ONLY(|| base == r12)) { 312 // [rsp + disp] 313 if (disp == 0 && rtype == relocInfo::none) { 314 // [rsp] 315 // [00 reg 100][00 100 100] 316 emit_byte(0x04 | regenc); 317 emit_byte(0x24); 318 } else if (is8bit(disp) && rtype == relocInfo::none) { 319 // [rsp + imm8] 320 // [01 reg 100][00 100 100] disp8 321 emit_byte(0x44 | regenc); 322 emit_byte(0x24); 323 emit_byte(disp & 0xFF); 324 } else { 325 // [rsp + imm32] 326 // [10 reg 100][00 100 100] disp32 327 emit_byte(0x84 | regenc); 328 emit_byte(0x24); 329 emit_data(disp, rspec, disp32_operand); 330 } 331 } else { 332 // [base + disp] 333 assert(base != rsp LP64_ONLY(&& base != r12), "illegal addressing mode"); 334 if (disp == 0 && rtype == relocInfo::none && 335 base != rbp LP64_ONLY(&& base != r13)) { 336 // [base] 337 // [00 reg base] 338 emit_byte(0x00 | regenc | baseenc); 339 } else if (is8bit(disp) && rtype == relocInfo::none) { 340 // [base + disp8] 341 // [01 reg 
base] disp8 342 emit_byte(0x40 | regenc | baseenc); 343 emit_byte(disp & 0xFF); 344 } else { 345 // [base + disp32] 346 // [10 reg base] disp32 347 emit_byte(0x80 | regenc | baseenc); 348 emit_data(disp, rspec, disp32_operand); 349 } 350 } 351 } else { 352 if (index->is_valid()) { 353 assert(scale != Address::no_scale, "inconsistent address"); 354 // [index*scale + disp] 355 // [00 reg 100][ss index 101] disp32 356 assert(index != rsp, "illegal addressing mode"); 357 emit_byte(0x04 | regenc); 358 emit_byte(scale << 6 | indexenc | 0x05); 359 emit_data(disp, rspec, disp32_operand); 360 } else if (rtype != relocInfo::none ) { 361 // [disp] (64bit) RIP-RELATIVE (32bit) abs 362 // [00 000 101] disp32 363 364 emit_byte(0x05 | regenc); 365 // Note that the RIP-rel. correction applies to the generated 366 // disp field, but _not_ to the target address in the rspec. 367 368 // disp was created by converting the target address minus the pc 369 // at the start of the instruction. That needs more correction here. 
370 // intptr_t disp = target - next_ip; 371 assert(inst_mark() != NULL, "must be inside InstructionMark"); 372 address next_ip = pc() + sizeof(int32_t) + rip_relative_correction; 373 int64_t adjusted = disp; 374 // Do rip-rel adjustment for 64bit 375 LP64_ONLY(adjusted -= (next_ip - inst_mark())); 376 assert(is_simm32(adjusted), 377 "must be 32bit offset (RIP relative address)"); 378 emit_data((int32_t) adjusted, rspec, disp32_operand); 379 380 } else { 381 // 32bit never did this, did everything as the rip-rel/disp code above 382 // [disp] ABSOLUTE 383 // [00 reg 100][00 100 101] disp32 384 emit_byte(0x04 | regenc); 385 emit_byte(0x25); 386 emit_data(disp, rspec, disp32_operand); 387 } 388 } 389 } 390 391 void Assembler::emit_operand(XMMRegister reg, Register base, Register index, 392 Address::ScaleFactor scale, int disp, 393 RelocationHolder const& rspec) { 394 emit_operand((Register)reg, base, index, scale, disp, rspec); 395 } 396 397 // Secret local extension to Assembler::WhichOperand: 398 #define end_pc_operand (_WhichOperand_limit) 399 400 address Assembler::locate_operand(address inst, WhichOperand which) { 401 // Decode the given instruction, and return the address of 402 // an embedded 32-bit operand word. 403 404 // If "which" is disp32_operand, selects the displacement portion 405 // of an effective address specifier. 406 // If "which" is imm64_operand, selects the trailing immediate constant. 407 // If "which" is call32_operand, selects the displacement of a call or jump. 408 // Caller is responsible for ensuring that there is such an operand, 409 // and that it is 32/64 bits wide. 410 411 // If "which" is end_pc_operand, find the end of the instruction. 412 413 address ip = inst; 414 bool is_64bit = false; 415 416 debug_only(bool has_disp32 = false); 417 int tail_size = 0; // other random bytes (#32, #16, etc.) 
at end of insn 418 419 again_after_prefix: 420 switch (0xFF & *ip++) { 421 422 // These convenience macros generate groups of "case" labels for the switch. 423 #define REP4(x) (x)+0: case (x)+1: case (x)+2: case (x)+3 424 #define REP8(x) (x)+0: case (x)+1: case (x)+2: case (x)+3: \ 425 case (x)+4: case (x)+5: case (x)+6: case (x)+7 426 #define REP16(x) REP8((x)+0): \ 427 case REP8((x)+8) 428 429 case CS_segment: 430 case SS_segment: 431 case DS_segment: 432 case ES_segment: 433 case FS_segment: 434 case GS_segment: 435 // Seems dubious 436 LP64_ONLY(assert(false, "shouldn't have that prefix")); 437 assert(ip == inst+1, "only one prefix allowed"); 438 goto again_after_prefix; 439 440 case 0x67: 441 case REX: 442 case REX_B: 443 case REX_X: 444 case REX_XB: 445 case REX_R: 446 case REX_RB: 447 case REX_RX: 448 case REX_RXB: 449 NOT_LP64(assert(false, "64bit prefixes")); 450 goto again_after_prefix; 451 452 case REX_W: 453 case REX_WB: 454 case REX_WX: 455 case REX_WXB: 456 case REX_WR: 457 case REX_WRB: 458 case REX_WRX: 459 case REX_WRXB: 460 NOT_LP64(assert(false, "64bit prefixes")); 461 is_64bit = true; 462 goto again_after_prefix; 463 464 case 0xFF: // pushq a; decl a; incl a; call a; jmp a 465 case 0x88: // movb a, r 466 case 0x89: // movl a, r 467 case 0x8A: // movb r, a 468 case 0x8B: // movl r, a 469 case 0x8F: // popl a 470 debug_only(has_disp32 = true); 471 break; 472 473 case 0x68: // pushq #32 474 if (which == end_pc_operand) { 475 return ip + 4; 476 } 477 assert(which == imm_operand && !is_64bit, "pushl has no disp32 or 64bit immediate"); 478 return ip; // not produced by emit_operand 479 480 case 0x66: // movw ... 
(size prefix) 481 again_after_size_prefix2: 482 switch (0xFF & *ip++) { 483 case REX: 484 case REX_B: 485 case REX_X: 486 case REX_XB: 487 case REX_R: 488 case REX_RB: 489 case REX_RX: 490 case REX_RXB: 491 case REX_W: 492 case REX_WB: 493 case REX_WX: 494 case REX_WXB: 495 case REX_WR: 496 case REX_WRB: 497 case REX_WRX: 498 case REX_WRXB: 499 NOT_LP64(assert(false, "64bit prefix found")); 500 goto again_after_size_prefix2; 501 case 0x8B: // movw r, a 502 case 0x89: // movw a, r 503 debug_only(has_disp32 = true); 504 break; 505 case 0xC7: // movw a, #16 506 debug_only(has_disp32 = true); 507 tail_size = 2; // the imm16 508 break; 509 case 0x0F: // several SSE/SSE2 variants 510 ip--; // reparse the 0x0F 511 goto again_after_prefix; 512 default: 513 ShouldNotReachHere(); 514 } 515 break; 516 517 case REP8(0xB8): // movl/q r, #32/#64(oop?) 518 if (which == end_pc_operand) return ip + (is_64bit ? 8 : 4); 519 // these asserts are somewhat nonsensical 520 #ifndef _LP64 521 assert(which == imm_operand || which == disp32_operand, ""); 522 #else 523 assert((which == call32_operand || which == imm_operand) && is_64bit || 524 which == narrow_oop_operand && !is_64bit, ""); 525 #endif // _LP64 526 return ip; 527 528 case 0x69: // imul r, a, #32 529 case 0xC7: // movl a, #32(oop?) 530 tail_size = 4; 531 debug_only(has_disp32 = true); // has both kinds of operands! 532 break; 533 534 case 0x0F: // movx..., etc. 535 switch (0xFF & *ip++) { 536 case 0x3A: // pcmpestri 537 tail_size = 1; 538 case 0x38: // ptest, pmovzxbw 539 ip++; // skip opcode 540 debug_only(has_disp32 = true); // has both kinds of operands! 541 break; 542 543 case 0x70: // pshufd r, r/a, #8 544 debug_only(has_disp32 = true); // has both kinds of operands! 
545 case 0x73: // psrldq r, #8 546 tail_size = 1; 547 break; 548 549 case 0x12: // movlps 550 case 0x28: // movaps 551 case 0x2E: // ucomiss 552 case 0x2F: // comiss 553 case 0x54: // andps 554 case 0x55: // andnps 555 case 0x56: // orps 556 case 0x57: // xorps 557 case 0x6E: // movd 558 case 0x7E: // movd 559 case 0xAE: // ldmxcsr, stmxcsr, fxrstor, fxsave, clflush 560 debug_only(has_disp32 = true); 561 break; 562 563 case 0xAD: // shrd r, a, %cl 564 case 0xAF: // imul r, a 565 case 0xBE: // movsbl r, a (movsxb) 566 case 0xBF: // movswl r, a (movsxw) 567 case 0xB6: // movzbl r, a (movzxb) 568 case 0xB7: // movzwl r, a (movzxw) 569 case REP16(0x40): // cmovl cc, r, a 570 case 0xB0: // cmpxchgb 571 case 0xB1: // cmpxchg 572 case 0xC1: // xaddl 573 case 0xC7: // cmpxchg8 574 case REP16(0x90): // setcc a 575 debug_only(has_disp32 = true); 576 // fall out of the switch to decode the address 577 break; 578 579 case 0xC4: // pinsrw r, a, #8 580 debug_only(has_disp32 = true); 581 case 0xC5: // pextrw r, r, #8 582 tail_size = 1; // the imm8 583 break; 584 585 case 0xAC: // shrd r, a, #8 586 debug_only(has_disp32 = true); 587 tail_size = 1; // the imm8 588 break; 589 590 case REP16(0x80): // jcc rdisp32 591 if (which == end_pc_operand) return ip + 4; 592 assert(which == call32_operand, "jcc has no disp32 or imm"); 593 return ip; 594 default: 595 ShouldNotReachHere(); 596 } 597 break; 598 599 case 0x81: // addl a, #32; addl r, #32 600 // also: orl, adcl, sbbl, andl, subl, xorl, cmpl 601 // on 32bit in the case of cmpl, the imm might be an oop 602 tail_size = 4; 603 debug_only(has_disp32 = true); // has both kinds of operands! 604 break; 605 606 case 0x83: // addl a, #8; addl r, #8 607 // also: orl, adcl, sbbl, andl, subl, xorl, cmpl 608 debug_only(has_disp32 = true); // has both kinds of operands! 
609 tail_size = 1; 610 break; 611 612 case 0x9B: 613 switch (0xFF & *ip++) { 614 case 0xD9: // fnstcw a 615 debug_only(has_disp32 = true); 616 break; 617 default: 618 ShouldNotReachHere(); 619 } 620 break; 621 622 case REP4(0x00): // addb a, r; addl a, r; addb r, a; addl r, a 623 case REP4(0x10): // adc... 624 case REP4(0x20): // and... 625 case REP4(0x30): // xor... 626 case REP4(0x08): // or... 627 case REP4(0x18): // sbb... 628 case REP4(0x28): // sub... 629 case 0xF7: // mull a 630 case 0x8D: // lea r, a 631 case 0x87: // xchg r, a 632 case REP4(0x38): // cmp... 633 case 0x85: // test r, a 634 debug_only(has_disp32 = true); // has both kinds of operands! 635 break; 636 637 case 0xC1: // sal a, #8; sar a, #8; shl a, #8; shr a, #8 638 case 0xC6: // movb a, #8 639 case 0x80: // cmpb a, #8 640 case 0x6B: // imul r, a, #8 641 debug_only(has_disp32 = true); // has both kinds of operands! 642 tail_size = 1; // the imm8 643 break; 644 645 case 0xC4: // VEX_3bytes 646 case 0xC5: // VEX_2bytes 647 assert((UseAVX > 0), "shouldn't have VEX prefix"); 648 assert(ip == inst+1, "no prefixes allowed"); 649 // C4 and C5 are also used as opcodes for PINSRW and PEXTRW instructions 650 // but they have prefix 0x0F and processed when 0x0F processed above. 651 // 652 // In 32-bit mode the VEX first byte C4 and C5 alias onto LDS and LES 653 // instructions (these instructions are not supported in 64-bit mode). 654 // To distinguish them bits [7:6] are set in the VEX second byte since 655 // ModRM byte can not be of the form 11xxxxxx in 32-bit mode. To set 656 // those VEX bits REX and vvvv bits are inverted. 657 // 658 // Fortunately C2 doesn't generate these instructions so we don't need 659 // to check for them in product version. 
660 661 // Check second byte 662 NOT_LP64(assert((0xC0 & *ip) == 0xC0, "shouldn't have LDS and LES instructions")); 663 664 // First byte 665 if ((0xFF & *inst) == VEX_3bytes) { 666 ip++; // third byte 667 is_64bit = ((VEX_W & *ip) == VEX_W); 668 } 669 ip++; // opcode 670 // To find the end of instruction (which == end_pc_operand). 671 switch (0xFF & *ip) { 672 case 0x61: // pcmpestri r, r/a, #8 673 case 0x70: // pshufd r, r/a, #8 674 case 0x73: // psrldq r, #8 675 tail_size = 1; // the imm8 676 break; 677 default: 678 break; 679 } 680 ip++; // skip opcode 681 debug_only(has_disp32 = true); // has both kinds of operands! 682 break; 683 684 case 0xD1: // sal a, 1; sar a, 1; shl a, 1; shr a, 1 685 case 0xD3: // sal a, %cl; sar a, %cl; shl a, %cl; shr a, %cl 686 case 0xD9: // fld_s a; fst_s a; fstp_s a; fldcw a 687 case 0xDD: // fld_d a; fst_d a; fstp_d a 688 case 0xDB: // fild_s a; fistp_s a; fld_x a; fstp_x a 689 case 0xDF: // fild_d a; fistp_d a 690 case 0xD8: // fadd_s a; fsubr_s a; fmul_s a; fdivr_s a; fcomp_s a 691 case 0xDC: // fadd_d a; fsubr_d a; fmul_d a; fdivr_d a; fcomp_d a 692 case 0xDE: // faddp_d a; fsubrp_d a; fmulp_d a; fdivrp_d a; fcompp_d a 693 debug_only(has_disp32 = true); 694 break; 695 696 case 0xE8: // call rdisp32 697 case 0xE9: // jmp rdisp32 698 if (which == end_pc_operand) return ip + 4; 699 assert(which == call32_operand, "call has no disp32 or imm"); 700 return ip; 701 702 case 0xF0: // Lock 703 assert(os::is_MP(), "only on MP"); 704 goto again_after_prefix; 705 706 case 0xF3: // For SSE 707 case 0xF2: // For SSE2 708 switch (0xFF & *ip++) { 709 case REX: 710 case REX_B: 711 case REX_X: 712 case REX_XB: 713 case REX_R: 714 case REX_RB: 715 case REX_RX: 716 case REX_RXB: 717 case REX_W: 718 case REX_WB: 719 case REX_WX: 720 case REX_WXB: 721 case REX_WR: 722 case REX_WRB: 723 case REX_WRX: 724 case REX_WRXB: 725 NOT_LP64(assert(false, "found 64bit prefix")); 726 ip++; 727 default: 728 ip++; 729 } 730 debug_only(has_disp32 = true); // has 
both kinds of operands! 731 break; 732 733 default: 734 ShouldNotReachHere(); 735 736 #undef REP8 737 #undef REP16 738 } 739 740 assert(which != call32_operand, "instruction is not a call, jmp, or jcc"); 741 #ifdef _LP64 742 assert(which != imm_operand, "instruction is not a movq reg, imm64"); 743 #else 744 // assert(which != imm_operand || has_imm32, "instruction has no imm32 field"); 745 assert(which != imm_operand || has_disp32, "instruction has no imm32 field"); 746 #endif // LP64 747 assert(which != disp32_operand || has_disp32, "instruction has no disp32 field"); 748 749 // parse the output of emit_operand 750 int op2 = 0xFF & *ip++; 751 int base = op2 & 0x07; 752 int op3 = -1; 753 const int b100 = 4; 754 const int b101 = 5; 755 if (base == b100 && (op2 >> 6) != 3) { 756 op3 = 0xFF & *ip++; 757 base = op3 & 0x07; // refetch the base 758 } 759 // now ip points at the disp (if any) 760 761 switch (op2 >> 6) { 762 case 0: 763 // [00 reg 100][ss index base] 764 // [00 reg 100][00 100 esp] 765 // [00 reg base] 766 // [00 reg 100][ss index 101][disp32] 767 // [00 reg 101] [disp32] 768 769 if (base == b101) { 770 if (which == disp32_operand) 771 return ip; // caller wants the disp32 772 ip += 4; // skip the disp32 773 } 774 break; 775 776 case 1: 777 // [01 reg 100][ss index base][disp8] 778 // [01 reg 100][00 100 esp][disp8] 779 // [01 reg base] [disp8] 780 ip += 1; // skip the disp8 781 break; 782 783 case 2: 784 // [10 reg 100][ss index base][disp32] 785 // [10 reg 100][00 100 esp][disp32] 786 // [10 reg base] [disp32] 787 if (which == disp32_operand) 788 return ip; // caller wants the disp32 789 ip += 4; // skip the disp32 790 break; 791 792 case 3: 793 // [11 reg base] (not a memory addressing mode) 794 break; 795 } 796 797 if (which == end_pc_operand) { 798 return ip + tail_size; 799 } 800 801 #ifdef _LP64 802 assert(which == narrow_oop_operand && !is_64bit, "instruction is not a movl adr, imm32"); 803 #else 804 assert(which == imm_operand, "instruction has 
only an imm field"); 805 #endif // LP64 806 return ip; 807 } 808 809 address Assembler::locate_next_instruction(address inst) { 810 // Secretly share code with locate_operand: 811 return locate_operand(inst, end_pc_operand); 812 } 813 814 815 #ifdef ASSERT 816 void Assembler::check_relocation(RelocationHolder const& rspec, int format) { 817 address inst = inst_mark(); 818 assert(inst != NULL && inst < pc(), "must point to beginning of instruction"); 819 address opnd; 820 821 Relocation* r = rspec.reloc(); 822 if (r->type() == relocInfo::none) { 823 return; 824 } else if (r->is_call() || format == call32_operand) { 825 // assert(format == imm32_operand, "cannot specify a nonzero format"); 826 opnd = locate_operand(inst, call32_operand); 827 } else if (r->is_data()) { 828 assert(format == imm_operand || format == disp32_operand 829 LP64_ONLY(|| format == narrow_oop_operand), "format ok"); 830 opnd = locate_operand(inst, (WhichOperand)format); 831 } else { 832 assert(format == imm_operand, "cannot specify a format"); 833 return; 834 } 835 assert(opnd == pc(), "must put operand where relocs can find it"); 836 } 837 #endif // ASSERT 838 839 void Assembler::emit_operand32(Register reg, Address adr) { 840 assert(reg->encoding() < 8, "no extended registers"); 841 assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers"); 842 emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp, 843 adr._rspec); 844 } 845 846 void Assembler::emit_operand(Register reg, Address adr, 847 int rip_relative_correction) { 848 emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp, 849 adr._rspec, 850 rip_relative_correction); 851 } 852 853 void Assembler::emit_operand(XMMRegister reg, Address adr) { 854 emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp, 855 adr._rspec); 856 } 857 858 // MMX operations 859 void Assembler::emit_operand(MMXRegister reg, Address adr) { 860 assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended 
registers"); 861 emit_operand((Register)reg, adr._base, adr._index, adr._scale, adr._disp, adr._rspec); 862 } 863 864 // work around gcc (3.2.1-7a) bug 865 void Assembler::emit_operand(Address adr, MMXRegister reg) { 866 assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers"); 867 emit_operand((Register)reg, adr._base, adr._index, adr._scale, adr._disp, adr._rspec); 868 } 869 870 871 void Assembler::emit_farith(int b1, int b2, int i) { 872 assert(isByte(b1) && isByte(b2), "wrong opcode"); 873 assert(0 <= i && i < 8, "illegal stack offset"); 874 emit_byte(b1); 875 emit_byte(b2 + i); 876 } 877 878 879 // Now the Assembler instructions (identical for 32/64 bits) 880 881 void Assembler::adcl(Address dst, int32_t imm32) { 882 InstructionMark im(this); 883 prefix(dst); 884 emit_arith_operand(0x81, rdx, dst, imm32); 885 } 886 887 void Assembler::adcl(Address dst, Register src) { 888 InstructionMark im(this); 889 prefix(dst, src); 890 emit_byte(0x11); 891 emit_operand(src, dst); 892 } 893 894 void Assembler::adcl(Register dst, int32_t imm32) { 895 prefix(dst); 896 emit_arith(0x81, 0xD0, dst, imm32); 897 } 898 899 void Assembler::adcl(Register dst, Address src) { 900 InstructionMark im(this); 901 prefix(src, dst); 902 emit_byte(0x13); 903 emit_operand(dst, src); 904 } 905 906 void Assembler::adcl(Register dst, Register src) { 907 (void) prefix_and_encode(dst->encoding(), src->encoding()); 908 emit_arith(0x13, 0xC0, dst, src); 909 } 910 911 void Assembler::addl(Address dst, int32_t imm32) { 912 InstructionMark im(this); 913 prefix(dst); 914 emit_arith_operand(0x81, rax, dst, imm32); 915 } 916 917 void Assembler::addl(Address dst, Register src) { 918 InstructionMark im(this); 919 prefix(dst, src); 920 emit_byte(0x01); 921 emit_operand(src, dst); 922 } 923 924 void Assembler::addl(Register dst, int32_t imm32) { 925 prefix(dst); 926 emit_arith(0x81, 0xC0, dst, imm32); 927 } 928 929 void Assembler::addl(Register dst, Address src) { 930 InstructionMark 
im(this); 931 prefix(src, dst); 932 emit_byte(0x03); 933 emit_operand(dst, src); 934 } 935 936 void Assembler::addl(Register dst, Register src) { 937 (void) prefix_and_encode(dst->encoding(), src->encoding()); 938 emit_arith(0x03, 0xC0, dst, src); 939 } 940 941 void Assembler::addr_nop_4() { 942 // 4 bytes: NOP DWORD PTR [EAX+0] 943 emit_byte(0x0F); 944 emit_byte(0x1F); 945 emit_byte(0x40); // emit_rm(cbuf, 0x1, EAX_enc, EAX_enc); 946 emit_byte(0); // 8-bits offset (1 byte) 947 } 948 949 void Assembler::addr_nop_5() { 950 // 5 bytes: NOP DWORD PTR [EAX+EAX*0+0] 8-bits offset 951 emit_byte(0x0F); 952 emit_byte(0x1F); 953 emit_byte(0x44); // emit_rm(cbuf, 0x1, EAX_enc, 0x4); 954 emit_byte(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc); 955 emit_byte(0); // 8-bits offset (1 byte) 956 } 957 958 void Assembler::addr_nop_7() { 959 // 7 bytes: NOP DWORD PTR [EAX+0] 32-bits offset 960 emit_byte(0x0F); 961 emit_byte(0x1F); 962 emit_byte(0x80); // emit_rm(cbuf, 0x2, EAX_enc, EAX_enc); 963 emit_long(0); // 32-bits offset (4 bytes) 964 } 965 966 void Assembler::addr_nop_8() { 967 // 8 bytes: NOP DWORD PTR [EAX+EAX*0+0] 32-bits offset 968 emit_byte(0x0F); 969 emit_byte(0x1F); 970 emit_byte(0x84); // emit_rm(cbuf, 0x2, EAX_enc, 0x4); 971 emit_byte(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc); 972 emit_long(0); // 32-bits offset (4 bytes) 973 } 974 975 void Assembler::addsd(XMMRegister dst, XMMRegister src) { 976 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 977 int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2); 978 emit_byte(0x58); 979 emit_byte(0xC0 | encode); 980 } 981 982 void Assembler::addsd(XMMRegister dst, Address src) { 983 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 984 InstructionMark im(this); 985 simd_prefix(dst, dst, src, VEX_SIMD_F2); 986 emit_byte(0x58); 987 emit_operand(dst, src); 988 } 989 990 void Assembler::addss(XMMRegister dst, XMMRegister src) { 991 NOT_LP64(assert(VM_Version::supports_sse(), "")); 992 int encode = 
simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3); 993 emit_byte(0x58); 994 emit_byte(0xC0 | encode); 995 } 996 997 void Assembler::addss(XMMRegister dst, Address src) { 998 NOT_LP64(assert(VM_Version::supports_sse(), "")); 999 InstructionMark im(this); 1000 simd_prefix(dst, dst, src, VEX_SIMD_F3); 1001 emit_byte(0x58); 1002 emit_operand(dst, src); 1003 } 1004 1005 void Assembler::andl(Address dst, int32_t imm32) { 1006 InstructionMark im(this); 1007 prefix(dst); 1008 emit_byte(0x81); 1009 emit_operand(rsp, dst, 4); 1010 emit_long(imm32); 1011 } 1012 1013 void Assembler::andl(Register dst, int32_t imm32) { 1014 prefix(dst); 1015 emit_arith(0x81, 0xE0, dst, imm32); 1016 } 1017 1018 void Assembler::andl(Register dst, Address src) { 1019 InstructionMark im(this); 1020 prefix(src, dst); 1021 emit_byte(0x23); 1022 emit_operand(dst, src); 1023 } 1024 1025 void Assembler::andl(Register dst, Register src) { 1026 (void) prefix_and_encode(dst->encoding(), src->encoding()); 1027 emit_arith(0x23, 0xC0, dst, src); 1028 } 1029 1030 void Assembler::andpd(XMMRegister dst, Address src) { 1031 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1032 InstructionMark im(this); 1033 simd_prefix(dst, dst, src, VEX_SIMD_66); 1034 emit_byte(0x54); 1035 emit_operand(dst, src); 1036 } 1037 1038 void Assembler::andpd(XMMRegister dst, XMMRegister src) { 1039 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1040 int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66); 1041 emit_byte(0x54); 1042 emit_byte(0xC0 | encode); 1043 } 1044 1045 void Assembler::andps(XMMRegister dst, Address src) { 1046 NOT_LP64(assert(VM_Version::supports_sse(), "")); 1047 InstructionMark im(this); 1048 simd_prefix(dst, dst, src, VEX_SIMD_NONE); 1049 emit_byte(0x54); 1050 emit_operand(dst, src); 1051 } 1052 1053 void Assembler::andps(XMMRegister dst, XMMRegister src) { 1054 NOT_LP64(assert(VM_Version::supports_sse(), "")); 1055 int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE); 1056 
emit_byte(0x54);
  emit_byte(0xC0 | encode);
}

// BSF r32, r32: bit-scan-forward (0F BC).
void Assembler::bsfl(Register dst, Register src) {
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xBC);
  emit_byte(0xC0 | encode);
}

// BSR r32, r32: bit-scan-reverse (0F BD).  On LZCNT-capable CPUs this
// same encoding with an F3 prefix is LZCNT, hence the guard.
void Assembler::bsrl(Register dst, Register src) {
  assert(!VM_Version::supports_lzcnt(), "encoding is treated as LZCNT");
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xBD);
  emit_byte(0xC0 | encode);
}

// BSWAP r32 (0F C8+rd).
void Assembler::bswapl(Register reg) { // bswap
  int encode = prefix_and_encode(reg->encoding());
  emit_byte(0x0F);
  emit_byte(0xC8 | encode);
}

// CALL rel32 to a label; emits a patch record if the label is unbound.
void Assembler::call(Label& L, relocInfo::relocType rtype) {
  // suspect disp32 is always good
  int operand = LP64_ONLY(disp32_operand) NOT_LP64(imm_operand);

  if (L.is_bound()) {
    const int long_size = 5;
    // Bound labels are behind us, so the displacement is non-positive.
    int offs = (int)( target(L) - pc() );
    assert(offs <= 0, "assembler error");
    InstructionMark im(this);
    // 1110 1000 #32-bit disp
    emit_byte(0xE8);
    emit_data(offs - long_size, rtype, operand);
  } else {
    InstructionMark im(this);
    // 1110 1000 #32-bit disp
    L.add_patch_at(code(), locator());

    emit_byte(0xE8);
    emit_data(int(0), rtype, operand);
  }
}

// CALL r32 (FF /2, register form).
void Assembler::call(Register dst) {
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xFF);
  emit_byte(0xD0 | encode);
}


// CALL m32 (FF /2; rdx passed only to encode extension digit 2).
void Assembler::call(Address adr) {
  InstructionMark im(this);
  prefix(adr);
  emit_byte(0xFF);
  emit_operand(rdx, adr);
}

// CALL rel32 to an absolute address, with relocation info.
void Assembler::call_literal(address entry, RelocationHolder const& rspec) {
  assert(entry != NULL, "call most probably wrong");
  InstructionMark im(this);
  emit_byte(0xE8);
  // Displacement is relative to the end of the 4-byte disp field.
  intptr_t disp = entry - (_code_pos + sizeof(int32_t));
  assert(is_simm32(disp), "must be 32bit offset (call2)");
  // Technically, should use call32_operand, but this format is
  // implied by the fact that we're emitting a call instruction.

  int operand = LP64_ONLY(disp32_operand) NOT_LP64(call32_operand);
  emit_data((int) disp, rspec, operand);
}

// CDQ: sign-extend eax into edx:eax.
void Assembler::cdql() {
  emit_byte(0x99);
}

// CMOVcc r32, r32 (0F 40+cc).
void Assembler::cmovl(Condition cc, Register dst, Register src) {
  NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction"));
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x40 | cc);
  emit_byte(0xC0 | encode);
}


// CMOVcc r32, m32.
// NOTE(review): unlike the other memory-operand emitters, this one has no
// InstructionMark, so it is presumably not relocatable - confirm intent.
void Assembler::cmovl(Condition cc, Register dst, Address src) {
  NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction"));
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x40 | cc);
  emit_operand(dst, src);
}

// CMP m8, imm8 (0x80 /7; rdi passed only to encode extension digit 7).
void Assembler::cmpb(Address dst, int imm8) {
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0x80);
  emit_operand(rdi, dst, 1);
  emit_byte(imm8);
}

// CMP m32, imm32 (0x81 /7).
void Assembler::cmpl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0x81);
  emit_operand(rdi, dst, 4);
  emit_long(imm32);
}

// CMP r32, imm32.
void Assembler::cmpl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xF8, dst, imm32);
}

// CMP r32, r32 (0x3B /r).
void Assembler::cmpl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x3B, 0xC0, dst, src);
}


// CMP r32, m32.
void Assembler::cmpl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x3B);
  emit_operand(dst, src);
}

// CMP m16, imm16 (operand-size prefix 0x66 then 0x81 /7).
void Assembler::cmpw(Address dst, int imm16) {
  InstructionMark im(this);
  assert(!dst.base_needs_rex() && !dst.index_needs_rex(), "no extended registers");
  emit_byte(0x66);
emit_byte(0x81);
  emit_operand(rdi, dst, 2);
  emit_word(imm16);
}

// The 32-bit cmpxchg compares the value at adr with the contents of rax,
// and stores reg into adr if so; otherwise, the value at adr is loaded into rax,.
// The ZF is set if the compared values were equal, and cleared otherwise.
void Assembler::cmpxchgl(Register reg, Address adr) { // cmpxchg
  if (Atomics & 2) {
    // caveat: no instructionmark, so this isn't relocatable.
    // Emit a synthetic, non-atomic, CAS equivalent.
    // Beware.  The synthetic form sets all ICCs, not just ZF.
    // cmpxchg r,[m] is equivalent to rax, = CAS (m, rax, r)
    cmpl(rax, adr);
    movl(rax, adr);
    if (reg != rax) {
      Label L ;
      jcc(Assembler::notEqual, L);
      movl(adr, reg);
      bind(L);
    }
  } else {
    // Real CMPXCHG r32, m32 (0F B1 /r).
    InstructionMark im(this);
    prefix(adr, reg);
    emit_byte(0x0F);
    emit_byte(0xB1);
    emit_operand(reg, adr);
  }
}

// COMISD xmm, m64 (ordered scalar-double compare; 66-prefixed 0F 2F).
void Assembler::comisd(XMMRegister dst, Address src) {
  // NOTE: dbx seems to decode this as comiss even though the
  // 0x66 is there. Strangely ucomisd comes out correct
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  simd_prefix(dst, src, VEX_SIMD_66);
  emit_byte(0x2F);
  emit_operand(dst, src);
}

// COMISD xmm, xmm.
void Assembler::comisd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66);
  emit_byte(0x2F);
  emit_byte(0xC0 | encode);
}

// COMISS xmm, m32 (ordered scalar-single compare; unprefixed 0F 2F).
void Assembler::comiss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  simd_prefix(dst, src, VEX_SIMD_NONE);
  emit_byte(0x2F);
  emit_operand(dst, src);
}

// COMISS xmm, xmm.
void Assembler::comiss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_NONE);
  emit_byte(0x2F);
  emit_byte(0xC0 | encode);
}

// CVTDQ2PD xmm, xmm (packed int32 -> packed double; F3-prefixed 0F E6).
void Assembler::cvtdq2pd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F3);
  emit_byte(0xE6);
  emit_byte(0xC0 | encode);
}

// CVTDQ2PS xmm, xmm (packed int32 -> packed single; 0F 5B).
void Assembler::cvtdq2ps(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_NONE);
  emit_byte(0x5B);
  emit_byte(0xC0 | encode);
}

// CVTSD2SS xmm, xmm (scalar double -> single; F2-prefixed 0F 5A).
void Assembler::cvtsd2ss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2);
  emit_byte(0x5A);
  emit_byte(0xC0 | encode);
}

// CVTSD2SS xmm, m64.
void Assembler::cvtsd2ss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  simd_prefix(dst, dst, src, VEX_SIMD_F2);
  emit_byte(0x5A);
  emit_operand(dst, src);
}

// CVTSI2SD xmm, r32 (int32 -> scalar double; F2-prefixed 0F 2A).
void Assembler::cvtsi2sdl(XMMRegister dst, Register src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2);
  emit_byte(0x2A);
  emit_byte(0xC0 | encode);
}

// CVTSI2SD xmm, m32.
void Assembler::cvtsi2sdl(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  simd_prefix(dst, dst, src, VEX_SIMD_F2);
  emit_byte(0x2A);
  emit_operand(dst, src);
}

// CVTSI2SS xmm, r32 (int32 -> scalar single; F3-prefixed 0F 2A).
void Assembler::cvtsi2ssl(XMMRegister dst, Register src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3);
  emit_byte(0x2A);
  emit_byte(0xC0 | encode);
}

// CVTSI2SS xmm, m32.
void Assembler::cvtsi2ssl(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  simd_prefix(dst, dst, src, VEX_SIMD_F3);
  emit_byte(0x2A);
  emit_operand(dst, src);
}

// CVTSS2SD xmm, xmm (scalar single -> double; F3-prefixed 0F 5A).
void Assembler::cvtss2sd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3);
  emit_byte(0x5A);
  emit_byte(0xC0 | encode);
}

// CVTSS2SD xmm, m32.
void Assembler::cvtss2sd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  simd_prefix(dst, dst, src, VEX_SIMD_F3);
  emit_byte(0x5A);
  emit_operand(dst, src);
}


// CVTTSD2SI r32, xmm (truncating scalar double -> int32; F2-prefixed 0F 2C).
void Assembler::cvttsd2sil(Register dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F2);
  emit_byte(0x2C);
  emit_byte(0xC0 | encode);
}

// CVTTSS2SI r32, xmm (truncating scalar single -> int32; F3-prefixed 0F 2C).
void Assembler::cvttss2sil(Register dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F3);
  emit_byte(0x2C);
  emit_byte(0xC0 | encode);
}

// DEC m32 (FF /1; rcx passed only to encode extension digit 1).
void Assembler::decl(Address dst) {
  // Don't use it directly. Use MacroAssembler::decrement() instead.
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0xFF);
  emit_operand(rcx, dst);
}

// DIVSD xmm, m64 (scalar double divide; F2-prefixed 0F 5E).
void Assembler::divsd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  simd_prefix(dst, dst, src, VEX_SIMD_F2);
  emit_byte(0x5E);
  emit_operand(dst, src);
}

// DIVSD xmm, xmm.
void Assembler::divsd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2);
  emit_byte(0x5E);
  emit_byte(0xC0 | encode);
}

// DIVSS xmm, m32 (scalar single divide; F3-prefixed 0F 5E).
void Assembler::divss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  simd_prefix(dst, dst, src, VEX_SIMD_F3);
  emit_byte(0x5E);
  emit_operand(dst, src);
}

// DIVSS xmm, xmm.
void Assembler::divss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3);
  emit_byte(0x5E);
  emit_byte(0xC0 | encode);
}

// EMMS: empty MMX state (0F 77).
void Assembler::emms() {
  NOT_LP64(assert(VM_Version::supports_mmx(), ""));
  emit_byte(0x0F);
  emit_byte(0x77);
}

// HLT (0xF4).
void Assembler::hlt() {
  emit_byte(0xF4);
}

// IDIV r32 (signed divide edx:eax by src; F7 /7).
void Assembler::idivl(Register src) {
  int encode = prefix_and_encode(src->encoding());
  emit_byte(0xF7);
  emit_byte(0xF8 | encode);
}

// DIV r32 (unsigned divide edx:eax by src; F7 /6).
void Assembler::divl(Register src) { // Unsigned
  int encode = prefix_and_encode(src->encoding());
  emit_byte(0xF7);
  emit_byte(0xF0 | encode);
}

// IMUL r32, r32 (0F AF /r).
void Assembler::imull(Register dst, Register src) {
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
emit_byte(0xAF); 1405 emit_byte(0xC0 | encode); 1406 } 1407 1408 1409 void Assembler::imull(Register dst, Register src, int value) { 1410 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 1411 if (is8bit(value)) { 1412 emit_byte(0x6B); 1413 emit_byte(0xC0 | encode); 1414 emit_byte(value & 0xFF); 1415 } else { 1416 emit_byte(0x69); 1417 emit_byte(0xC0 | encode); 1418 emit_long(value); 1419 } 1420 } 1421 1422 void Assembler::incl(Address dst) { 1423 // Don't use it directly. Use MacroAssembler::increment() instead. 1424 InstructionMark im(this); 1425 prefix(dst); 1426 emit_byte(0xFF); 1427 emit_operand(rax, dst); 1428 } 1429 1430 void Assembler::jcc(Condition cc, Label& L, bool maybe_short) { 1431 InstructionMark im(this); 1432 assert((0 <= cc) && (cc < 16), "illegal cc"); 1433 if (L.is_bound()) { 1434 address dst = target(L); 1435 assert(dst != NULL, "jcc most probably wrong"); 1436 1437 const int short_size = 2; 1438 const int long_size = 6; 1439 intptr_t offs = (intptr_t)dst - (intptr_t)_code_pos; 1440 if (maybe_short && is8bit(offs - short_size)) { 1441 // 0111 tttn #8-bit disp 1442 emit_byte(0x70 | cc); 1443 emit_byte((offs - short_size) & 0xFF); 1444 } else { 1445 // 0000 1111 1000 tttn #32-bit disp 1446 assert(is_simm32(offs - long_size), 1447 "must be 32bit offset (call4)"); 1448 emit_byte(0x0F); 1449 emit_byte(0x80 | cc); 1450 emit_long(offs - long_size); 1451 } 1452 } else { 1453 // Note: could eliminate cond. jumps to this jump if condition 1454 // is the same however, seems to be rather unlikely case. 
1455 // Note: use jccb() if label to be bound is very close to get 1456 // an 8-bit displacement 1457 L.add_patch_at(code(), locator()); 1458 emit_byte(0x0F); 1459 emit_byte(0x80 | cc); 1460 emit_long(0); 1461 } 1462 } 1463 1464 void Assembler::jccb(Condition cc, Label& L) { 1465 if (L.is_bound()) { 1466 const int short_size = 2; 1467 address entry = target(L); 1468 assert(is8bit((intptr_t)entry - ((intptr_t)_code_pos + short_size)), 1469 "Dispacement too large for a short jmp"); 1470 intptr_t offs = (intptr_t)entry - (intptr_t)_code_pos; 1471 // 0111 tttn #8-bit disp 1472 emit_byte(0x70 | cc); 1473 emit_byte((offs - short_size) & 0xFF); 1474 } else { 1475 InstructionMark im(this); 1476 L.add_patch_at(code(), locator()); 1477 emit_byte(0x70 | cc); 1478 emit_byte(0); 1479 } 1480 } 1481 1482 void Assembler::jmp(Address adr) { 1483 InstructionMark im(this); 1484 prefix(adr); 1485 emit_byte(0xFF); 1486 emit_operand(rsp, adr); 1487 } 1488 1489 void Assembler::jmp(Label& L, bool maybe_short) { 1490 if (L.is_bound()) { 1491 address entry = target(L); 1492 assert(entry != NULL, "jmp most probably wrong"); 1493 InstructionMark im(this); 1494 const int short_size = 2; 1495 const int long_size = 5; 1496 intptr_t offs = entry - _code_pos; 1497 if (maybe_short && is8bit(offs - short_size)) { 1498 emit_byte(0xEB); 1499 emit_byte((offs - short_size) & 0xFF); 1500 } else { 1501 emit_byte(0xE9); 1502 emit_long(offs - long_size); 1503 } 1504 } else { 1505 // By default, forward jumps are always 32-bit displacements, since 1506 // we can't yet know where the label will be bound. If you're sure that 1507 // the forward jump will not run beyond 256 bytes, use jmpb to 1508 // force an 8-bit displacement. 
1509 InstructionMark im(this); 1510 L.add_patch_at(code(), locator()); 1511 emit_byte(0xE9); 1512 emit_long(0); 1513 } 1514 } 1515 1516 void Assembler::jmp(Register entry) { 1517 int encode = prefix_and_encode(entry->encoding()); 1518 emit_byte(0xFF); 1519 emit_byte(0xE0 | encode); 1520 } 1521 1522 void Assembler::jmp_literal(address dest, RelocationHolder const& rspec) { 1523 InstructionMark im(this); 1524 emit_byte(0xE9); 1525 assert(dest != NULL, "must have a target"); 1526 intptr_t disp = dest - (_code_pos + sizeof(int32_t)); 1527 assert(is_simm32(disp), "must be 32bit offset (jmp)"); 1528 emit_data(disp, rspec.reloc(), call32_operand); 1529 } 1530 1531 void Assembler::jmpb(Label& L) { 1532 if (L.is_bound()) { 1533 const int short_size = 2; 1534 address entry = target(L); 1535 assert(is8bit((entry - _code_pos) + short_size), 1536 "Dispacement too large for a short jmp"); 1537 assert(entry != NULL, "jmp most probably wrong"); 1538 intptr_t offs = entry - _code_pos; 1539 emit_byte(0xEB); 1540 emit_byte((offs - short_size) & 0xFF); 1541 } else { 1542 InstructionMark im(this); 1543 L.add_patch_at(code(), locator()); 1544 emit_byte(0xEB); 1545 emit_byte(0); 1546 } 1547 } 1548 1549 void Assembler::ldmxcsr( Address src) { 1550 NOT_LP64(assert(VM_Version::supports_sse(), "")); 1551 InstructionMark im(this); 1552 prefix(src); 1553 emit_byte(0x0F); 1554 emit_byte(0xAE); 1555 emit_operand(as_Register(2), src); 1556 } 1557 1558 void Assembler::leal(Register dst, Address src) { 1559 InstructionMark im(this); 1560 #ifdef _LP64 1561 emit_byte(0x67); // addr32 1562 prefix(src, dst); 1563 #endif // LP64 1564 emit_byte(0x8D); 1565 emit_operand(dst, src); 1566 } 1567 1568 void Assembler::lock() { 1569 if (Atomics & 1) { 1570 // Emit either nothing, a NOP, or a NOP: prefix 1571 emit_byte(0x90) ; 1572 } else { 1573 emit_byte(0xF0); 1574 } 1575 } 1576 1577 void Assembler::lzcntl(Register dst, Register src) { 1578 assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR"); 
emit_byte(0xF3);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xBD);
  emit_byte(0xC0 | encode);
}

// Emit mfence instruction
void Assembler::mfence() {
  NOT_LP64(assert(VM_Version::supports_sse2(), "unsupported");)
  emit_byte( 0x0F );
  emit_byte( 0xAE );
  emit_byte( 0xF0 );
}

// Pointer-sized register move: movq on 64-bit, movl on 32-bit.
void Assembler::mov(Register dst, Register src) {
  LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
}

// MOVAPD xmm, xmm (aligned packed-double move; 66-prefixed 0F 28).
void Assembler::movapd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66);
  emit_byte(0x28);
  emit_byte(0xC0 | encode);
}

// MOVAPS xmm, xmm (aligned packed-single move; 0F 28).
void Assembler::movaps(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_NONE);
  emit_byte(0x28);
  emit_byte(0xC0 | encode);
}

// MOV r8, m8 (0x8A /r); the 'true' selects byte-register prefixing.
void Assembler::movb(Register dst, Address src) {
  NOT_LP64(assert(dst->has_byte_register(), "must have byte register"));
  InstructionMark im(this);
  prefix(src, dst, true);
  emit_byte(0x8A);
  emit_operand(dst, src);
}


// MOV m8, imm8 (0xC6 /0).
void Assembler::movb(Address dst, int imm8) {
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0xC6);
  emit_operand(rax, dst, 1);
  emit_byte(imm8);
}


// MOV m8, r8 (0x88 /r).
void Assembler::movb(Address dst, Register src) {
  assert(src->has_byte_register(), "must have byte register");
  InstructionMark im(this);
  prefix(dst, src, true);
  emit_byte(0x88);
  emit_operand(src, dst);
}

// MOVD xmm, r32 (66-prefixed 0F 6E).
void Assembler::movdl(XMMRegister dst, Register src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66);
  emit_byte(0x6E);
  emit_byte(0xC0 | encode);
}

// MOVD r32, xmm (66-prefixed 0F 7E).
void Assembler::movdl(Register dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  // swap src/dst to get correct prefix
  int encode = simd_prefix_and_encode(src, dst, VEX_SIMD_66);
  emit_byte(0x7E);
  emit_byte(0xC0 | encode);
}

// MOVD xmm, m32.
void Assembler::movdl(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  simd_prefix(dst, src, VEX_SIMD_66);
  emit_byte(0x6E);
  emit_operand(dst, src);
}

// MOVDQA xmm, xmm (aligned 128-bit move; 66-prefixed 0F 6F).
void Assembler::movdqa(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66);
  emit_byte(0x6F);
  emit_byte(0xC0 | encode);
}

// MOVDQU xmm, m128 (unaligned 128-bit load; F3-prefixed 0F 6F).
void Assembler::movdqu(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  simd_prefix(dst, src, VEX_SIMD_F3);
  emit_byte(0x6F);
  emit_operand(dst, src);
}

// MOVDQU xmm, xmm.
void Assembler::movdqu(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F3);
  emit_byte(0x6F);
  emit_byte(0xC0 | encode);
}

// MOVDQU m128, xmm (unaligned 128-bit store; F3-prefixed 0F 7F).
void Assembler::movdqu(Address dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  simd_prefix(dst, src, VEX_SIMD_F3);
  emit_byte(0x7F);
  emit_operand(src, dst);
}

// Uses zero extension on 64bit

// MOV r32, imm32 (B8+rd id).
void Assembler::movl(Register dst, int32_t imm32) {
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xB8 | encode);
  emit_long(imm32);
}

// MOV r32, r32 (0x8B /r).
void Assembler::movl(Register dst, Register src) {
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x8B);
  emit_byte(0xC0 | encode);
}

// MOV r32, m32.
void Assembler::movl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x8B);
  emit_operand(dst, src);
}

// MOV m32, imm32 (0xC7 /0).
void Assembler::movl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0xC7);
  emit_operand(rax, dst, 4);
  emit_long(imm32);
}

// MOV m32, r32 (0x89 /r).
void Assembler::movl(Address dst, Register src) {
  InstructionMark im(this);
  prefix(dst, src);
  emit_byte(0x89);
  emit_operand(src, dst);
}

// New cpus require to use movsd and movss to avoid partial register stall
// when loading from memory. But for old Opteron use movlpd instead of movsd.
// The selection is done in MacroAssembler::movdbl() and movflt().
void Assembler::movlpd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  simd_prefix(dst, dst, src, VEX_SIMD_66);
  emit_byte(0x12);
  emit_operand(dst, src);
}

// MOVQ mm, m64 (MMX load; 0F 6F).
void Assembler::movq( MMXRegister dst, Address src ) {
  assert( VM_Version::supports_mmx(), "" );
  emit_byte(0x0F);
  emit_byte(0x6F);
  emit_operand(dst, src);
}

// MOVQ m64, mm (MMX store; 0F 7F).
void Assembler::movq( Address dst, MMXRegister src ) {
  assert( VM_Version::supports_mmx(), "" );
  emit_byte(0x0F);
  emit_byte(0x7F);
  // workaround gcc (3.2.1-7a) bug
  // In that version of gcc with only an emit_operand(MMX, Address)
  // gcc will tail jump and try and reverse the parameters completely
  // obliterating dst in the process. By having a version available
  // that doesn't need to swap the args at the tail jump the bug is
  // avoided.
emit_operand(dst, src);
}

// MOVQ xmm, m64 (F3-prefixed 0F 7E load of low quadword).
void Assembler::movq(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  simd_prefix(dst, src, VEX_SIMD_F3);
  emit_byte(0x7E);
  emit_operand(dst, src);
}

// MOVQ m64, xmm (66-prefixed 0F D6 store of low quadword).
void Assembler::movq(Address dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  simd_prefix(dst, src, VEX_SIMD_66);
  emit_byte(0xD6);
  emit_operand(src, dst);
}

// MOVSX r32, m8 (0F BE).
void Assembler::movsbl(Register dst, Address src) { // movsxb
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0xBE);
  emit_operand(dst, src);
}

// MOVSX r32, r8.
void Assembler::movsbl(Register dst, Register src) { // movsxb
  NOT_LP64(assert(src->has_byte_register(), "must have byte register"));
  int encode = prefix_and_encode(dst->encoding(), src->encoding(), true);
  emit_byte(0x0F);
  emit_byte(0xBE);
  emit_byte(0xC0 | encode);
}

// MOVSD xmm, xmm (scalar double move; F2-prefixed 0F 10).
void Assembler::movsd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2);
  emit_byte(0x10);
  emit_byte(0xC0 | encode);
}

// MOVSD xmm, m64.
void Assembler::movsd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  simd_prefix(dst, src, VEX_SIMD_F2);
  emit_byte(0x10);
  emit_operand(dst, src);
}

// MOVSD m64, xmm (F2-prefixed 0F 11).
void Assembler::movsd(Address dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  simd_prefix(dst, src, VEX_SIMD_F2);
  emit_byte(0x11);
  emit_operand(src, dst);
}

// MOVSS xmm, xmm (scalar single move; F3-prefixed 0F 10).
void Assembler::movss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3);
  emit_byte(0x10);
  emit_byte(0xC0 | encode);
}

// MOVSS xmm, m32.
void Assembler::movss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  simd_prefix(dst, src, VEX_SIMD_F3);
  emit_byte(0x10);
  emit_operand(dst, src);
}

// MOVSS m32, xmm (F3-prefixed 0F 11).
void Assembler::movss(Address dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  simd_prefix(dst, src, VEX_SIMD_F3);
  emit_byte(0x11);
  emit_operand(src, dst);
}

// MOVSX r32, m16 (0F BF).
void Assembler::movswl(Register dst, Address src) { // movsxw
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0xBF);
  emit_operand(dst, src);
}

// MOVSX r32, r16.
void Assembler::movswl(Register dst, Register src) { // movsxw
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xBF);
  emit_byte(0xC0 | encode);
}

// MOV m16, imm16 (operand-size prefix then 0xC7 /0).
void Assembler::movw(Address dst, int imm16) {
  InstructionMark im(this);

  emit_byte(0x66); // switch to 16-bit mode
  prefix(dst);
  emit_byte(0xC7);
  emit_operand(rax, dst, 2);
  emit_word(imm16);
}

// MOV r16, m16.
void Assembler::movw(Register dst, Address src) {
  InstructionMark im(this);
  emit_byte(0x66);
  prefix(src, dst);
  emit_byte(0x8B);
  emit_operand(dst, src);
}

// MOV m16, r16.
void Assembler::movw(Address dst, Register src) {
  InstructionMark im(this);
  emit_byte(0x66);
  prefix(dst, src);
  emit_byte(0x89);
  emit_operand(src, dst);
}

// MOVZX r32, m8 (0F B6).
void Assembler::movzbl(Register dst, Address src) { // movzxb
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0xB6);
  emit_operand(dst, src);
}

// MOVZX r32, r8.
void Assembler::movzbl(Register dst, Register src) { // movzxb
  NOT_LP64(assert(src->has_byte_register(), "must have byte register"));
  int encode = prefix_and_encode(dst->encoding(), src->encoding(), true);
  emit_byte(0x0F);
  emit_byte(0xB6);
  emit_byte(0xC0 | encode);
}

// MOVZX r32, m16 (0F B7).
void Assembler::movzwl(Register dst, Address src) { // movzxw
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0xB7);
  emit_operand(dst, src);
}

// MOVZX r32, r16.
void Assembler::movzwl(Register dst, Register src) { // movzxw
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xB7);
  emit_byte(0xC0 | encode);
}

// MUL m32 (unsigned multiply of eax by memory; F7 /4).
void Assembler::mull(Address src) {
  InstructionMark im(this);
  prefix(src);
  emit_byte(0xF7);
  emit_operand(rsp, src);
}

// MUL r32 (F7 /4, register form).
void Assembler::mull(Register src) {
  int encode = prefix_and_encode(src->encoding());
  emit_byte(0xF7);
  emit_byte(0xE0 | encode);
}

// MULSD xmm, m64 (scalar double multiply; F2-prefixed 0F 59).
void Assembler::mulsd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  simd_prefix(dst, dst, src, VEX_SIMD_F2);
  emit_byte(0x59);
  emit_operand(dst, src);
}

// MULSD xmm, xmm.
void Assembler::mulsd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2);
  emit_byte(0x59);
  emit_byte(0xC0 | encode);
}

// MULSS xmm, m32 (scalar single multiply; F3-prefixed 0F 59).
void Assembler::mulss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  simd_prefix(dst, dst, src, VEX_SIMD_F3);
  emit_byte(0x59);
  emit_operand(dst, src);
}

// MULSS xmm, xmm.
void Assembler::mulss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3);
  emit_byte(0x59);
  emit_byte(0xC0 | encode);
}

// NEG r32 (F7 /3).
void Assembler::negl(Register dst) {
  int encode =
prefix_and_encode(dst->encoding());
  emit_byte(0xF7);
  emit_byte(0xD8 | encode);
}

// Emit exactly i bytes of nop padding, choosing the densest encodings the
// current CPU handles well.  The switch statements below rely on
// intentional case fall-through to accumulate size prefixes.
void Assembler::nop(int i) {
#ifdef ASSERT
  assert(i > 0, " ");
  // The fancy nops aren't currently recognized by debuggers making it a
  // pain to disassemble code while debugging. If asserts are on clearly
  // speed is not an issue so simply use the single byte traditional nop
  // to do alignment.

  for (; i > 0 ; i--) emit_byte(0x90);
  return;

#endif // ASSERT

  if (UseAddressNop && VM_Version::is_intel()) {
    //
    // Using multi-bytes nops "0x0F 0x1F [address]" for Intel
    //  1: 0x90
    //  2: 0x66 0x90
    //  3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
    //  4: 0x0F 0x1F 0x40 0x00
    //  5: 0x0F 0x1F 0x44 0x00 0x00
    //  6: 0x66 0x0F 0x1F 0x44 0x00 0x00
    //  7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
    //  8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    //  9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00

    // The rest coding is Intel specific - don't use consecutive address nops

    // 12: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
    // 13: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
    // 14: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
    // 15: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90

    while(i >= 15) {
      // For Intel don't generate consecutive addess nops (mix with regular nops)
      i -= 15;
      emit_byte(0x66);   // size prefix
      emit_byte(0x66);   // size prefix
      emit_byte(0x66);   // size prefix
      addr_nop_8();
      emit_byte(0x66);   // size prefix
      emit_byte(0x66);   // size prefix
      emit_byte(0x66);   // size prefix
      emit_byte(0x90);   // nop
    }
    switch (i) {
      // Cases fall through to stack up 0x66 prefixes before the nop.
      case 14:
        emit_byte(0x66); // size prefix
      case 13:
        emit_byte(0x66); // size prefix
      case 12:
        addr_nop_8();
        emit_byte(0x66); // size prefix
        emit_byte(0x66); // size prefix
        emit_byte(0x66); // size prefix
        emit_byte(0x90); // nop
        break;
      case 11:
        emit_byte(0x66); // size prefix
      case 10:
        emit_byte(0x66); // size prefix
      case 9:
        emit_byte(0x66); // size prefix
      case 8:
        addr_nop_8();
        break;
      case 7:
        addr_nop_7();
        break;
      case 6:
        emit_byte(0x66); // size prefix
      case 5:
        addr_nop_5();
        break;
      case 4:
        addr_nop_4();
        break;
      case 3:
        // Don't use "0x0F 0x1F 0x00" - need patching safe padding
        emit_byte(0x66); // size prefix
      case 2:
        emit_byte(0x66); // size prefix
      case 1:
        emit_byte(0x90); // nop
        break;
      default:
        assert(i == 0, " ");
    }
    return;
  }
  if (UseAddressNop && VM_Version::is_amd()) {
    //
    // Using multi-bytes nops "0x0F 0x1F [address]" for AMD.
    //  1: 0x90
    //  2: 0x66 0x90
    //  3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
    //  4: 0x0F 0x1F 0x40 0x00
    //  5: 0x0F 0x1F 0x44 0x00 0x00
    //  6: 0x66 0x0F 0x1F 0x44 0x00 0x00
    //  7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
    //  8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    //  9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00

    // The rest coding is AMD specific - use consecutive address nops

    // 12: 0x66 0x0F 0x1F 0x44 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
    // 13: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
    // 14: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
    // 15: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
    // 16: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    //     Size prefixes (0x66) are added for larger sizes

    while(i >= 22) {
      i -= 11;
      emit_byte(0x66); // size prefix
      emit_byte(0x66); // size prefix
      emit_byte(0x66); // size prefix
      addr_nop_8();
    }
    // Generate first nop for size between 21-12
    switch (i) {
      // Cases fall through; i is decremented as prefixes/nops are emitted.
      case 21:
        i -= 1;
        emit_byte(0x66); // size prefix
      case 20:
      case 19:
        i -= 1;
        emit_byte(0x66); // size prefix
      case 18:
      case 17:
        i -= 1;
        emit_byte(0x66); // size prefix
      case 16:
      case 15:
        i -= 8;
        addr_nop_8();
        break;
      case 14:
      case 13:
        i -= 7;
        addr_nop_7();
        break;
      case 12:
        i -= 6;
        emit_byte(0x66); // size prefix
        addr_nop_5();
        break;
      default:
        assert(i < 12, " ");
    }

    // Generate second nop for size between 11-1
    switch (i) {
      case 11:
        emit_byte(0x66); // size prefix
      case 10:
        emit_byte(0x66); // size prefix
      case 9:
        emit_byte(0x66); // size prefix
      case 8:
        addr_nop_8();
        break;
      case 7:
        addr_nop_7();
        break;
      case 6:
        emit_byte(0x66); // size prefix
      case 5:
        addr_nop_5();
        break;
      case 4:
        addr_nop_4();
        break;
      case 3:
        // Don't use "0x0F 0x1F 0x00" - need patching safe padding
        emit_byte(0x66); // size prefix
      case 2:
        emit_byte(0x66); // size prefix
      case 1:
        emit_byte(0x90); // nop
        break;
      default:
        assert(i == 0, " ");
    }
    return;
  }

  // Fallback for other CPUs: nops built purely from 0x66 prefixes + 0x90.
  // Using nops with size prefixes "0x66 0x90".
  // From AMD Optimization Guide:
  //  1: 0x90
  //  2: 0x66 0x90
  //  3: 0x66 0x66 0x90
  //  4: 0x66 0x66 0x66 0x90
  //  5: 0x66 0x66 0x90 0x66 0x90
  //  6: 0x66 0x66 0x90 0x66 0x66 0x90
  //  7: 0x66 0x66 0x66 0x90 0x66 0x66 0x90
  //  8: 0x66 0x66 0x66 0x90 0x66 0x66 0x66 0x90
  //  9: 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
  // 10: 0x66 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
  //
  while(i > 12) {
    i -= 4;
    emit_byte(0x66); // size prefix
    emit_byte(0x66);
    emit_byte(0x66);
    emit_byte(0x90); // nop
  }
  // 1 - 12 nops
  if(i > 8) {
    if(i > 9) {
      i -= 1;
      emit_byte(0x66);
    }
    i -= 3;
    emit_byte(0x66);
    emit_byte(0x66);
    emit_byte(0x90);
  }
  // 1 - 8 nops
  if(i > 4) {
    if(i > 6) {
      i -= 1;
      emit_byte(0x66);
    }
    i -= 3;
    emit_byte(0x66);
    emit_byte(0x66);
    emit_byte(0x90);
  }
  switch (i) {
    case 4:
      emit_byte(0x66);
    case 3:
      emit_byte(0x66);
    case 2:
      emit_byte(0x66);
    case 1:
      emit_byte(0x90);
      break;
    default:
      assert(i == 0, " ");
  }
}

// NOT r32 (F7 /2).
void Assembler::notl(Register dst) {
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xF7);
  emit_byte(0xD0 | encode );
}

// OR m32, imm32.
void Assembler::orl(Address dst, int32_t imm32) {
InstructionMark im(this); 2213 prefix(dst); 2214 emit_arith_operand(0x81, rcx, dst, imm32); 2215 } 2216 2217 void Assembler::orl(Register dst, int32_t imm32) { 2218 prefix(dst); 2219 emit_arith(0x81, 0xC8, dst, imm32); 2220 } 2221 2222 void Assembler::orl(Register dst, Address src) { 2223 InstructionMark im(this); 2224 prefix(src, dst); 2225 emit_byte(0x0B); 2226 emit_operand(dst, src); 2227 } 2228 2229 void Assembler::orl(Register dst, Register src) { 2230 (void) prefix_and_encode(dst->encoding(), src->encoding()); 2231 emit_arith(0x0B, 0xC0, dst, src); 2232 } 2233 2234 void Assembler::packuswb(XMMRegister dst, Address src) { 2235 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2236 assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); 2237 InstructionMark im(this); 2238 simd_prefix(dst, dst, src, VEX_SIMD_66); 2239 emit_byte(0x67); 2240 emit_operand(dst, src); 2241 } 2242 2243 void Assembler::packuswb(XMMRegister dst, XMMRegister src) { 2244 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2245 int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66); 2246 emit_byte(0x67); 2247 emit_byte(0xC0 | encode); 2248 } 2249 2250 void Assembler::pcmpestri(XMMRegister dst, Address src, int imm8) { 2251 assert(VM_Version::supports_sse4_2(), ""); 2252 InstructionMark im(this); 2253 simd_prefix(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A); 2254 emit_byte(0x61); 2255 emit_operand(dst, src); 2256 emit_byte(imm8); 2257 } 2258 2259 void Assembler::pcmpestri(XMMRegister dst, XMMRegister src, int imm8) { 2260 assert(VM_Version::supports_sse4_2(), ""); 2261 int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A); 2262 emit_byte(0x61); 2263 emit_byte(0xC0 | encode); 2264 emit_byte(imm8); 2265 } 2266 2267 void Assembler::pmovzxbw(XMMRegister dst, Address src) { 2268 assert(VM_Version::supports_sse4_1(), ""); 2269 InstructionMark im(this); 2270 simd_prefix(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38); 2271 emit_byte(0x30); 2272 emit_operand(dst, 
src); 2273 } 2274 2275 void Assembler::pmovzxbw(XMMRegister dst, XMMRegister src) { 2276 assert(VM_Version::supports_sse4_1(), ""); 2277 int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38); 2278 emit_byte(0x30); 2279 emit_byte(0xC0 | encode); 2280 } 2281 2282 // generic 2283 void Assembler::pop(Register dst) { 2284 int encode = prefix_and_encode(dst->encoding()); 2285 emit_byte(0x58 | encode); 2286 } 2287 2288 void Assembler::popcntl(Register dst, Address src) { 2289 assert(VM_Version::supports_popcnt(), "must support"); 2290 InstructionMark im(this); 2291 emit_byte(0xF3); 2292 prefix(src, dst); 2293 emit_byte(0x0F); 2294 emit_byte(0xB8); 2295 emit_operand(dst, src); 2296 } 2297 2298 void Assembler::popcntl(Register dst, Register src) { 2299 assert(VM_Version::supports_popcnt(), "must support"); 2300 emit_byte(0xF3); 2301 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 2302 emit_byte(0x0F); 2303 emit_byte(0xB8); 2304 emit_byte(0xC0 | encode); 2305 } 2306 2307 void Assembler::popf() { 2308 emit_byte(0x9D); 2309 } 2310 2311 #ifndef _LP64 // no 32bit push/pop on amd64 2312 void Assembler::popl(Address dst) { 2313 // NOTE: this will adjust stack by 8byte on 64bits 2314 InstructionMark im(this); 2315 prefix(dst); 2316 emit_byte(0x8F); 2317 emit_operand(rax, dst); 2318 } 2319 #endif 2320 2321 void Assembler::prefetch_prefix(Address src) { 2322 prefix(src); 2323 emit_byte(0x0F); 2324 } 2325 2326 void Assembler::prefetchnta(Address src) { 2327 NOT_LP64(assert(VM_Version::supports_sse(), "must support")); 2328 InstructionMark im(this); 2329 prefetch_prefix(src); 2330 emit_byte(0x18); 2331 emit_operand(rax, src); // 0, src 2332 } 2333 2334 void Assembler::prefetchr(Address src) { 2335 assert(VM_Version::supports_3dnow_prefetch(), "must support"); 2336 InstructionMark im(this); 2337 prefetch_prefix(src); 2338 emit_byte(0x0D); 2339 emit_operand(rax, src); // 0, src 2340 } 2341 2342 void Assembler::prefetcht0(Address src) { 2343 
NOT_LP64(assert(VM_Version::supports_sse(), "must support")); 2344 InstructionMark im(this); 2345 prefetch_prefix(src); 2346 emit_byte(0x18); 2347 emit_operand(rcx, src); // 1, src 2348 } 2349 2350 void Assembler::prefetcht1(Address src) { 2351 NOT_LP64(assert(VM_Version::supports_sse(), "must support")); 2352 InstructionMark im(this); 2353 prefetch_prefix(src); 2354 emit_byte(0x18); 2355 emit_operand(rdx, src); // 2, src 2356 } 2357 2358 void Assembler::prefetcht2(Address src) { 2359 NOT_LP64(assert(VM_Version::supports_sse(), "must support")); 2360 InstructionMark im(this); 2361 prefetch_prefix(src); 2362 emit_byte(0x18); 2363 emit_operand(rbx, src); // 3, src 2364 } 2365 2366 void Assembler::prefetchw(Address src) { 2367 assert(VM_Version::supports_3dnow_prefetch(), "must support"); 2368 InstructionMark im(this); 2369 prefetch_prefix(src); 2370 emit_byte(0x0D); 2371 emit_operand(rcx, src); // 1, src 2372 } 2373 2374 void Assembler::prefix(Prefix p) { 2375 a_byte(p); 2376 } 2377 2378 void Assembler::por(XMMRegister dst, XMMRegister src) { 2379 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2380 int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66); 2381 emit_byte(0xEB); 2382 emit_byte(0xC0 | encode); 2383 } 2384 2385 void Assembler::por(XMMRegister dst, Address src) { 2386 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2387 assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); 2388 InstructionMark im(this); 2389 simd_prefix(dst, dst, src, VEX_SIMD_66); 2390 emit_byte(0xEB); 2391 emit_operand(dst, src); 2392 } 2393 2394 void Assembler::pshufd(XMMRegister dst, XMMRegister src, int mode) { 2395 assert(isByte(mode), "invalid value"); 2396 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2397 int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66); 2398 emit_byte(0x70); 2399 emit_byte(0xC0 | encode); 2400 emit_byte(mode & 0xFF); 2401 2402 } 2403 2404 void Assembler::pshufd(XMMRegister dst, Address src, int mode) { 2405 
assert(isByte(mode), "invalid value"); 2406 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2407 assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); 2408 InstructionMark im(this); 2409 simd_prefix(dst, src, VEX_SIMD_66); 2410 emit_byte(0x70); 2411 emit_operand(dst, src); 2412 emit_byte(mode & 0xFF); 2413 } 2414 2415 void Assembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) { 2416 assert(isByte(mode), "invalid value"); 2417 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2418 int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F2); 2419 emit_byte(0x70); 2420 emit_byte(0xC0 | encode); 2421 emit_byte(mode & 0xFF); 2422 } 2423 2424 void Assembler::pshuflw(XMMRegister dst, Address src, int mode) { 2425 assert(isByte(mode), "invalid value"); 2426 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2427 assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); 2428 InstructionMark im(this); 2429 simd_prefix(dst, src, VEX_SIMD_F2); 2430 emit_byte(0x70); 2431 emit_operand(dst, src); 2432 emit_byte(mode & 0xFF); 2433 } 2434 2435 void Assembler::psrlq(XMMRegister dst, int shift) { 2436 // Shift 64 bit value logically right by specified number of bits. 2437 // HMM Table D-1 says sse2 or mmx. 2438 // Do not confuse it with psrldq SSE2 instruction which 2439 // shifts 128 bit value in xmm register by number of bytes. 2440 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2441 int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66); 2442 emit_byte(0x73); 2443 emit_byte(0xC0 | encode); 2444 emit_byte(shift); 2445 } 2446 2447 void Assembler::psrldq(XMMRegister dst, int shift) { 2448 // Shift 128 bit value in xmm register by number of bytes. 
2449 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2450 int encode = simd_prefix_and_encode(xmm3, dst, dst, VEX_SIMD_66); 2451 emit_byte(0x73); 2452 emit_byte(0xC0 | encode); 2453 emit_byte(shift); 2454 } 2455 2456 void Assembler::ptest(XMMRegister dst, Address src) { 2457 assert(VM_Version::supports_sse4_1(), ""); 2458 assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); 2459 InstructionMark im(this); 2460 simd_prefix(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38); 2461 emit_byte(0x17); 2462 emit_operand(dst, src); 2463 } 2464 2465 void Assembler::ptest(XMMRegister dst, XMMRegister src) { 2466 assert(VM_Version::supports_sse4_1(), ""); 2467 int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38); 2468 emit_byte(0x17); 2469 emit_byte(0xC0 | encode); 2470 } 2471 2472 void Assembler::punpcklbw(XMMRegister dst, Address src) { 2473 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2474 assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); 2475 InstructionMark im(this); 2476 simd_prefix(dst, dst, src, VEX_SIMD_66); 2477 emit_byte(0x60); 2478 emit_operand(dst, src); 2479 } 2480 2481 void Assembler::punpcklbw(XMMRegister dst, XMMRegister src) { 2482 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2483 int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66); 2484 emit_byte(0x60); 2485 emit_byte(0xC0 | encode); 2486 } 2487 2488 void Assembler::punpckldq(XMMRegister dst, Address src) { 2489 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2490 assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); 2491 InstructionMark im(this); 2492 simd_prefix(dst, dst, src, VEX_SIMD_66); 2493 emit_byte(0x62); 2494 emit_operand(dst, src); 2495 } 2496 2497 void Assembler::punpckldq(XMMRegister dst, XMMRegister src) { 2498 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2499 int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66); 2500 emit_byte(0x62); 2501 emit_byte(0xC0 | encode); 2502 } 2503 2504 
// Push a 32-bit immediate (PUSH imm32, opcode 0x68).
void Assembler::push(int32_t imm32) {
  // in 64bits we push 64bits onto the stack but only
  // take a 32bit immediate
  emit_byte(0x68);
  emit_long(imm32);
}

// Push a general-purpose register (PUSH r, opcode 0x50+reg).
// prefix_and_encode() adds a REX prefix when the register needs one.
void Assembler::push(Register src) {
  int encode = prefix_and_encode(src->encoding());

  emit_byte(0x50 | encode);
}

// Push the flags register (PUSHF, opcode 0x9C).
void Assembler::pushf() {
  emit_byte(0x9C);
}

#ifndef _LP64 // no 32bit push/pop on amd64
// Push a memory operand (PUSH m, opcode 0xFF /6 — rsi encodes digit 6).
void Assembler::pushl(Address src) {
  // Note this will push 64bit on 64bit
  InstructionMark im(this);
  prefix(src);
  emit_byte(0xFF);
  emit_operand(rsi, src);
}
#endif

// Bitwise XOR of packed data, memory source (PXOR, 0x66 0x0F 0xEF).
// The memory form requires AVX here because SSE-encoded PXOR needs a
// 16-byte-aligned operand, which this assembler does not guarantee.
void Assembler::pxor(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
  InstructionMark im(this);
  simd_prefix(dst, dst, src, VEX_SIMD_66);
  emit_byte(0xEF);
  emit_operand(dst, src);
}

// Bitwise XOR of packed data, register source (PXOR, 0x66 0x0F 0xEF).
void Assembler::pxor(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66);
  emit_byte(0xEF);
  emit_byte(0xC0 | encode);
}

// Rotate left through carry by imm8 (RCL r, imm8).
// Uses the short one-bit form 0xD1 when imm8 == 1, else 0xC1 with the count.
void Assembler::rcll(Register dst, int imm8) {
  assert(isShiftCount(imm8), "illegal shift count");
  int encode = prefix_and_encode(dst->encoding());
  if (imm8 == 1) {
    emit_byte(0xD1);
    emit_byte(0xD0 | encode);
  } else {
    emit_byte(0xC1);
    emit_byte(0xD0 | encode);
    emit_byte(imm8);
  }
}

// copies data from [esi] to [edi] using rcx pointer sized words
// generic
void Assembler::rep_mov() {
  emit_byte(0xF3);  // REP prefix
  // MOVSQ
  LP64_ONLY(prefix(REX_W));
  emit_byte(0xA5);
}

// sets rcx pointer sized words with rax, value at [edi]
// generic
void Assembler::rep_set() { // rep_set
  emit_byte(0xF3);  // REP prefix
  // STOSQ
  LP64_ONLY(prefix(REX_W));
  emit_byte(0xAB);
}

// scans rcx pointer sized words at [edi] for occurrence of rax,
// generic
void Assembler::repne_scan() { // repne_scan
  emit_byte(0xF2);  // REPNE prefix
  // SCASQ
  LP64_ONLY(prefix(REX_W));
  emit_byte(0xAF);
}

#ifdef _LP64
// scans rcx 4 byte words at [edi] for occurrence of rax,
// generic
void Assembler::repne_scanl() { // repne_scan
  emit_byte(0xF2);  // REPNE prefix
  // SCASL
  emit_byte(0xAF);
}
#endif

// Return from procedure; the short form 0xC3 when no stack bytes are
// released, otherwise RET imm16 (0xC2) which pops imm16 extra bytes.
void Assembler::ret(int imm16) {
  if (imm16 == 0) {
    emit_byte(0xC3);
  } else {
    emit_byte(0xC2);
    emit_word(imm16);
  }
}

// Store AH into flags (SAHF, 0x9E). 32-bit only.
void Assembler::sahf() {
#ifdef _LP64
  // Not supported in 64bit mode
  ShouldNotReachHere();
#endif
  emit_byte(0x9E);
}

// Arithmetic shift right by imm8 (SAR r, imm8).
// One-bit form 0xD1 when imm8 == 1, else 0xC1 with explicit count.
void Assembler::sarl(Register dst, int imm8) {
  int encode = prefix_and_encode(dst->encoding());
  assert(isShiftCount(imm8), "illegal shift count");
  if (imm8 == 1) {
    emit_byte(0xD1);
    emit_byte(0xF8 | encode);
  } else {
    emit_byte(0xC1);
    emit_byte(0xF8 | encode);
    emit_byte(imm8);
  }
}

// Arithmetic shift right by CL (SAR r, cl — 0xD3 /7).
void Assembler::sarl(Register dst) {
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xD3);
  emit_byte(0xF8 | encode);
}

// Subtract with borrow: memory destination, immediate source.
// rbx encodes opcode-extension digit 3 (SBB) in the ModRM reg field.
void Assembler::sbbl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_arith_operand(0x81, rbx, dst, imm32);
}

// Subtract with borrow: register destination, immediate source.
void Assembler::sbbl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xD8, dst, imm32);
}


// Subtract with borrow: register destination, memory source (SBB r, m — 0x1B).
void Assembler::sbbl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x1B);
  emit_operand(dst, src);
}

// Subtract with borrow: register-register form.
void Assembler::sbbl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x1B, 0xC0, dst, src);
}

void Assembler::setb(Condition cc,
Register dst) { 2658 assert(0 <= cc && cc < 16, "illegal cc"); 2659 int encode = prefix_and_encode(dst->encoding(), true); 2660 emit_byte(0x0F); 2661 emit_byte(0x90 | cc); 2662 emit_byte(0xC0 | encode); 2663 } 2664 2665 void Assembler::shll(Register dst, int imm8) { 2666 assert(isShiftCount(imm8), "illegal shift count"); 2667 int encode = prefix_and_encode(dst->encoding()); 2668 if (imm8 == 1 ) { 2669 emit_byte(0xD1); 2670 emit_byte(0xE0 | encode); 2671 } else { 2672 emit_byte(0xC1); 2673 emit_byte(0xE0 | encode); 2674 emit_byte(imm8); 2675 } 2676 } 2677 2678 void Assembler::shll(Register dst) { 2679 int encode = prefix_and_encode(dst->encoding()); 2680 emit_byte(0xD3); 2681 emit_byte(0xE0 | encode); 2682 } 2683 2684 void Assembler::shrl(Register dst, int imm8) { 2685 assert(isShiftCount(imm8), "illegal shift count"); 2686 int encode = prefix_and_encode(dst->encoding()); 2687 emit_byte(0xC1); 2688 emit_byte(0xE8 | encode); 2689 emit_byte(imm8); 2690 } 2691 2692 void Assembler::shrl(Register dst) { 2693 int encode = prefix_and_encode(dst->encoding()); 2694 emit_byte(0xD3); 2695 emit_byte(0xE8 | encode); 2696 } 2697 2698 // copies a single word from [esi] to [edi] 2699 void Assembler::smovl() { 2700 emit_byte(0xA5); 2701 } 2702 2703 void Assembler::sqrtsd(XMMRegister dst, XMMRegister src) { 2704 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2705 int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2); 2706 emit_byte(0x51); 2707 emit_byte(0xC0 | encode); 2708 } 2709 2710 void Assembler::sqrtsd(XMMRegister dst, Address src) { 2711 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2712 InstructionMark im(this); 2713 simd_prefix(dst, dst, src, VEX_SIMD_F2); 2714 emit_byte(0x51); 2715 emit_operand(dst, src); 2716 } 2717 2718 void Assembler::sqrtss(XMMRegister dst, XMMRegister src) { 2719 NOT_LP64(assert(VM_Version::supports_sse(), "")); 2720 int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3); 2721 emit_byte(0x51); 2722 emit_byte(0xC0 | encode); 
2723 } 2724 2725 void Assembler::sqrtss(XMMRegister dst, Address src) { 2726 NOT_LP64(assert(VM_Version::supports_sse(), "")); 2727 InstructionMark im(this); 2728 simd_prefix(dst, dst, src, VEX_SIMD_F3); 2729 emit_byte(0x51); 2730 emit_operand(dst, src); 2731 } 2732 2733 void Assembler::stmxcsr( Address dst) { 2734 NOT_LP64(assert(VM_Version::supports_sse(), "")); 2735 InstructionMark im(this); 2736 prefix(dst); 2737 emit_byte(0x0F); 2738 emit_byte(0xAE); 2739 emit_operand(as_Register(3), dst); 2740 } 2741 2742 void Assembler::subl(Address dst, int32_t imm32) { 2743 InstructionMark im(this); 2744 prefix(dst); 2745 emit_arith_operand(0x81, rbp, dst, imm32); 2746 } 2747 2748 void Assembler::subl(Address dst, Register src) { 2749 InstructionMark im(this); 2750 prefix(dst, src); 2751 emit_byte(0x29); 2752 emit_operand(src, dst); 2753 } 2754 2755 void Assembler::subl(Register dst, int32_t imm32) { 2756 prefix(dst); 2757 emit_arith(0x81, 0xE8, dst, imm32); 2758 } 2759 2760 void Assembler::subl(Register dst, Address src) { 2761 InstructionMark im(this); 2762 prefix(src, dst); 2763 emit_byte(0x2B); 2764 emit_operand(dst, src); 2765 } 2766 2767 void Assembler::subl(Register dst, Register src) { 2768 (void) prefix_and_encode(dst->encoding(), src->encoding()); 2769 emit_arith(0x2B, 0xC0, dst, src); 2770 } 2771 2772 void Assembler::subsd(XMMRegister dst, XMMRegister src) { 2773 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2774 int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2); 2775 emit_byte(0x5C); 2776 emit_byte(0xC0 | encode); 2777 } 2778 2779 void Assembler::subsd(XMMRegister dst, Address src) { 2780 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2781 InstructionMark im(this); 2782 simd_prefix(dst, dst, src, VEX_SIMD_F2); 2783 emit_byte(0x5C); 2784 emit_operand(dst, src); 2785 } 2786 2787 void Assembler::subss(XMMRegister dst, XMMRegister src) { 2788 NOT_LP64(assert(VM_Version::supports_sse(), "")); 2789 int encode = simd_prefix_and_encode(dst, dst, 
src, VEX_SIMD_F3); 2790 emit_byte(0x5C); 2791 emit_byte(0xC0 | encode); 2792 } 2793 2794 void Assembler::subss(XMMRegister dst, Address src) { 2795 NOT_LP64(assert(VM_Version::supports_sse(), "")); 2796 InstructionMark im(this); 2797 simd_prefix(dst, dst, src, VEX_SIMD_F3); 2798 emit_byte(0x5C); 2799 emit_operand(dst, src); 2800 } 2801 2802 void Assembler::testb(Register dst, int imm8) { 2803 NOT_LP64(assert(dst->has_byte_register(), "must have byte register")); 2804 (void) prefix_and_encode(dst->encoding(), true); 2805 emit_arith_b(0xF6, 0xC0, dst, imm8); 2806 } 2807 2808 void Assembler::testl(Register dst, int32_t imm32) { 2809 // not using emit_arith because test 2810 // doesn't support sign-extension of 2811 // 8bit operands 2812 int encode = dst->encoding(); 2813 if (encode == 0) { 2814 emit_byte(0xA9); 2815 } else { 2816 encode = prefix_and_encode(encode); 2817 emit_byte(0xF7); 2818 emit_byte(0xC0 | encode); 2819 } 2820 emit_long(imm32); 2821 } 2822 2823 void Assembler::testl(Register dst, Register src) { 2824 (void) prefix_and_encode(dst->encoding(), src->encoding()); 2825 emit_arith(0x85, 0xC0, dst, src); 2826 } 2827 2828 void Assembler::testl(Register dst, Address src) { 2829 InstructionMark im(this); 2830 prefix(src, dst); 2831 emit_byte(0x85); 2832 emit_operand(dst, src); 2833 } 2834 2835 void Assembler::ucomisd(XMMRegister dst, Address src) { 2836 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2837 InstructionMark im(this); 2838 simd_prefix(dst, src, VEX_SIMD_66); 2839 emit_byte(0x2E); 2840 emit_operand(dst, src); 2841 } 2842 2843 void Assembler::ucomisd(XMMRegister dst, XMMRegister src) { 2844 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2845 int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66); 2846 emit_byte(0x2E); 2847 emit_byte(0xC0 | encode); 2848 } 2849 2850 void Assembler::ucomiss(XMMRegister dst, Address src) { 2851 NOT_LP64(assert(VM_Version::supports_sse(), "")); 2852 InstructionMark im(this); 2853 simd_prefix(dst, src, 
VEX_SIMD_NONE); 2854 emit_byte(0x2E); 2855 emit_operand(dst, src); 2856 } 2857 2858 void Assembler::ucomiss(XMMRegister dst, XMMRegister src) { 2859 NOT_LP64(assert(VM_Version::supports_sse(), "")); 2860 int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_NONE); 2861 emit_byte(0x2E); 2862 emit_byte(0xC0 | encode); 2863 } 2864 2865 2866 void Assembler::xaddl(Address dst, Register src) { 2867 InstructionMark im(this); 2868 prefix(dst, src); 2869 emit_byte(0x0F); 2870 emit_byte(0xC1); 2871 emit_operand(src, dst); 2872 } 2873 2874 void Assembler::xchgl(Register dst, Address src) { // xchg 2875 InstructionMark im(this); 2876 prefix(src, dst); 2877 emit_byte(0x87); 2878 emit_operand(dst, src); 2879 } 2880 2881 void Assembler::xchgl(Register dst, Register src) { 2882 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 2883 emit_byte(0x87); 2884 emit_byte(0xc0 | encode); 2885 } 2886 2887 void Assembler::xorl(Register dst, int32_t imm32) { 2888 prefix(dst); 2889 emit_arith(0x81, 0xF0, dst, imm32); 2890 } 2891 2892 void Assembler::xorl(Register dst, Address src) { 2893 InstructionMark im(this); 2894 prefix(src, dst); 2895 emit_byte(0x33); 2896 emit_operand(dst, src); 2897 } 2898 2899 void Assembler::xorl(Register dst, Register src) { 2900 (void) prefix_and_encode(dst->encoding(), src->encoding()); 2901 emit_arith(0x33, 0xC0, dst, src); 2902 } 2903 2904 void Assembler::xorpd(XMMRegister dst, XMMRegister src) { 2905 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2906 int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66); 2907 emit_byte(0x57); 2908 emit_byte(0xC0 | encode); 2909 } 2910 2911 void Assembler::xorpd(XMMRegister dst, Address src) { 2912 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2913 InstructionMark im(this); 2914 simd_prefix(dst, dst, src, VEX_SIMD_66); 2915 emit_byte(0x57); 2916 emit_operand(dst, src); 2917 } 2918 2919 2920 void Assembler::xorps(XMMRegister dst, XMMRegister src) { 2921 
NOT_LP64(assert(VM_Version::supports_sse(), "")); 2922 int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE); 2923 emit_byte(0x57); 2924 emit_byte(0xC0 | encode); 2925 } 2926 2927 void Assembler::xorps(XMMRegister dst, Address src) { 2928 NOT_LP64(assert(VM_Version::supports_sse(), "")); 2929 InstructionMark im(this); 2930 simd_prefix(dst, dst, src, VEX_SIMD_NONE); 2931 emit_byte(0x57); 2932 emit_operand(dst, src); 2933 } 2934 2935 // AVX 3-operands non destructive source instructions (encoded with VEX prefix) 2936 2937 void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, Address src) { 2938 assert(VM_Version::supports_avx(), ""); 2939 InstructionMark im(this); 2940 vex_prefix(dst, nds, src, VEX_SIMD_F2); 2941 emit_byte(0x58); 2942 emit_operand(dst, src); 2943 } 2944 2945 void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { 2946 assert(VM_Version::supports_avx(), ""); 2947 int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F2); 2948 emit_byte(0x58); 2949 emit_byte(0xC0 | encode); 2950 } 2951 2952 void Assembler::vaddss(XMMRegister dst, XMMRegister nds, Address src) { 2953 assert(VM_Version::supports_avx(), ""); 2954 InstructionMark im(this); 2955 vex_prefix(dst, nds, src, VEX_SIMD_F3); 2956 emit_byte(0x58); 2957 emit_operand(dst, src); 2958 } 2959 2960 void Assembler::vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src) { 2961 assert(VM_Version::supports_avx(), ""); 2962 int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F3); 2963 emit_byte(0x58); 2964 emit_byte(0xC0 | encode); 2965 } 2966 2967 void Assembler::vandpd(XMMRegister dst, XMMRegister nds, Address src) { 2968 assert(VM_Version::supports_avx(), ""); 2969 InstructionMark im(this); 2970 vex_prefix(dst, nds, src, VEX_SIMD_66); 2971 emit_byte(0x54); 2972 emit_operand(dst, src); 2973 } 2974 2975 void Assembler::vandps(XMMRegister dst, XMMRegister nds, Address src) { 2976 assert(VM_Version::supports_avx(), ""); 2977 InstructionMark im(this); 2978 
vex_prefix(dst, nds, src, VEX_SIMD_NONE); 2979 emit_byte(0x54); 2980 emit_operand(dst, src); 2981 } 2982 2983 void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, Address src) { 2984 assert(VM_Version::supports_avx(), ""); 2985 InstructionMark im(this); 2986 vex_prefix(dst, nds, src, VEX_SIMD_F2); 2987 emit_byte(0x5E); 2988 emit_operand(dst, src); 2989 } 2990 2991 void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { 2992 assert(VM_Version::supports_avx(), ""); 2993 int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F2); 2994 emit_byte(0x5E); 2995 emit_byte(0xC0 | encode); 2996 } 2997 2998 void Assembler::vdivss(XMMRegister dst, XMMRegister nds, Address src) { 2999 assert(VM_Version::supports_avx(), ""); 3000 InstructionMark im(this); 3001 vex_prefix(dst, nds, src, VEX_SIMD_F3); 3002 emit_byte(0x5E); 3003 emit_operand(dst, src); 3004 } 3005 3006 void Assembler::vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src) { 3007 assert(VM_Version::supports_avx(), ""); 3008 int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F3); 3009 emit_byte(0x5E); 3010 emit_byte(0xC0 | encode); 3011 } 3012 3013 void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, Address src) { 3014 assert(VM_Version::supports_avx(), ""); 3015 InstructionMark im(this); 3016 vex_prefix(dst, nds, src, VEX_SIMD_F2); 3017 emit_byte(0x59); 3018 emit_operand(dst, src); 3019 } 3020 3021 void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { 3022 assert(VM_Version::supports_avx(), ""); 3023 int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F2); 3024 emit_byte(0x59); 3025 emit_byte(0xC0 | encode); 3026 } 3027 3028 void Assembler::vmulss(XMMRegister dst, XMMRegister nds, Address src) { 3029 InstructionMark im(this); 3030 vex_prefix(dst, nds, src, VEX_SIMD_F3); 3031 emit_byte(0x59); 3032 emit_operand(dst, src); 3033 } 3034 3035 void Assembler::vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src) { 3036 
assert(VM_Version::supports_avx(), ""); 3037 int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F3); 3038 emit_byte(0x59); 3039 emit_byte(0xC0 | encode); 3040 } 3041 3042 3043 void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, Address src) { 3044 assert(VM_Version::supports_avx(), ""); 3045 InstructionMark im(this); 3046 vex_prefix(dst, nds, src, VEX_SIMD_F2); 3047 emit_byte(0x5C); 3048 emit_operand(dst, src); 3049 } 3050 3051 void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { 3052 assert(VM_Version::supports_avx(), ""); 3053 int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F2); 3054 emit_byte(0x5C); 3055 emit_byte(0xC0 | encode); 3056 } 3057 3058 void Assembler::vsubss(XMMRegister dst, XMMRegister nds, Address src) { 3059 assert(VM_Version::supports_avx(), ""); 3060 InstructionMark im(this); 3061 vex_prefix(dst, nds, src, VEX_SIMD_F3); 3062 emit_byte(0x5C); 3063 emit_operand(dst, src); 3064 } 3065 3066 void Assembler::vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src) { 3067 assert(VM_Version::supports_avx(), ""); 3068 int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F3); 3069 emit_byte(0x5C); 3070 emit_byte(0xC0 | encode); 3071 } 3072 3073 void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, Address src) { 3074 assert(VM_Version::supports_avx(), ""); 3075 InstructionMark im(this); 3076 vex_prefix(dst, nds, src, VEX_SIMD_66); 3077 emit_byte(0x57); 3078 emit_operand(dst, src); 3079 } 3080 3081 void Assembler::vxorps(XMMRegister dst, XMMRegister nds, Address src) { 3082 assert(VM_Version::supports_avx(), ""); 3083 InstructionMark im(this); 3084 vex_prefix(dst, nds, src, VEX_SIMD_NONE); 3085 emit_byte(0x57); 3086 emit_operand(dst, src); 3087 } 3088 3089 3090 #ifndef _LP64 3091 // 32bit only pieces of the assembler 3092 3093 void Assembler::cmp_literal32(Register src1, int32_t imm32, RelocationHolder const& rspec) { 3094 // NO PREFIX AS NEVER 64BIT 3095 InstructionMark im(this); 3096 
emit_byte(0x81); 3097 emit_byte(0xF8 | src1->encoding()); 3098 emit_data(imm32, rspec, 0); 3099 } 3100 3101 void Assembler::cmp_literal32(Address src1, int32_t imm32, RelocationHolder const& rspec) { 3102 // NO PREFIX AS NEVER 64BIT (not even 32bit versions of 64bit regs 3103 InstructionMark im(this); 3104 emit_byte(0x81); 3105 emit_operand(rdi, src1); 3106 emit_data(imm32, rspec, 0); 3107 } 3108 3109 // The 64-bit (32bit platform) cmpxchg compares the value at adr with the contents of rdx:rax, 3110 // and stores rcx:rbx into adr if so; otherwise, the value at adr is loaded 3111 // into rdx:rax. The ZF is set if the compared values were equal, and cleared otherwise. 3112 void Assembler::cmpxchg8(Address adr) { 3113 InstructionMark im(this); 3114 emit_byte(0x0F); 3115 emit_byte(0xc7); 3116 emit_operand(rcx, adr); 3117 } 3118 3119 void Assembler::decl(Register dst) { 3120 // Don't use it directly. Use MacroAssembler::decrementl() instead. 3121 emit_byte(0x48 | dst->encoding()); 3122 } 3123 3124 #endif // _LP64 3125 3126 // 64bit typically doesn't use the x87 but needs to for the trig funcs 3127 3128 void Assembler::fabs() { 3129 emit_byte(0xD9); 3130 emit_byte(0xE1); 3131 } 3132 3133 void Assembler::fadd(int i) { 3134 emit_farith(0xD8, 0xC0, i); 3135 } 3136 3137 void Assembler::fadd_d(Address src) { 3138 InstructionMark im(this); 3139 emit_byte(0xDC); 3140 emit_operand32(rax, src); 3141 } 3142 3143 void Assembler::fadd_s(Address src) { 3144 InstructionMark im(this); 3145 emit_byte(0xD8); 3146 emit_operand32(rax, src); 3147 } 3148 3149 void Assembler::fadda(int i) { 3150 emit_farith(0xDC, 0xC0, i); 3151 } 3152 3153 void Assembler::faddp(int i) { 3154 emit_farith(0xDE, 0xC0, i); 3155 } 3156 3157 void Assembler::fchs() { 3158 emit_byte(0xD9); 3159 emit_byte(0xE0); 3160 } 3161 3162 void Assembler::fcom(int i) { 3163 emit_farith(0xD8, 0xD0, i); 3164 } 3165 3166 void Assembler::fcomp(int i) { 3167 emit_farith(0xD8, 0xD8, i); 3168 } 3169 3170 void Assembler::fcomp_d(Address 
src) { 3171 InstructionMark im(this); 3172 emit_byte(0xDC); 3173 emit_operand32(rbx, src); 3174 } 3175 3176 void Assembler::fcomp_s(Address src) { 3177 InstructionMark im(this); 3178 emit_byte(0xD8); 3179 emit_operand32(rbx, src); 3180 } 3181 3182 void Assembler::fcompp() { 3183 emit_byte(0xDE); 3184 emit_byte(0xD9); 3185 } 3186 3187 void Assembler::fcos() { 3188 emit_byte(0xD9); 3189 emit_byte(0xFF); 3190 } 3191 3192 void Assembler::fdecstp() { 3193 emit_byte(0xD9); 3194 emit_byte(0xF6); 3195 } 3196 3197 void Assembler::fdiv(int i) { 3198 emit_farith(0xD8, 0xF0, i); 3199 } 3200 3201 void Assembler::fdiv_d(Address src) { 3202 InstructionMark im(this); 3203 emit_byte(0xDC); 3204 emit_operand32(rsi, src); 3205 } 3206 3207 void Assembler::fdiv_s(Address src) { 3208 InstructionMark im(this); 3209 emit_byte(0xD8); 3210 emit_operand32(rsi, src); 3211 } 3212 3213 void Assembler::fdiva(int i) { 3214 emit_farith(0xDC, 0xF8, i); 3215 } 3216 3217 // Note: The Intel manual (Pentium Processor User's Manual, Vol.3, 1994) 3218 // is erroneous for some of the floating-point instructions below. 
// x87 arithmetic / load / store instructions.
// In the memory-operand forms the fixed general register passed to
// emit_operand32() is not a real operand: its 3-bit encoding supplies the
// /digit opcode-extension field of the ModRM byte.

void Assembler::fdivp(int i) {
  emit_farith(0xDE, 0xF8, i);  // ST(0) <- ST(0) / ST(1) and pop (Intel manual wrong)
}

// Reversed divide on the register stack.
void Assembler::fdivr(int i) {
  emit_farith(0xD8, 0xF8, i);
}

// DC /7: FDIVR with a double memory operand.
void Assembler::fdivr_d(Address src) {
  InstructionMark im(this);
  emit_byte(0xDC);
  emit_operand32(rdi, src);
}

// D8 /7: FDIVR with a float memory operand.
void Assembler::fdivr_s(Address src) {
  InstructionMark im(this);
  emit_byte(0xD8);
  emit_operand32(rdi, src);
}

void Assembler::fdivra(int i) {
  emit_farith(0xDC, 0xF0, i);
}

void Assembler::fdivrp(int i) {
  emit_farith(0xDE, 0xF0, i);  // ST(0) <- ST(1) / ST(0) and pop (Intel manual wrong)
}

// Mark x87 register i as empty.
void Assembler::ffree(int i) {
  emit_farith(0xDD, 0xC0, i);
}

// DF /5: FILD - load 64-bit integer from memory.
void Assembler::fild_d(Address adr) {
  InstructionMark im(this);
  emit_byte(0xDF);
  emit_operand32(rbp, adr);
}

// DB /0: FILD - load 32-bit integer from memory.
void Assembler::fild_s(Address adr) {
  InstructionMark im(this);
  emit_byte(0xDB);
  emit_operand32(rax, adr);
}

void Assembler::fincstp() {
  emit_byte(0xD9);
  emit_byte(0xF7);
}

// FWAIT + FNINIT, i.e. the waiting form FINIT.
void Assembler::finit() {
  emit_byte(0x9B);
  emit_byte(0xDB);
  emit_byte(0xE3);
}

// DB /2: FIST - store 32-bit integer to memory (no pop).
void Assembler::fist_s(Address adr) {
  InstructionMark im(this);
  emit_byte(0xDB);
  emit_operand32(rdx, adr);
}

// DF /7: FISTP - store 64-bit integer to memory and pop.
void Assembler::fistp_d(Address adr) {
  InstructionMark im(this);
  emit_byte(0xDF);
  emit_operand32(rdi, adr);
}

// DB /3: FISTP - store 32-bit integer to memory and pop.
void Assembler::fistp_s(Address adr) {
  InstructionMark im(this);
  emit_byte(0xDB);
  emit_operand32(rbx, adr);
}

// Push +1.0.
void Assembler::fld1() {
  emit_byte(0xD9);
  emit_byte(0xE8);
}

// DD /0: FLD - load double from memory.
void Assembler::fld_d(Address adr) {
  InstructionMark im(this);
  emit_byte(0xDD);
  emit_operand32(rax, adr);
}

// D9 /0: FLD - load float from memory.
void Assembler::fld_s(Address adr) {
  InstructionMark im(this);
  emit_byte(0xD9);
  emit_operand32(rax, adr);
}

// Push a copy of ST(index).
void Assembler::fld_s(int index) {
  emit_farith(0xD9, 0xC0, index);
}

// DB /5: FLD - load 80-bit extended precision from memory.
void Assembler::fld_x(Address adr) {
  InstructionMark im(this);
  emit_byte(0xDB);
  emit_operand32(rbp, adr);
}

// D9 /5: FLDCW - load the x87 control word.
void Assembler::fldcw(Address src) {
  InstructionMark im(this);
  emit_byte(0xd9);
  emit_operand32(rbp, src);
}

// D9 /4: FLDENV - load the x87 environment.
void Assembler::fldenv(Address src) {
  InstructionMark im(this);
  emit_byte(0xD9);
  emit_operand32(rsp, src);
}

// Push log10(2).
void Assembler::fldlg2() {
  emit_byte(0xD9);
  emit_byte(0xEC);
}

// Push ln(2).
void Assembler::fldln2() {
  emit_byte(0xD9);
  emit_byte(0xED);
}

// Push +0.0.
void Assembler::fldz() {
  emit_byte(0xD9);
  emit_byte(0xEE);
}

// ST(0) <- ln(ST(0)), computed as ln(2) * log2(x).
void Assembler::flog() {
  fldln2();
  fxch();
  fyl2x();
}

// ST(0) <- log10(ST(0)), computed as log10(2) * log2(x).
void Assembler::flog10() {
  fldlg2();
  fxch();
  fyl2x();
}

void Assembler::fmul(int i) {
  emit_farith(0xD8, 0xC8, i);
}

// DC /1: FMUL with a double memory operand.
void Assembler::fmul_d(Address src) {
  InstructionMark im(this);
  emit_byte(0xDC);
  emit_operand32(rcx, src);
}

// D8 /1: FMUL with a float memory operand.
void Assembler::fmul_s(Address src) {
  InstructionMark im(this);
  emit_byte(0xD8);
  emit_operand32(rcx, src);
}

void Assembler::fmula(int i) {
  emit_farith(0xDC, 0xC8, i);
}

void Assembler::fmulp(int i) {
  emit_farith(0xDE, 0xC8, i);
}

// DD /6: FNSAVE - save x87 state (non-waiting form).
void Assembler::fnsave(Address dst) {
  InstructionMark im(this);
  emit_byte(0xDD);
  emit_operand32(rsi, dst);
}

// FWAIT + D9 /7: store the x87 control word (waiting form, i.e. FSTCW).
void Assembler::fnstcw(Address src) {
  InstructionMark im(this);
  emit_byte(0x9B);
  emit_byte(0xD9);
  emit_operand32(rdi, src);
}

// DF E0: FNSTSW AX - store the x87 status word into AX.
void Assembler::fnstsw_ax() {
  emit_byte(0xdF);
  emit_byte(0xE0);
}

// Partial remainder (truncating form).
void Assembler::fprem() {
  emit_byte(0xD9);
  emit_byte(0xF8);
}
// Partial remainder (IEEE rounding form).
void Assembler::fprem1() {
  emit_byte(0xD9);
  emit_byte(0xF5);
}

// DD /4: FRSTOR - restore x87 state saved by fnsave.
void Assembler::frstor(Address src) {
  InstructionMark im(this);
  emit_byte(0xDD);
  emit_operand32(rsp, src);
}

void Assembler::fsin() {
  emit_byte(0xD9);
  emit_byte(0xFE);
}

void Assembler::fsqrt() {
  emit_byte(0xD9);
  emit_byte(0xFA);
}

// DD /2: FST - store double to memory (no pop).
void Assembler::fst_d(Address adr) {
  InstructionMark im(this);
  emit_byte(0xDD);
  emit_operand32(rdx, adr);
}

// D9 /2: FST - store float to memory (no pop).
void Assembler::fst_s(Address adr) {
  InstructionMark im(this);
  emit_byte(0xD9);
  emit_operand32(rdx, adr);
}

// DD /3: FSTP - store double to memory and pop.
void Assembler::fstp_d(Address adr) {
  InstructionMark im(this);
  emit_byte(0xDD);
  emit_operand32(rbx, adr);
}

// Store ST(0) into ST(index) and pop.
void Assembler::fstp_d(int index) {
  emit_farith(0xDD, 0xD8, index);
}

// D9 /3: FSTP - store float to memory and pop.
void Assembler::fstp_s(Address adr) {
  InstructionMark im(this);
  emit_byte(0xD9);
  emit_operand32(rbx, adr);
}

// DB /7: FSTP - store 80-bit extended precision to memory and pop.
void Assembler::fstp_x(Address adr) {
  InstructionMark im(this);
  emit_byte(0xDB);
  emit_operand32(rdi, adr);
}

void Assembler::fsub(int i) {
  emit_farith(0xD8, 0xE0, i);
}

// DC /4: FSUB with a double memory operand.
void Assembler::fsub_d(Address src) {
  InstructionMark im(this);
  emit_byte(0xDC);
  emit_operand32(rsp, src);
}

// D8 /4: FSUB with a float memory operand.
void Assembler::fsub_s(Address src) {
  InstructionMark im(this);
  emit_byte(0xD8);
  emit_operand32(rsp, src);
}

void Assembler::fsuba(int i) {
  emit_farith(0xDC, 0xE8, i);
}

void Assembler::fsubp(int i) {
  emit_farith(0xDE, 0xE8, i);  // ST(0) <- ST(0) - ST(1) and pop (Intel manual wrong)
}

void Assembler::fsubr(int i) {
  emit_farith(0xD8, 0xE8, i);
}

// DC /5: FSUBR with a double memory operand.
void Assembler::fsubr_d(Address src) {
  InstructionMark im(this);
  emit_byte(0xDC);
  emit_operand32(rbp, src);
}

// D8 /5: FSUBR with a float memory operand.
void Assembler::fsubr_s(Address src) {
  InstructionMark im(this);
  emit_byte(0xD8);
  emit_operand32(rbp, src);
}

void Assembler::fsubra(int i) {
  emit_farith(0xDC, 0xE0, i);
}

void Assembler::fsubrp(int i) {
  emit_farith(0xDE, 0xE0, i);  // ST(0) <- ST(1) - ST(0) and pop (Intel manual wrong)
}

// FPTAN (D9 F2) pushes an extra 1.0 after the result;
// the trailing FSTP ST(0) (DD D8) discards it.
void Assembler::ftan() {
  emit_byte(0xD9);
  emit_byte(0xF2);
  emit_byte(0xDD);
  emit_byte(0xD8);
}

// Compare ST(0) against 0.0.
void Assembler::ftst() {
  emit_byte(0xD9);
  emit_byte(0xE4);
}

// Unordered compare ST(0) with ST(i), result in EFLAGS.
void Assembler::fucomi(int i) {
  // make sure the instruction is supported (introduced for P6, together with cmov)
  guarantee(VM_Version::supports_cmov(), "illegal instruction");
  emit_farith(0xDB, 0xE8, i);
}

// As fucomi, but pops the register stack.
void Assembler::fucomip(int i) {
  // make sure the instruction is supported (introduced for P6, together with cmov)
  guarantee(VM_Version::supports_cmov(), "illegal instruction");
  emit_farith(0xDF, 0xE8, i);
}

void Assembler::fwait() {
  emit_byte(0x9B);
}

// Exchange ST(0) and ST(i).
void Assembler::fxch(int i) {
  emit_farith(0xD9, 0xC8, i);
}

// ST(1) <- ST(1) * log2(ST(0)) and pop.
void Assembler::fyl2x() {
  emit_byte(0xD9);
  emit_byte(0xF1);
}

// SSE SIMD prefix byte values corresponding to VexSimdPrefix encoding.
static int simd_pre[4] = { 0, 0x66, 0xF3, 0xF2 };
// SSE opcode second byte values (first is 0x0F) corresponding to VexOpcode encoding.
static int simd_opc[4] = { 0, 0, 0x38, 0x3A };

// Generate SSE legacy REX prefix and SIMD opcode based on VEX encoding.
// Emit the legacy SSE equivalent (mandatory SIMD prefix byte, REX prefix,
// 0F [38|3A] escape) of what the given VEX fields would encode, for a
// memory-operand instruction.
void Assembler::rex_prefix(Address adr, XMMRegister xreg, VexSimdPrefix pre, VexOpcode opc, bool rex_w) {
  if (pre > 0) {
    emit_byte(simd_pre[pre]);
  }
  if (rex_w) {
    prefixq(adr, xreg);
  } else {
    prefix(adr, xreg);
  }
  if (opc > 0) {
    emit_byte(0x0F);
    int opc2 = simd_opc[opc];
    if (opc2 > 0) {
      emit_byte(opc2);
    }
  }
}

// Register-register companion of rex_prefix(); returns the combined
// ModRM reg/rm bits (high register bits folded into the REX prefix).
int Assembler::rex_prefix_and_encode(int dst_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, bool rex_w) {
  if (pre > 0) {
    emit_byte(simd_pre[pre]);
  }
  int encode = (rex_w) ? prefixq_and_encode(dst_enc, src_enc) :
                         prefix_and_encode(dst_enc, src_enc);
  if (opc > 0) {
    emit_byte(0x0F);
    int opc2 = simd_opc[opc];
    if (opc2 > 0) {
      emit_byte(opc2);
    }
  }
  return encode;
}


// Emit an AVX VEX prefix. The 2-byte form is used unless one of the
// 3-byte-only fields (B, X, W, or a 0F_38/0F_3A opcode escape) is needed.
void Assembler::vex_prefix(bool vex_r, bool vex_b, bool vex_x, bool vex_w, int nds_enc, VexSimdPrefix pre, VexOpcode opc, bool vector256) {
  if (vex_b || vex_x || vex_w || (opc == VEX_OPCODE_0F_38) || (opc == VEX_OPCODE_0F_3A)) {
    prefix(VEX_3bytes);

    // Payload byte 1: R/X/B (stored inverted) plus the opcode-map select.
    int byte1 = (vex_r ? VEX_R : 0) | (vex_x ? VEX_X : 0) | (vex_b ? VEX_B : 0);
    byte1 = (~byte1) & 0xE0;
    byte1 |= opc;
    // NOTE(review): a_byte() here, while every other path uses emit_byte() -
    // confirm this is intentional.
    a_byte(byte1);

    // Payload byte 2: W, inverted vvvv (second source register),
    // L bit (256-bit vector) and the SIMD prefix select (pp).
    int byte2 = ((~nds_enc) & 0xf) << 3;
    byte2 |= (vex_w ? VEX_W : 0) | (vector256 ? 4 : 0) | pre;
    emit_byte(byte2);
  } else {
    prefix(VEX_2bytes);

    // Single payload byte: inverted R, inverted vvvv, L and pp.
    int byte1 = vex_r ? VEX_R : 0;
    byte1 = (~byte1) & 0x80;
    byte1 |= ((~nds_enc) & 0xf) << 3;
    byte1 |= (vector256 ? 4 : 0) | pre;
    emit_byte(byte1);
  }
}

// Memory-operand form: derive the R/X/B extension bits from the register
// encoding and the address's base/index registers.
void Assembler::vex_prefix(Address adr, int nds_enc, int xreg_enc, VexSimdPrefix pre, VexOpcode opc, bool vex_w, bool vector256){
  bool vex_r = (xreg_enc >= 8);
  bool vex_b = adr.base_needs_rex();
  bool vex_x = adr.index_needs_rex();
  vex_prefix(vex_r, vex_b, vex_x, vex_w, nds_enc, pre, opc, vector256);
}

// Register-register form; returns the ModRM reg/rm encoding (low 3 bits of
// each register - the high bits travel in the VEX prefix).
int Assembler::vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, bool vex_w, bool vector256) {
  bool vex_r = (dst_enc >= 8);
  bool vex_b = (src_enc >= 8);
  bool vex_x = false;
  vex_prefix(vex_r, vex_b, vex_x, vex_w, nds_enc, pre, opc, vector256);
  return (((dst_enc & 7) << 3) | (src_enc & 7));
}


// Emit either a VEX prefix (when AVX is enabled) or the legacy
// prefix/REX/escape bytes for an XMM instruction with a memory operand.
void Assembler::simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr, VexSimdPrefix pre, VexOpcode opc, bool rex_w, bool vector256) {
  if (UseAVX > 0) {
    int xreg_enc = xreg->encoding();
    int nds_enc = nds->is_valid() ? nds->encoding() : 0;
    vex_prefix(adr, nds_enc, xreg_enc, pre, opc, rex_w, vector256);
  } else {
    // SSE has no separate non-destructive source; nds must coincide with
    // xreg or be absent.
    assert((nds == xreg) || (nds == xnoreg), "wrong sse encoding");
    rex_prefix(adr, xreg, pre, opc, rex_w);
  }
}

// Register-register companion of simd_prefix(); returns the ModRM encoding.
int Assembler::simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src, VexSimdPrefix pre, VexOpcode opc, bool rex_w, bool vector256) {
  int dst_enc = dst->encoding();
  int src_enc = src->encoding();
  if (UseAVX > 0) {
    int nds_enc = nds->is_valid() ? nds->encoding() : 0;
    return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, rex_w, vector256);
  } else {
    assert((nds == dst) || (nds == src) || (nds == xnoreg), "wrong sse encoding");
    return rex_prefix_and_encode(dst_enc, src_enc, pre, opc, rex_w);
  }
}

#ifndef _LP64

// 32-bit only pieces of the assembler.

void Assembler::incl(Register dst) {
  // Don't use it directly. Use MacroAssembler::incrementl() instead.
  // One-byte 40+r form; these byte values are REX prefixes in 64-bit mode.
  emit_byte(0x40 | dst->encoding());
}

void Assembler::lea(Register dst, Address src) {
  leal(dst, src);
}

// C7 /0 with a relocated 32-bit immediate.
void Assembler::mov_literal32(Address dst, int32_t imm32, RelocationHolder const& rspec) {
  InstructionMark im(this);
  emit_byte(0xC7);
  emit_operand(rax, dst);
  emit_data((int)imm32, rspec, 0);
}

// B8+r with a relocated 32-bit immediate.
void Assembler::mov_literal32(Register dst, int32_t imm32, RelocationHolder const& rspec) {
  InstructionMark im(this);
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xB8 | encode);
  emit_data((int)imm32, rspec, 0);
}

void Assembler::popa() { // 32bit
  emit_byte(0x61);  // POPA
}

// 68: PUSH imm32, with relocation.
void Assembler::push_literal32(int32_t imm32, RelocationHolder const& rspec) {
  InstructionMark im(this);
  emit_byte(0x68);
  emit_data(imm32, rspec, 0);
}

void Assembler::pusha() { // 32bit
  emit_byte(0x60);  // PUSHA
}

// 0F 95: SETNZ on the byte register form of dst.
void Assembler::set_byte_if_not_zero(Register dst) {
  emit_byte(0x0F);
  emit_byte(0x95);
  emit_byte(0xE0 | dst->encoding());
}

// 0F A5: SHLD dst, src, CL.
void Assembler::shldl(Register dst, Register src) {
  emit_byte(0x0F);
  emit_byte(0xA5);
  emit_byte(0xC0 | src->encoding() << 3 | dst->encoding());
}

// 0F AD: SHRD dst, src, CL.
void Assembler::shrdl(Register dst, Register src) {
  emit_byte(0x0F);
  emit_byte(0xAD);
  emit_byte(0xC0 | src->encoding() << 3 | dst->encoding());
}

#else // LP64

// 0F 95: SETNZ; may need a REX prefix for byte-register access.
void Assembler::set_byte_if_not_zero(Register dst) {
  int enc = prefix_and_encode(dst->encoding(), true);
  emit_byte(0x0F);
  emit_byte(0x95);
  emit_byte(0xE0 | enc);
}

// 64bit only pieces of the assembler
// This should only be used by 64bit instructions that can use rip-relative
// it cannot be used by instructions that want an immediate value.
// Returns true if adr can be addressed with a 32-bit (rip-relative)
// displacement from anywhere in the code cache, based on its relocation
// type and target address. Conservative: applies a fudge factor for the
// unknown size of the instruction the displacement will be embedded in.
bool Assembler::reachable(AddressLiteral adr) {
  int64_t disp;
  // None will force a 64bit literal to the code stream. Likely a placeholder
  // for something that will be patched later and we need to be certain it
  // will always be reachable.
  if (adr.reloc() == relocInfo::none) {
    return false;
  }
  if (adr.reloc() == relocInfo::internal_word_type) {
    // This should be rip relative and easily reachable.
    return true;
  }
  if (adr.reloc() == relocInfo::virtual_call_type ||
      adr.reloc() == relocInfo::opt_virtual_call_type ||
      adr.reloc() == relocInfo::static_call_type ||
      adr.reloc() == relocInfo::static_stub_type ) {
    // This should be rip relative within the code cache and easily
    // reachable until we get huge code caches. (At which point
    // ic code is going to have issues).
    return true;
  }
  if (adr.reloc() != relocInfo::external_word_type &&
      adr.reloc() != relocInfo::poll_return_type &&  // these are really external_word but need special
      adr.reloc() != relocInfo::poll_type &&         // relocs to identify them
      adr.reloc() != relocInfo::runtime_call_type ) {
    return false;
  }

  // Stress the correction code
  if (ForceUnreachable) {
    // Must be runtimecall reloc, see if it is in the codecache
    // Flipping stuff in the codecache to be unreachable causes issues
    // with things like inline caches where the additional instructions
    // are not handled.
    if (CodeCache::find_blob(adr._target) == NULL) {
      return false;
    }
  }
  // For external_word_type/runtime_call_type if it is reachable from where we
  // are now (possibly a temp buffer) and where we might end up
  // anywhere in the codeCache then we are always reachable.
  // This would have to change if we ever save/restore shared code
  // to be more pessimistic.
  // Check from both extremes of the code cache; these are already worst case.
  disp = (int64_t)adr._target - ((int64_t)CodeCache::low_bound() + sizeof(int));
  if (!is_simm32(disp)) return false;
  disp = (int64_t)adr._target - ((int64_t)CodeCache::high_bound() + sizeof(int));
  if (!is_simm32(disp)) return false;

  // Displacement from the current emission position.
  disp = (int64_t)adr._target - ((int64_t)_code_pos + sizeof(int));

  // Because rip relative is a disp + address_of_next_instruction and we
  // don't know the value of address_of_next_instruction we apply a fudge factor
  // to make sure we will be ok no matter the size of the instruction we get placed into.
  // We don't have to fudge the checks above here because they are already worst case.

  // 12 == override/rex byte, opcode byte, rm byte, sib byte, a 4-byte disp , 4-byte literal
  // + 4 because better safe than sorry.
  const int fudge = 12 + 4;
  if (disp < 0) {
    disp -= fudge;
  } else {
    disp += fudge;
  }
  return is_simm32(disp);
}
// Returns true if the polling page cannot be addressed with a 32-bit
// displacement from every possible position in the code cache.
bool Assembler::is_polling_page_far() {
  intptr_t addr = (intptr_t)os::get_polling_page();
  return ForceUnreachable ||
         !is_simm32(addr - (intptr_t)CodeCache::low_bound()) ||
         !is_simm32(addr - (intptr_t)CodeCache::high_bound());
}

// Emit a 64-bit data word, wrapping a raw reloc type into a
// RelocationHolder when one is needed.
void Assembler::emit_data64(jlong data,
                            relocInfo::relocType rtype,
                            int format) {
  if (rtype == relocInfo::none) {
    emit_long64(data);
  } else {
    emit_data64(data, Relocation::spec_simple(rtype), format);
  }
}

// Emit a relocated 64-bit data word; must be inside an InstructionMark.
void Assembler::emit_data64(jlong data,
                            RelocationHolder const& rspec,
                            int format) {
  assert(imm_operand == 0, "default format must be immediate in this file");
  assert(imm_operand == format, "must be immediate");
  assert(inst_mark() != NULL, "must be inside InstructionMark");
  // Do not use AbstractAssembler::relocate, which is not intended for
  // embedded words. Instead, relocate to the enclosing instruction.
  code_section()->relocate(inst_mark(), rspec, format);
#ifdef ASSERT
  check_relocation(rspec, format);
#endif
  emit_long64(data);
}

// Emit a REX prefix for a single register operand if one is needed and
// return the low 3 bits of the register encoding.
int Assembler::prefix_and_encode(int reg_enc, bool byteinst) {
  if (reg_enc >= 8) {
    prefix(REX_B);
    reg_enc -= 8;
  } else if (byteinst && reg_enc >= 4) {
    // SPL/BPL/SIL/DIL need a REX prefix to be addressed as byte registers.
    prefix(REX);
  }
  return reg_enc;
}

// As above but always emits REX.W for 64-bit operand size.
int Assembler::prefixq_and_encode(int reg_enc) {
  if (reg_enc < 8) {
    prefix(REX_W);
  } else {
    prefix(REX_WB);
    reg_enc -= 8;
  }
  return reg_enc;
}

// Two-register form; returns the combined ModRM reg/rm bits.
int Assembler::prefix_and_encode(int dst_enc, int src_enc, bool byteinst) {
  if (dst_enc < 8) {
    if (src_enc >= 8) {
      prefix(REX_B);
      src_enc -= 8;
    } else if (byteinst && src_enc >= 4) {
      prefix(REX);
    }
  } else {
    if (src_enc < 8) {
      prefix(REX_R);
    } else {
      prefix(REX_RB);
      src_enc -= 8;
    }
    dst_enc -= 8;
  }
  return dst_enc << 3 | src_enc;
}

// Two-register form with REX.W.
int Assembler::prefixq_and_encode(int dst_enc, int src_enc) {
  if (dst_enc < 8) {
    if (src_enc < 8) {
      prefix(REX_W);
    } else {
      prefix(REX_WB);
      src_enc -= 8;
    }
  } else {
    if (src_enc < 8) {
      prefix(REX_WR);
    } else {
      prefix(REX_WRB);
      src_enc -= 8;
    }
    dst_enc -= 8;
  }
  return dst_enc << 3 | src_enc;
}

void Assembler::prefix(Register reg) {
  if (reg->encoding() >= 8) {
    prefix(REX_B);
  }
}

// REX prefix for a memory operand whose base/index may be r8-r15.
void Assembler::prefix(Address adr) {
  if (adr.base_needs_rex()) {
    if (adr.index_needs_rex()) {
      prefix(REX_XB);
    } else {
      prefix(REX_B);
    }
  } else {
    if (adr.index_needs_rex()) {
      prefix(REX_X);
    }
  }
}

// REX.W prefix for a 64-bit memory operand.
void Assembler::prefixq(Address adr) {
  if (adr.base_needs_rex()) {
    if (adr.index_needs_rex()) {
      prefix(REX_WXB);
    } else {
      prefix(REX_WB);
    }
  } else {
    if (adr.index_needs_rex()) {
      prefix(REX_WX);
    } else {
      prefix(REX_W);
    }
  }
}


// REX prefix for a register + memory operand pair.
void Assembler::prefix(Address adr, Register reg, bool byteinst) {
  if (reg->encoding() < 8) {
    if (adr.base_needs_rex()) {
      if (adr.index_needs_rex()) {
        prefix(REX_XB);
      } else {
        prefix(REX_B);
      }
    } else {
      if (adr.index_needs_rex()) {
        prefix(REX_X);
      } else if (byteinst && reg->encoding() >= 4 ) {
        prefix(REX);
      }
    }
  } else {
    if (adr.base_needs_rex()) {
      if (adr.index_needs_rex()) {
        prefix(REX_RXB);
      } else {
        prefix(REX_RB);
      }
    } else {
      if (adr.index_needs_rex()) {
        prefix(REX_RX);
      } else {
        prefix(REX_R);
      }
    }
  }
}

// REX.W prefix for a register + memory operand pair.
void Assembler::prefixq(Address adr, Register src) {
  if (src->encoding() < 8) {
    if (adr.base_needs_rex()) {
      if (adr.index_needs_rex()) {
        prefix(REX_WXB);
      } else {
        prefix(REX_WB);
      }
    } else {
      if (adr.index_needs_rex()) {
        prefix(REX_WX);
      } else {
        prefix(REX_W);
      }
    }
  } else {
    if (adr.base_needs_rex()) {
      if (adr.index_needs_rex()) {
        prefix(REX_WRXB);
      } else {
        prefix(REX_WRB);
      }
    } else {
      if (adr.index_needs_rex()) {
        prefix(REX_WRX);
      } else {
        prefix(REX_WR);
      }
    }
  }
}

// REX prefix for an XMM register + memory operand pair.
void Assembler::prefix(Address adr, XMMRegister reg) {
  if (reg->encoding() < 8) {
    if (adr.base_needs_rex()) {
      if (adr.index_needs_rex()) {
        prefix(REX_XB);
      } else {
        prefix(REX_B);
      }
    } else {
      if (adr.index_needs_rex()) {
        prefix(REX_X);
      }
    }
  } else {
    if (adr.base_needs_rex()) {
      if (adr.index_needs_rex()) {
        prefix(REX_RXB);
      } else {
        prefix(REX_RB);
      }
    } else {
      if (adr.index_needs_rex()) {
        prefix(REX_RX);
      } else {
        prefix(REX_R);
      }
    }
  }
}

// REX.W prefix for an XMM register + memory operand pair.
void Assembler::prefixq(Address adr, XMMRegister src) {
  if (src->encoding() < 8) {
    if (adr.base_needs_rex()) {
      if (adr.index_needs_rex()) {
        prefix(REX_WXB);
      } else {
        prefix(REX_WB);
      }
    } else {
      if (adr.index_needs_rex()) {
        prefix(REX_WX);
      } else {
        prefix(REX_W);
      }
    }
  } else {
    if (adr.base_needs_rex()) {
      if (adr.index_needs_rex()) {
        prefix(REX_WRXB);
      } else {
        prefix(REX_WRB);
      }
    } else {
      if (adr.index_needs_rex()) {
        prefix(REX_WRX);
      } else {
        prefix(REX_WR);
      }
    }
  }
}

// 81 /2: ADC r/m64, imm32.
void Assembler::adcq(Register dst, int32_t imm32) {
  (void) prefixq_and_encode(dst->encoding());
  emit_arith(0x81, 0xD0, dst, imm32);
}

// 13 /r: ADC r64, r/m64.
void Assembler::adcq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x13);
  emit_operand(dst, src);
}

void Assembler::adcq(Register dst, Register src) {
  // Return value discarded; called for the REX.W prefix side effect only.
  (int) prefixq_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x13, 0xC0, dst, src);
}

// 81 /0: ADD r/m64, imm32 (or the imm8 form via emit_arith_operand).
void Assembler::addq(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefixq(dst);
  emit_arith_operand(0x81, rax, dst, imm32);
}

// 01 /r: ADD r/m64, r64.
void Assembler::addq(Address dst, Register src) {
  InstructionMark im(this);
  prefixq(dst, src);
  emit_byte(0x01);
  emit_operand(src, dst);
}

void Assembler::addq(Register dst, int32_t imm32) {
  (void) prefixq_and_encode(dst->encoding());
  emit_arith(0x81, 0xC0, dst, imm32);
}

// 03 /r: ADD r64, r/m64.
void Assembler::addq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x03);
  emit_operand(dst, src);
}

void Assembler::addq(Register dst, Register src) {
  (void) prefixq_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x03, 0xC0, dst, src);
}

// 81 /4: AND r/m64, imm32.
void Assembler::andq(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefixq(dst);
  emit_byte(0x81);
  emit_operand(rsp, dst, 4);
  emit_long(imm32);
}

void Assembler::andq(Register dst, int32_t imm32) {
  (void) prefixq_and_encode(dst->encoding());
  emit_arith(0x81, 0xE0, dst, imm32);
}

// 23 /r: AND r64, r/m64.
void Assembler::andq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x23);
  emit_operand(dst, src);
}

void Assembler::andq(Register dst, Register src) {
  // Return value discarded; called for the REX.W prefix side effect only.
  (int) prefixq_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x23, 0xC0, dst, src);
}

// 0F BC: BSF - bit scan forward.
void Assembler::bsfq(Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xBC);
  emit_byte(0xC0 | encode);
}

// 0F BD: BSR - bit scan reverse. With an F3 prefix this encoding would be
// LZCNT on supporting CPUs, hence the assert.
void Assembler::bsrq(Register dst, Register src) {
  assert(!VM_Version::supports_lzcnt(), "encoding is treated as LZCNT");
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xBD);
  emit_byte(0xC0 | encode);
}

// 0F C8+r: BSWAP r64.
void Assembler::bswapq(Register reg) {
  int encode = prefixq_and_encode(reg->encoding());
  emit_byte(0x0F);
  emit_byte(0xC8 | encode);
}

// REX.W 99: CQO - sign-extend RAX into RDX:RAX.
void Assembler::cdqq() {
  prefix(REX_W);
  emit_byte(0x99);
}

// 0F AE /7: CLFLUSH - flush the cache line containing adr.
void Assembler::clflush(Address adr) {
  prefix(adr);
  emit_byte(0x0F);
  emit_byte(0xAE);
  emit_operand(rdi, adr);
}

// 0F 40+cc: CMOVcc r64, r64.
void Assembler::cmovq(Condition cc, Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x40 | cc);
  emit_byte(0xC0 | encode);
}

// 0F 40+cc: CMOVcc r64, m64.
void Assembler::cmovq(Condition cc, Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x0F);
  emit_byte(0x40 | cc);
  emit_operand(dst, src);
}

// 81 /7: CMP r/m64, imm32.
void Assembler::cmpq(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefixq(dst);
  emit_byte(0x81);
  emit_operand(rdi, dst, 4);
  emit_long(imm32);
}

void Assembler::cmpq(Register dst, int32_t imm32) {
  (void) prefixq_and_encode(dst->encoding());
  emit_arith(0x81, 0xF8, dst, imm32);
}

void Assembler::cmpq(Address dst, Register src) {
  InstructionMark im(this);
  prefixq(dst, src);
  // NOTE(review): 0x3B is CMP r64, r/m64, so with reg=src this computes
  // src - dst rather than the dst - src the argument order suggests
  // (0x39 would match). Equality tests are unaffected; confirm against
  // callers before changing.
  emit_byte(0x3B);
  emit_operand(src, dst);
}

// 3B /r: CMP r64, r/m64.
void Assembler::cmpq(Register dst, Register src) {
  (void) prefixq_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x3B, 0xC0, dst, src);
}

void Assembler::cmpq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x3B);
  emit_operand(dst, src);
}

// 0F B1: CMPXCHG r/m64, r64 (use with a LOCK prefix for atomicity).
void Assembler::cmpxchgq(Register reg, Address adr) {
  InstructionMark im(this);
  prefixq(adr, reg);
  emit_byte(0x0F);
  emit_byte(0xB1);
  emit_operand(reg, adr);
}

// F2 REX.W 0F 2A: CVTSI2SD xmm, r64.
void Assembler::cvtsi2sdq(XMMRegister dst, Register src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  int encode = simd_prefix_and_encode_q(dst, dst, src, VEX_SIMD_F2);
  emit_byte(0x2A);
  emit_byte(0xC0 | encode);
}

// F2 REX.W 0F 2A: CVTSI2SD xmm, m64.
void Assembler::cvtsi2sdq(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  simd_prefix_q(dst, dst, src, VEX_SIMD_F2);
  emit_byte(0x2A);
  emit_operand(dst, src);
}

// F3 REX.W 0F 2A: CVTSI2SS xmm, r64.
void Assembler::cvtsi2ssq(XMMRegister dst, Register src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  int encode = simd_prefix_and_encode_q(dst, dst, src, VEX_SIMD_F3);
  emit_byte(0x2A);
  emit_byte(0xC0 | encode);
}

// F3 REX.W 0F 2A: CVTSI2SS xmm, m64.
void Assembler::cvtsi2ssq(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  simd_prefix_q(dst, dst, src, VEX_SIMD_F3);
  emit_byte(0x2A);
  emit_operand(dst, src);
}

// F2 REX.W 0F 2C: CVTTSD2SI r64, xmm (truncating).
void Assembler::cvttsd2siq(Register dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_F2);
  emit_byte(0x2C);
  emit_byte(0xC0 | encode);
}

// F3 REX.W 0F 2C: CVTTSS2SI r64, xmm (truncating).
void Assembler::cvttss2siq(Register dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_F3);
  emit_byte(0x2C);
  emit_byte(0xC0 | encode);
}

void Assembler::decl(Register dst) {
  // Don't use it directly. Use MacroAssembler::decrementl() instead.
  // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xFF);
  emit_byte(0xC8 | encode);
}

void Assembler::decq(Register dst) {
  // Don't use it directly. Use MacroAssembler::decrementq() instead.
  // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
  int encode = prefixq_and_encode(dst->encoding());
  emit_byte(0xFF);
  emit_byte(0xC8 | encode);
}

// FF /1: DEC r/m64.
void Assembler::decq(Address dst) {
  // Don't use it directly. Use MacroAssembler::decrementq() instead.
  InstructionMark im(this);
  prefixq(dst);
  emit_byte(0xFF);
  emit_operand(rcx, dst);
}

// 0F AE /1: FXRSTOR - restore x87/SSE state saved by fxsave.
void Assembler::fxrstor(Address src) {
  prefixq(src);
  emit_byte(0x0F);
  emit_byte(0xAE);
  emit_operand(as_Register(1), src);
}

// 0F AE /0: FXSAVE - save x87/SSE state.
void Assembler::fxsave(Address dst) {
  prefixq(dst);
  emit_byte(0x0F);
  emit_byte(0xAE);
  emit_operand(as_Register(0), dst);
}

// F7 /7: IDIV r/m64 (divides RDX:RAX).
void Assembler::idivq(Register src) {
  int encode = prefixq_and_encode(src->encoding());
  emit_byte(0xF7);
  emit_byte(0xF8 | encode);
}

// 0F AF: IMUL r64, r/m64.
void Assembler::imulq(Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xAF);
  emit_byte(0xC0 | encode);
}

// 6B (imm8) or 69 (imm32): IMUL r64, r/m64, imm.
void Assembler::imulq(Register dst, Register src, int value) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  if (is8bit(value)) {
    emit_byte(0x6B);
    emit_byte(0xC0 | encode);
    emit_byte(value & 0xFF);
  } else {
    emit_byte(0x69);
    emit_byte(0xC0 | encode);
    emit_long(value);
  }
}

void Assembler::incl(Register dst) {
  // Don't use it directly. Use MacroAssembler::incrementl() instead.
  // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xFF);
  emit_byte(0xC0 | encode);
}

void Assembler::incq(Register dst) {
  // Don't use it directly. Use MacroAssembler::incrementq() instead.
  // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
  int encode = prefixq_and_encode(dst->encoding());
  emit_byte(0xFF);
  emit_byte(0xC0 | encode);
}

// FF /0: INC r/m64.
void Assembler::incq(Address dst) {
  // Don't use it directly. Use MacroAssembler::incrementq() instead.
  InstructionMark im(this);
  prefixq(dst);
  emit_byte(0xFF);
  emit_operand(rax, dst);
}

void Assembler::lea(Register dst, Address src) {
  leaq(dst, src);
}

// 8D /r: LEA r64, m.
void Assembler::leaq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x8D);
  emit_operand(dst, src);
}

// REX.W B8+r: MOV r64, imm64.
void Assembler::mov64(Register dst, int64_t imm64) {
  InstructionMark im(this);
  int encode = prefixq_and_encode(dst->encoding());
  emit_byte(0xB8 | encode);
  emit_long64(imm64);
}

// As mov64, but with a relocated 64-bit immediate.
void Assembler::mov_literal64(Register dst, intptr_t imm64, RelocationHolder const& rspec) {
  InstructionMark im(this);
  int encode = prefixq_and_encode(dst->encoding());
  emit_byte(0xB8 | encode);
  emit_data64(imm64, rspec);
}

// B8+r with a relocated narrow-oop (32-bit) immediate.
void Assembler::mov_narrow_oop(Register dst, int32_t imm32, RelocationHolder const& rspec) {
  InstructionMark im(this);
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xB8 | encode);
  emit_data((int)imm32, rspec, narrow_oop_operand);
}

// C7 /0 with a relocated narrow-oop immediate.
void Assembler::mov_narrow_oop(Address dst, int32_t imm32, RelocationHolder const& rspec) {
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0xC7);
  emit_operand(rax, dst, 4);
  emit_data((int)imm32, rspec, narrow_oop_operand);
}

// 81 /7: CMP r/m32 against a relocated narrow-oop immediate.
void Assembler::cmp_narrow_oop(Register src1, int32_t imm32, RelocationHolder const& rspec) {
  InstructionMark im(this);
  int encode = prefix_and_encode(src1->encoding());
  emit_byte(0x81);
  emit_byte(0xF8 | encode);
  emit_data((int)imm32, rspec, narrow_oop_operand);
}
4382 4383 void Assembler::cmp_narrow_oop(Address src1, int32_t imm32, RelocationHolder const& rspec) { 4384 InstructionMark im(this); 4385 prefix(src1); 4386 emit_byte(0x81); 4387 emit_operand(rax, src1, 4); 4388 emit_data((int)imm32, rspec, narrow_oop_operand); 4389 } 4390 4391 void Assembler::lzcntq(Register dst, Register src) { 4392 assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR"); 4393 emit_byte(0xF3); 4394 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 4395 emit_byte(0x0F); 4396 emit_byte(0xBD); 4397 emit_byte(0xC0 | encode); 4398 } 4399 4400 void Assembler::movdq(XMMRegister dst, Register src) { 4401 // table D-1 says MMX/SSE2 4402 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 4403 int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_66); 4404 emit_byte(0x6E); 4405 emit_byte(0xC0 | encode); 4406 } 4407 4408 void Assembler::movdq(Register dst, XMMRegister src) { 4409 // table D-1 says MMX/SSE2 4410 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 4411 // swap src/dst to get correct prefix 4412 int encode = simd_prefix_and_encode_q(src, dst, VEX_SIMD_66); 4413 emit_byte(0x7E); 4414 emit_byte(0xC0 | encode); 4415 } 4416 4417 void Assembler::movq(Register dst, Register src) { 4418 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 4419 emit_byte(0x8B); 4420 emit_byte(0xC0 | encode); 4421 } 4422 4423 void Assembler::movq(Register dst, Address src) { 4424 InstructionMark im(this); 4425 prefixq(src, dst); 4426 emit_byte(0x8B); 4427 emit_operand(dst, src); 4428 } 4429 4430 void Assembler::movq(Address dst, Register src) { 4431 InstructionMark im(this); 4432 prefixq(dst, src); 4433 emit_byte(0x89); 4434 emit_operand(src, dst); 4435 } 4436 4437 void Assembler::movsbq(Register dst, Address src) { 4438 InstructionMark im(this); 4439 prefixq(src, dst); 4440 emit_byte(0x0F); 4441 emit_byte(0xBE); 4442 emit_operand(dst, src); 4443 } 4444 4445 void Assembler::movsbq(Register dst, Register src) { 4446 int encode 
= prefixq_and_encode(dst->encoding(), src->encoding()); 4447 emit_byte(0x0F); 4448 emit_byte(0xBE); 4449 emit_byte(0xC0 | encode); 4450 } 4451 4452 void Assembler::movslq(Register dst, int32_t imm32) { 4453 // dbx shows movslq(rcx, 3) as movq $0x0000000049000000,(%rbx) 4454 // and movslq(r8, 3); as movl $0x0000000048000000,(%rbx) 4455 // as a result we shouldn't use until tested at runtime... 4456 ShouldNotReachHere(); 4457 InstructionMark im(this); 4458 int encode = prefixq_and_encode(dst->encoding()); 4459 emit_byte(0xC7 | encode); 4460 emit_long(imm32); 4461 } 4462 4463 void Assembler::movslq(Address dst, int32_t imm32) { 4464 assert(is_simm32(imm32), "lost bits"); 4465 InstructionMark im(this); 4466 prefixq(dst); 4467 emit_byte(0xC7); 4468 emit_operand(rax, dst, 4); 4469 emit_long(imm32); 4470 } 4471 4472 void Assembler::movslq(Register dst, Address src) { 4473 InstructionMark im(this); 4474 prefixq(src, dst); 4475 emit_byte(0x63); 4476 emit_operand(dst, src); 4477 } 4478 4479 void Assembler::movslq(Register dst, Register src) { 4480 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 4481 emit_byte(0x63); 4482 emit_byte(0xC0 | encode); 4483 } 4484 4485 void Assembler::movswq(Register dst, Address src) { 4486 InstructionMark im(this); 4487 prefixq(src, dst); 4488 emit_byte(0x0F); 4489 emit_byte(0xBF); 4490 emit_operand(dst, src); 4491 } 4492 4493 void Assembler::movswq(Register dst, Register src) { 4494 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 4495 emit_byte(0x0F); 4496 emit_byte(0xBF); 4497 emit_byte(0xC0 | encode); 4498 } 4499 4500 void Assembler::movzbq(Register dst, Address src) { 4501 InstructionMark im(this); 4502 prefixq(src, dst); 4503 emit_byte(0x0F); 4504 emit_byte(0xB6); 4505 emit_operand(dst, src); 4506 } 4507 4508 void Assembler::movzbq(Register dst, Register src) { 4509 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 4510 emit_byte(0x0F); 4511 emit_byte(0xB6); 4512 emit_byte(0xC0 | 
encode); 4513 } 4514 4515 void Assembler::movzwq(Register dst, Address src) { 4516 InstructionMark im(this); 4517 prefixq(src, dst); 4518 emit_byte(0x0F); 4519 emit_byte(0xB7); 4520 emit_operand(dst, src); 4521 } 4522 4523 void Assembler::movzwq(Register dst, Register src) { 4524 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 4525 emit_byte(0x0F); 4526 emit_byte(0xB7); 4527 emit_byte(0xC0 | encode); 4528 } 4529 4530 void Assembler::negq(Register dst) { 4531 int encode = prefixq_and_encode(dst->encoding()); 4532 emit_byte(0xF7); 4533 emit_byte(0xD8 | encode); 4534 } 4535 4536 void Assembler::notq(Register dst) { 4537 int encode = prefixq_and_encode(dst->encoding()); 4538 emit_byte(0xF7); 4539 emit_byte(0xD0 | encode); 4540 } 4541 4542 void Assembler::orq(Address dst, int32_t imm32) { 4543 InstructionMark im(this); 4544 prefixq(dst); 4545 emit_byte(0x81); 4546 emit_operand(rcx, dst, 4); 4547 emit_long(imm32); 4548 } 4549 4550 void Assembler::orq(Register dst, int32_t imm32) { 4551 (void) prefixq_and_encode(dst->encoding()); 4552 emit_arith(0x81, 0xC8, dst, imm32); 4553 } 4554 4555 void Assembler::orq(Register dst, Address src) { 4556 InstructionMark im(this); 4557 prefixq(src, dst); 4558 emit_byte(0x0B); 4559 emit_operand(dst, src); 4560 } 4561 4562 void Assembler::orq(Register dst, Register src) { 4563 (void) prefixq_and_encode(dst->encoding(), src->encoding()); 4564 emit_arith(0x0B, 0xC0, dst, src); 4565 } 4566 4567 void Assembler::popa() { // 64bit 4568 movq(r15, Address(rsp, 0)); 4569 movq(r14, Address(rsp, wordSize)); 4570 movq(r13, Address(rsp, 2 * wordSize)); 4571 movq(r12, Address(rsp, 3 * wordSize)); 4572 movq(r11, Address(rsp, 4 * wordSize)); 4573 movq(r10, Address(rsp, 5 * wordSize)); 4574 movq(r9, Address(rsp, 6 * wordSize)); 4575 movq(r8, Address(rsp, 7 * wordSize)); 4576 movq(rdi, Address(rsp, 8 * wordSize)); 4577 movq(rsi, Address(rsp, 9 * wordSize)); 4578 movq(rbp, Address(rsp, 10 * wordSize)); 4579 // skip rsp 4580 movq(rbx, 
Address(rsp, 12 * wordSize)); 4581 movq(rdx, Address(rsp, 13 * wordSize)); 4582 movq(rcx, Address(rsp, 14 * wordSize)); 4583 movq(rax, Address(rsp, 15 * wordSize)); 4584 4585 addq(rsp, 16 * wordSize); 4586 } 4587 4588 void Assembler::popcntq(Register dst, Address src) { 4589 assert(VM_Version::supports_popcnt(), "must support"); 4590 InstructionMark im(this); 4591 emit_byte(0xF3); 4592 prefixq(src, dst); 4593 emit_byte(0x0F); 4594 emit_byte(0xB8); 4595 emit_operand(dst, src); 4596 } 4597 4598 void Assembler::popcntq(Register dst, Register src) { 4599 assert(VM_Version::supports_popcnt(), "must support"); 4600 emit_byte(0xF3); 4601 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 4602 emit_byte(0x0F); 4603 emit_byte(0xB8); 4604 emit_byte(0xC0 | encode); 4605 } 4606 4607 void Assembler::popq(Address dst) { 4608 InstructionMark im(this); 4609 prefixq(dst); 4610 emit_byte(0x8F); 4611 emit_operand(rax, dst); 4612 } 4613 4614 void Assembler::pusha() { // 64bit 4615 // we have to store original rsp. ABI says that 128 bytes 4616 // below rsp are local scratch. 
  // Save rsp first into the red zone (stack slot at the position where the
  // skipped rsp slot of the 16-word frame will land after the subq below).
  movq(Address(rsp, -5 * wordSize), rsp);

  subq(rsp, 16 * wordSize);

  movq(Address(rsp, 15 * wordSize), rax);
  movq(Address(rsp, 14 * wordSize), rcx);
  movq(Address(rsp, 13 * wordSize), rdx);
  movq(Address(rsp, 12 * wordSize), rbx);
  // skip rsp
  movq(Address(rsp, 10 * wordSize), rbp);
  movq(Address(rsp, 9 * wordSize),  rsi);
  movq(Address(rsp, 8 * wordSize),  rdi);
  movq(Address(rsp, 7 * wordSize),  r8);
  movq(Address(rsp, 6 * wordSize),  r9);
  movq(Address(rsp, 5 * wordSize),  r10);
  movq(Address(rsp, 4 * wordSize),  r11);
  movq(Address(rsp, 3 * wordSize),  r12);
  movq(Address(rsp, 2 * wordSize),  r13);
  movq(Address(rsp, wordSize),      r14);
  movq(Address(rsp, 0),             r15);
}

// PUSH m64 (FF /6 -- rsi encodes /6).
void Assembler::pushq(Address src) {
  InstructionMark im(this);
  prefixq(src);
  emit_byte(0xFF);
  emit_operand(rsi, src);
}

// Rotate-through-carry left by immediate (D1/C1 /2).
// Note: isShiftCount checks a 32-bit count, so the 64-bit count (up to 63)
// is halved before the check.
void Assembler::rclq(Register dst, int imm8) {
  assert(isShiftCount(imm8 >> 1), "illegal shift count");
  int encode = prefixq_and_encode(dst->encoding());
  if (imm8 == 1) {
    emit_byte(0xD1);            // one-bit short form
    emit_byte(0xD0 | encode);
  } else {
    emit_byte(0xC1);
    emit_byte(0xD0 | encode);
    emit_byte(imm8);
  }
}

// Arithmetic right shift by immediate (D1/C1 /7).
void Assembler::sarq(Register dst, int imm8) {
  assert(isShiftCount(imm8 >> 1), "illegal shift count");
  int encode = prefixq_and_encode(dst->encoding());
  if (imm8 == 1) {
    emit_byte(0xD1);
    emit_byte(0xF8 | encode);
  } else {
    emit_byte(0xC1);
    emit_byte(0xF8 | encode);
    emit_byte(imm8);
  }
}

// Arithmetic right shift by cl (D3 /7).
void Assembler::sarq(Register dst) {
  int encode = prefixq_and_encode(dst->encoding());
  emit_byte(0xD3);
  emit_byte(0xF8 | encode);
}

// Subtract-with-borrow, memory destination (0x81 /3 -- rbx encodes /3).
void Assembler::sbbq(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefixq(dst);
  emit_arith_operand(0x81, rbx, dst, imm32);
}

void Assembler::sbbq(Register dst, int32_t imm32) {
  (void) prefixq_and_encode(dst->encoding());
  emit_arith(0x81, 0xD8, dst, imm32);
}

void Assembler::sbbq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x1B);
  emit_operand(dst, src);
}

void Assembler::sbbq(Register dst, Register src) {
  (void) prefixq_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x1B, 0xC0, dst, src);
}

// Shift left by immediate (D1/C1 /4).
void Assembler::shlq(Register dst, int imm8) {
  assert(isShiftCount(imm8 >> 1), "illegal shift count");
  int encode = prefixq_and_encode(dst->encoding());
  if (imm8 == 1) {
    emit_byte(0xD1);
    emit_byte(0xE0 | encode);
  } else {
    emit_byte(0xC1);
    emit_byte(0xE0 | encode);
    emit_byte(imm8);
  }
}

// Shift left by cl (D3 /4).
void Assembler::shlq(Register dst) {
  int encode = prefixq_and_encode(dst->encoding());
  emit_byte(0xD3);
  emit_byte(0xE0 | encode);
}

// Logical right shift by immediate (C1 /5).  Unlike sarq/shlq, no one-bit
// short form is used here.
void Assembler::shrq(Register dst, int imm8) {
  assert(isShiftCount(imm8 >> 1), "illegal shift count");
  int encode = prefixq_and_encode(dst->encoding());
  emit_byte(0xC1);
  emit_byte(0xE8 | encode);
  emit_byte(imm8);
}

// Logical right shift by cl (D3 /5).
void Assembler::shrq(Register dst) {
  int encode = prefixq_and_encode(dst->encoding());
  emit_byte(0xD3);
  emit_byte(0xE8 | encode);
}

// Subtract immediate from memory (0x81 /5 -- rbp encodes /5).
void Assembler::subq(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefixq(dst);
  emit_arith_operand(0x81, rbp, dst, imm32);
}

void Assembler::subq(Address dst, Register src) {
  InstructionMark im(this);
  prefixq(dst, src);
  emit_byte(0x29);
  emit_operand(src, dst);
}

void Assembler::subq(Register dst, int32_t imm32) {
  (void) prefixq_and_encode(dst->encoding());
  emit_arith(0x81, 0xE8, dst, imm32);
}

void Assembler::subq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x2B);
  emit_operand(dst, src);
}

void Assembler::subq(Register dst, Register src) {
  (void) prefixq_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x2B, 0xC0, dst, src);
}

void Assembler::testq(Register dst, int32_t imm32) {
  // not using emit_arith because test
  // doesn't support sign-extension of
  // 8bit operands
  int encode = dst->encoding();
  if (encode == 0) {
    // rax has a dedicated short form: REX.W A9 imm32
    prefix(REX_W);
    emit_byte(0xA9);
  } else {
    encode = prefixq_and_encode(encode);
    emit_byte(0xF7);
    emit_byte(0xC0 | encode);
  }
  emit_long(imm32);
}

void Assembler::testq(Register dst, Register src) {
  (void) prefixq_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x85, 0xC0, dst, src);
}

// Exchange-and-add (0F C1); used with a LOCK prefix for atomic add.
void Assembler::xaddq(Address dst, Register src) {
  InstructionMark im(this);
  prefixq(dst, src);
  emit_byte(0x0F);
  emit_byte(0xC1);
  emit_operand(src, dst);
}

// Exchange register with memory (0x87); implicitly locked by the CPU.
void Assembler::xchgq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x87);
  emit_operand(dst, src);
}

void Assembler::xchgq(Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x87);
  emit_byte(0xc0 | encode);
}

void Assembler::xorq(Register dst, Register src) {
  (void) prefixq_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x33, 0xC0, dst, src);
}

void Assembler::xorq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x33);
  emit_operand(dst, src);
}

#endif // _LP64

// Maps each condition code to its logical negation; indexed by the
// Assembler::Condition value being reversed (see the per-entry comments).
static Assembler::Condition reverse[] = {
    Assembler::noOverflow   /* overflow      = 0x0 */ ,
    Assembler::overflow     /* noOverflow    = 0x1 */ ,
    Assembler::aboveEqual   /* carrySet      = 0x2, below = 0x2 */ ,
    Assembler::below        /* 
aboveEqual = 0x3, carryClear = 0x3 */ ,
    Assembler::notZero      /* zero          = 0x4, equal = 0x4 */ ,
    Assembler::zero         /* notZero       = 0x5, notEqual = 0x5 */ ,
    Assembler::above        /* belowEqual    = 0x6 */ ,
    Assembler::belowEqual   /* above         = 0x7 */ ,
    Assembler::positive     /* negative      = 0x8 */ ,
    Assembler::negative     /* positive      = 0x9 */ ,
    Assembler::noParity     /* parity        = 0xa */ ,
    Assembler::parity       /* noParity      = 0xb */ ,
    Assembler::greaterEqual /* less          = 0xc */ ,
    Assembler::less         /* greaterEqual  = 0xd */ ,
    Assembler::greater      /* lessEqual     = 0xe */ ,
    Assembler::lessEqual    /* greater       = 0xf, */

};


// Implementation of MacroAssembler

// First all the versions that have distinct versions depending on 32/64 bit
// Unless the difference is trivial (1 line or so).

#ifndef _LP64

// 32bit versions

// On 32-bit, an AddressLiteral is simply an absolute address + reloc.
Address MacroAssembler::as_Address(AddressLiteral adr) {
  return Address(adr.target(), adr.rspec());
}

Address MacroAssembler::as_Address(ArrayAddress adr) {
  return Address::make_array(adr);
}

// Attempt to acquire the bias of obj_reg for the current thread.
// On success falls through to 'done'; on contention jumps to *slow_case
// (if given); if biasing does not apply, falls through to the CAS path.
// Returns the code offset of the instruction whose implicit null check
// covers the mark-word load (-1 semantics depend on swap_reg_contains_mark).
// swap_reg must be rax (cmpxchg); tmp_reg may be noreg, in which case
// lock_reg is borrowed (and saved/restored around each use).
int MacroAssembler::biased_locking_enter(Register lock_reg,
                                         Register obj_reg,
                                         Register swap_reg,
                                         Register tmp_reg,
                                         bool swap_reg_contains_mark,
                                         Label& done,
                                         Label* slow_case,
                                         BiasedLockingCounters* counters) {
  assert(UseBiasedLocking, "why call this otherwise?");
  assert(swap_reg == rax, "swap_reg must be rax, for cmpxchg");
  assert_different_registers(lock_reg, obj_reg, swap_reg);

  if (PrintBiasedLockingStatistics && counters == NULL)
    counters = BiasedLocking::counters();

  bool need_tmp_reg = false;
  if (tmp_reg == noreg) {
    // No scratch register supplied: borrow lock_reg, pushing/popping it
    // around every use so its value is preserved for the caller.
    need_tmp_reg = true;
    tmp_reg = lock_reg;
  } else {
    assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg);
  }
  assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
  Address mark_addr      (obj_reg, oopDesc::mark_offset_in_bytes());
  Address klass_addr     (obj_reg, oopDesc::klass_offset_in_bytes());
  Address saved_mark_addr(lock_reg, 0);

  // Biased locking
  // See whether the lock is currently biased toward our thread and
  // whether the epoch is still valid
  // Note that the runtime guarantees sufficient alignment of JavaThread
  // pointers to allow age to be placed into low bits
  // First check to see whether biasing is even enabled for this object
  Label cas_label;
  int null_check_offset = -1;
  if (!swap_reg_contains_mark) {
    null_check_offset = offset();
    movl(swap_reg, mark_addr);
  }
  if (need_tmp_reg) {
    push(tmp_reg);
  }
  movl(tmp_reg, swap_reg);
  andl(tmp_reg, markOopDesc::biased_lock_mask_in_place);
  cmpl(tmp_reg, markOopDesc::biased_lock_pattern);
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  jcc(Assembler::notEqual, cas_label);
  // The bias pattern is present in the object's header. Need to check
  // whether the bias owner and the epoch are both still current.
  // Note that because there is no current thread register on x86 we
  // need to store off the mark word we read out of the object to
  // avoid reloading it and needing to recheck invariants below. This
  // store is unfortunate but it makes the overall code shorter and
  // simpler.
  movl(saved_mark_addr, swap_reg);
  if (need_tmp_reg) {
    push(tmp_reg);
  }
  get_thread(tmp_reg);
  xorl(swap_reg, tmp_reg);
  if (swap_reg_contains_mark) {
    null_check_offset = offset();
  }
  // XOR against the klass's prototype header: result is zero iff thread,
  // epoch and bias pattern all match (age bits are masked off below).
  movl(tmp_reg, klass_addr);
  xorl(swap_reg, Address(tmp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
  andl(swap_reg, ~((int) markOopDesc::age_mask_in_place));
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  if (counters != NULL) {
    cond_inc32(Assembler::zero,
               ExternalAddress((address)counters->biased_lock_entry_count_addr()));
  }
  jcc(Assembler::equal, done);

  Label try_revoke_bias;
  Label try_rebias;

  // At this point we know that the header has the bias pattern and
  // that we are not the bias owner in the current epoch. We need to
  // figure out more details about the state of the header in order to
  // know what operations can be legally performed on the object's
  // header.

  // If the low three bits in the xor result aren't clear, that means
  // the prototype header is no longer biased and we have to revoke
  // the bias on this object.
  testl(swap_reg, markOopDesc::biased_lock_mask_in_place);
  jcc(Assembler::notZero, try_revoke_bias);

  // Biasing is still enabled for this data type. See whether the
  // epoch of the current bias is still valid, meaning that the epoch
  // bits of the mark word are equal to the epoch bits of the
  // prototype header. (Note that the prototype header's epoch bits
  // only change at a safepoint.) If not, attempt to rebias the object
  // toward the current thread. Note that we must be absolutely sure
  // that the current epoch is invalid in order to do this because
  // otherwise the manipulations it performs on the mark word are
  // illegal.
  testl(swap_reg, markOopDesc::epoch_mask_in_place);
  jcc(Assembler::notZero, try_rebias);

  // The epoch of the current bias is still valid but we know nothing
  // about the owner; it might be set or it might be clear. Try to
  // acquire the bias of the object using an atomic operation. If this
  // fails we will go in to the runtime to revoke the object's bias.
  // Note that we first construct the presumed unbiased header so we
  // don't accidentally blow away another thread's valid bias.
  movl(swap_reg, saved_mark_addr);
  andl(swap_reg,
       markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);
  if (need_tmp_reg) {
    push(tmp_reg);
  }
  get_thread(tmp_reg);
  orl(tmp_reg, swap_reg);
  if (os::is_MP()) {
    lock();
  }
  cmpxchgptr(tmp_reg, Address(obj_reg, 0));
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  // If the biasing toward our thread failed, this means that
  // another thread succeeded in biasing it toward itself and we
  // need to revoke that bias. The revocation will occur in the
  // interpreter runtime in the slow case.
  if (counters != NULL) {
    cond_inc32(Assembler::zero,
               ExternalAddress((address)counters->anonymously_biased_lock_entry_count_addr()));
  }
  if (slow_case != NULL) {
    jcc(Assembler::notZero, *slow_case);
  }
  jmp(done);

  bind(try_rebias);
  // At this point we know the epoch has expired, meaning that the
  // current "bias owner", if any, is actually invalid. Under these
  // circumstances _only_, we are allowed to use the current header's
  // value as the comparison value when doing the cas to acquire the
  // bias in the current epoch. In other words, we allow transfer of
  // the bias from one thread to another directly in this situation.
  //
  // FIXME: due to a lack of registers we currently blow away the age
  // bits in this situation. Should attempt to preserve them.
  if (need_tmp_reg) {
    push(tmp_reg);
  }
  get_thread(tmp_reg);
  movl(swap_reg, klass_addr);
  orl(tmp_reg, Address(swap_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
  movl(swap_reg, saved_mark_addr);
  if (os::is_MP()) {
    lock();
  }
  cmpxchgptr(tmp_reg, Address(obj_reg, 0));
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  // If the biasing toward our thread failed, then another thread
  // succeeded in biasing it toward itself and we need to revoke that
  // bias. The revocation will occur in the runtime in the slow case.
  if (counters != NULL) {
    cond_inc32(Assembler::zero,
               ExternalAddress((address)counters->rebiased_lock_entry_count_addr()));
  }
  if (slow_case != NULL) {
    jcc(Assembler::notZero, *slow_case);
  }
  jmp(done);

  bind(try_revoke_bias);
  // The prototype mark in the klass doesn't have the bias bit set any
  // more, indicating that objects of this data type are not supposed
  // to be biased any more. We are going to try to reset the mark of
  // this object to the prototype value and fall through to the
  // CAS-based locking scheme. Note that if our CAS fails, it means
  // that another thread raced us for the privilege of revoking the
  // bias of this particular object, so it's okay to continue in the
  // normal locking code.
  //
  // FIXME: due to a lack of registers we currently blow away the age
  // bits in this situation. Should attempt to preserve them.
  movl(swap_reg, saved_mark_addr);
  if (need_tmp_reg) {
    push(tmp_reg);
  }
  movl(tmp_reg, klass_addr);
  movl(tmp_reg, Address(tmp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
  if (os::is_MP()) {
    lock();
  }
  cmpxchgptr(tmp_reg, Address(obj_reg, 0));
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  // Fall through to the normal CAS-based lock, because no matter what
  // the result of the above CAS, some thread must have succeeded in
  // removing the bias bit from the object's header.
  if (counters != NULL) {
    cond_inc32(Assembler::zero,
               ExternalAddress((address)counters->revoked_lock_entry_count_addr()));
  }

  bind(cas_label);

  return null_check_offset;
}

// Call a leaf runtime entry; caller pushed the arguments, so pop them here.
void MacroAssembler::call_VM_leaf_base(address entry_point,
                                       int number_of_arguments) {
  call(RuntimeAddress(entry_point));
  increment(rsp, number_of_arguments * wordSize);
}

// Compare an oop constant against memory/register; the immediate carries an
// oop relocation so GC can patch it.
void MacroAssembler::cmpoop(Address src1, jobject obj) {
  cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate());
}

void MacroAssembler::cmpoop(Register src1, jobject obj) {
  cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate());
}

// Sign-extend lo into hi:lo (for 64-bit division on 32-bit).
void MacroAssembler::extend_sign(Register hi, Register lo) {
  // According to Intel Doc. AP-526, "Integer Divide", p.18.
  // On P6+ with the canonical rdx:rax pair, CDQ is faster than mov+sar.
  if (VM_Version::is_P6() && hi == rdx && lo == rax) {
    cdql();
  } else {
    movl(hi, lo);
    sarl(hi, 31);
  }
}

void MacroAssembler::fat_nop() {
  // A 5 byte nop that is safe for patching (see patch_verified_entry)
  emit_byte(0x26); // es:
  emit_byte(0x2e); // cs:
  emit_byte(0x64); // fs:
  emit_byte(0x65); // gs:
  emit_byte(0x90);
}

// Branch to L if FPU condition flag C2 is set (used for partial-remainder
// loops).  Clobbers nothing: rax is saved/restored around fnstsw.
void MacroAssembler::jC2(Register tmp, Label& L) {
  // set parity bit if FPU flag C2 is set (via rax)
  save_rax(tmp);
  fwait(); fnstsw_ax();
  sahf();
  restore_rax(tmp);
  // branch
  jcc(Assembler::parity, L);
}

// Branch to L if FPU condition flag C2 is clear.
void MacroAssembler::jnC2(Register tmp, Label& L) {
  // set parity bit if FPU flag C2 is set (via rax)
  save_rax(tmp);
  fwait(); fnstsw_ax();
  sahf();
  restore_rax(tmp);
  // branch
  jcc(Assembler::noParity, L);
}

// 32bit can do a case table jump in one instruction but we no longer allow the base
// to be installed in the Address class
void MacroAssembler::jump(ArrayAddress entry) {
  jmp(as_Address(entry));
}

// Note: y_lo will be destroyed
void MacroAssembler::lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo) {
  // Long compare for Java (semantics as described in JVM spec.)
  // Result in x_hi: -1, 0 or 1.
  Label high, low, done;

  cmpl(x_hi, y_hi);
  jcc(Assembler::less, low);
  jcc(Assembler::greater, high);
  // x_hi is the return register
  xorl(x_hi, x_hi);
  cmpl(x_lo, y_lo);
  jcc(Assembler::below, low);    // low halves compare unsigned
  jcc(Assembler::equal, done);

  bind(high);
  xorl(x_hi, x_hi);
  increment(x_hi);
  jmp(done);

  bind(low);
  xorl(x_hi, x_hi);
  decrementl(x_hi);

  bind(done);
}

// Load the literal address (with relocation) into dst.
void MacroAssembler::lea(Register dst, AddressLiteral src) {
  mov_literal32(dst, (int32_t)src.target(), src.rspec());
}

void MacroAssembler::lea(Address dst, AddressLiteral adr) {
  // leal(dst, as_Address(adr));
  // see note in movl as to why we must use a move
  mov_literal32(dst, (int32_t) adr.target(), adr.rspec());
}

void MacroAssembler::leave() {
  mov(rsp, rbp);
  pop(rbp);
}

void MacroAssembler::lmul(int x_rsp_offset, int y_rsp_offset) {
  // Multiplication of two Java long values stored on the stack
  // as illustrated below. Result is in rdx:rax.
  //
  // rsp ---> [  ??  ] \               \
  //            ....    | y_rsp_offset  |
  //          [ y_lo ] /  (in bytes)    | x_rsp_offset
  //          [ y_hi ]                  | (in bytes)
  //            ....                    |
  //          [ x_lo ]                 /
  //          [ x_hi ]
  //            ....
  //
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
  Address x_hi(rsp, x_rsp_offset + wordSize); Address x_lo(rsp, x_rsp_offset);
  Address y_hi(rsp, y_rsp_offset + wordSize); Address y_lo(rsp, y_rsp_offset);
  Label quick;
  // load x_hi, y_hi and check if quick
  // multiplication is possible
  movl(rbx, x_hi);
  movl(rcx, y_hi);
  movl(rax, rbx);
  orl(rbx, rcx);                                 // rbx, = 0 <=> x_hi = 0 and y_hi = 0
  jcc(Assembler::zero, quick);                   // if rbx, = 0 do quick multiply
  // do full multiplication
  // 1st step
  mull(y_lo);                                    // x_hi * y_lo
  movl(rbx, rax);                                // save lo(x_hi * y_lo) in rbx,
  // 2nd step
  movl(rax, x_lo);
  mull(rcx);                                     // x_lo * y_hi
  addl(rbx, rax);                                // add lo(x_lo * y_hi) to rbx,
  // 3rd step
  bind(quick);                                   // note: rbx, = 0 if quick multiply!
  movl(rax, x_lo);
  mull(y_lo);                                    // x_lo * y_lo
  addl(rdx, rbx);                                // correct hi(x_lo * y_lo)
}

// Negate the 64-bit value in hi:lo.
void MacroAssembler::lneg(Register hi, Register lo) {
  negl(lo);
  adcl(hi, 0);
  negl(hi);
}

void MacroAssembler::lshl(Register hi, Register lo) {
  // Java shift left long support (semantics as described in JVM spec., p.305)
  // (basic idea for shift counts s >= n: x << s == (x << n) << (s - n))
  // shift value is in rcx !
  assert(hi != rcx, "must not use rcx");
  assert(lo != rcx, "must not use rcx");
  const Register s = rcx;                        // shift count
  const int      n = BitsPerWord;
  Label L;
  andl(s, 0x3f);                                 // s := s & 0x3f (s < 0x40)
  cmpl(s, n);                                    // if (s < n)
  jcc(Assembler::less, L);                       // else (s >= n)
  movl(hi, lo);                                  // x := x << n
  xorl(lo, lo);
  // Note: subl(s, n) is not needed since the Intel shift instructions work rcx mod n!
  bind(L);                                       // s (mod n) < n
  shldl(hi, lo);                                 // x := x << s
  shll(lo);
}


void MacroAssembler::lshr(Register hi, Register lo, bool sign_extension) {
  // Java shift right long support (semantics as described in JVM spec., p.306 & p.310)
  // (basic idea for shift counts s >= n: x >> s == (x >> n) >> (s - n))
  assert(hi != rcx, "must not use rcx");
  assert(lo != rcx, "must not use rcx");
  const Register s = rcx;                        // shift count
  const int      n = BitsPerWord;
  Label L;
  andl(s, 0x3f);                                 // s := s & 0x3f (s < 0x40)
  cmpl(s, n);                                    // if (s < n)
  jcc(Assembler::less, L);                       // else (s >= n)
  movl(lo, hi);                                  // x := x >> n
  if (sign_extension) sarl(hi, 31);
  else                xorl(hi, hi);
  // Note: subl(s, n) is not needed since the Intel shift instructions work rcx mod n!
  bind(L);                                       // s (mod n) < n
  shrdl(lo, hi);                                 // x := x >> s
  if (sign_extension) sarl(hi);
  else                shrl(hi);
}

// Load an oop constant (with oop relocation for GC patching).
void MacroAssembler::movoop(Register dst, jobject obj) {
  mov_literal32(dst, (int32_t)obj, oop_Relocation::spec_for_immediate());
}

void MacroAssembler::movoop(Address dst, jobject obj) {
  mov_literal32(dst, (int32_t)obj, oop_Relocation::spec_for_immediate());
}

// Load either the literal address (lval) or the value stored at it.
void MacroAssembler::movptr(Register dst, AddressLiteral src) {
  if (src.is_lval()) {
    mov_literal32(dst, (intptr_t)src.target(), src.rspec());
  } else {
    movl(dst, as_Address(src));
  }
}

void MacroAssembler::movptr(ArrayAddress dst, Register src) {
  movl(as_Address(dst), src);
}

void MacroAssembler::movptr(Register dst, ArrayAddress src) {
  movl(dst, as_Address(src));
}

// src should NEVER be a real pointer.
// Use AddressLiteral for true pointers
void MacroAssembler::movptr(Address dst, intptr_t src) {
  movl(dst, src);
}


void MacroAssembler::pop_callee_saved_registers() {
  pop(rcx);
  pop(rdx);
  pop(rdi);
  pop(rsi);
}

// Reload the FPU top-of-stack value saved by push_fTOS().
void MacroAssembler::pop_fTOS() {
  fld_d(Address(rsp, 0));
  addl(rsp, 2 * wordSize);
}

void MacroAssembler::push_callee_saved_registers() {
  push(rsi);
  push(rdi);
  push(rdx);
  push(rcx);
}

// Spill the FPU top-of-stack value (a double) to the stack.
void MacroAssembler::push_fTOS() {
  subl(rsp, 2 * wordSize);
  fstp_d(Address(rsp, 0));
}


// Push an oop constant (with oop relocation for GC patching).
void MacroAssembler::pushoop(jobject obj) {
  push_literal32((int32_t)obj, oop_Relocation::spec_for_immediate());
}


// Push either the literal address (lval) or the value stored at it.
void MacroAssembler::pushptr(AddressLiteral src) {
  if (src.is_lval()) {
    push_literal32((int32_t)src.target(), src.rspec());
  } else {
    pushl(as_Address(src));
  }
}

// dst := (ZF == 0) ? 1 : 0
void MacroAssembler::set_word_if_not_zero(Register dst) {
  xorl(dst, dst);
  set_byte_if_not_zero(dst);
}

// 32-bit calling convention for call_VM_leaf & friends: all arguments are
// passed on the stack, pushed right-to-left by the callers of these helpers.
static void pass_arg0(MacroAssembler* masm, Register arg) {
  masm->push(arg);
}

static void pass_arg1(MacroAssembler* masm, Register arg) {
  masm->push(arg);
}

static void pass_arg2(MacroAssembler* masm, Register arg) {
  masm->push(arg);
}

static void pass_arg3(MacroAssembler* masm, Register arg) {
  masm->push(arg);
}

#ifndef PRODUCT
extern "C" void findpc(intptr_t x);
#endif

// Debug-stop landing pad called from stop(): prints the saved register
// state and either pops a message box or asserts.
void MacroAssembler::debug32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip, char* msg) {
  // In order to get locks to work, we need to fake a in_VM state
  JavaThread* thread = JavaThread::current();
  JavaThreadState saved_state = thread->thread_state();
  thread->set_thread_state(_thread_in_vm);
  if (ShowMessageBoxOnError) {
    // NOTE(review): these three lines re-declare 'thread'/'saved_state',
    // shadowing the identical outer declarations above -- redundant but
    // harmless; the outer pair is the one used for the final transition.
    JavaThread* thread = JavaThread::current();
    JavaThreadState saved_state = thread->thread_state();
    thread->set_thread_state(_thread_in_vm);
    if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
      ttyLocker ttyl;
      BytecodeCounter::print();
    }
    // To see where a verify_oop failed, get $ebx+40/X for this frame.
    // This is the value of eip which points to where verify_oop will return.
    if (os::message_box(msg, "Execution stopped, print registers?")) {
      ttyLocker ttyl;
      tty->print_cr("eip = 0x%08x", eip);
#ifndef PRODUCT
      if ((WizardMode || Verbose) && PrintMiscellaneous) {
        tty->cr();
        findpc(eip);
        tty->cr();
      }
#endif
      tty->print_cr("rax = 0x%08x", rax);
      tty->print_cr("rbx = 0x%08x", rbx);
      tty->print_cr("rcx = 0x%08x", rcx);
      tty->print_cr("rdx = 0x%08x", rdx);
      tty->print_cr("rdi = 0x%08x", rdi);
      tty->print_cr("rsi = 0x%08x", rsi);
      tty->print_cr("rbp = 0x%08x", rbp);
      tty->print_cr("rsp = 0x%08x", rsp);
      BREAKPOINT;
      assert(false, "start up GDB");
    }
  } else {
    ttyLocker ttyl;
    ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg);
    assert(false, err_msg("DEBUG MESSAGE: %s", msg));
  }
  ThreadStateTransition::transition(thread, _thread_in_vm, saved_state);
}

// Emit code that stops the VM with message msg: pushes the message address
// and eip, saves all registers, then calls debug32 above.
void MacroAssembler::stop(const char* msg) {
  ExternalAddress message((address)msg);
  // push address of message
  pushptr(message.addr());
  { Label L; call(L, relocInfo::none); bind(L); }        // push eip
  pusha();                                               // push registers
  call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug32)));
  hlt();
}

// Emit code that prints a warning (non-fatal) and continues.
void MacroAssembler::warn(const char* msg) {
  push_CPU_state();

  ExternalAddress message((address) msg);
  // push address of message
  pushptr(message.addr());

  call(RuntimeAddress(CAST_FROM_FN_PTR(address, warning)));
  addl(rsp, wordSize);       // discard argument
  pop_CPU_state();
}

#else // _LP64

// 64 bit versions

Address MacroAssembler::as_Address(AddressLiteral adr) {
  // amd64 always does this as a pc-rel
  // we can be absolute or disp based on the instruction type
  // jmp/call are displacements others are absolute
  assert(!adr.is_lval(), "must be rval");
  assert(reachable(adr), "must be");
  return Address((int32_t)(intptr_t)(adr.target() - pc()), adr.target(), adr.reloc());

}

// Materializes the array base into rscratch1 and returns an Address that
// indexes off it.
Address MacroAssembler::as_Address(ArrayAddress adr) {
  AddressLiteral base = adr.base();
  lea(rscratch1, base);
  Address index = adr.index();
  assert(index._disp == 0, "must not have disp"); // maybe it can?
  Address array(rscratch1, index._index, index._scale, index._disp);
  return array;
}

// 64-bit biased-locking entry; same protocol as the 32-bit version above,
// but r15_thread is available so no tmp_reg juggling is needed.
int MacroAssembler::biased_locking_enter(Register lock_reg,
                                         Register obj_reg,
                                         Register swap_reg,
                                         Register tmp_reg,
                                         bool swap_reg_contains_mark,
                                         Label& done,
                                         Label* slow_case,
                                         BiasedLockingCounters* counters) {
  assert(UseBiasedLocking, "why call this otherwise?");
  assert(swap_reg == rax, "swap_reg must be rax for cmpxchgq");
  assert(tmp_reg != noreg, "tmp_reg must be supplied");
  assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg);
  assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
  Address mark_addr      (obj_reg, oopDesc::mark_offset_in_bytes());
  Address saved_mark_addr(lock_reg, 0);

  if (PrintBiasedLockingStatistics && counters == NULL)
    counters = BiasedLocking::counters();

  // Biased locking
  // See whether the lock is currently biased toward our thread and
  // whether the epoch is still valid
  // Note that the runtime guarantees sufficient alignment of JavaThread
  // pointers to allow age to be placed into low bits
  // First 
check to see whether biasing is even enabled for this object 5463 Label cas_label; 5464 int null_check_offset = -1; 5465 if (!swap_reg_contains_mark) { 5466 null_check_offset = offset(); 5467 movq(swap_reg, mark_addr); 5468 } 5469 movq(tmp_reg, swap_reg); 5470 andq(tmp_reg, markOopDesc::biased_lock_mask_in_place); 5471 cmpq(tmp_reg, markOopDesc::biased_lock_pattern); 5472 jcc(Assembler::notEqual, cas_label); 5473 // The bias pattern is present in the object's header. Need to check 5474 // whether the bias owner and the epoch are both still current. 5475 load_prototype_header(tmp_reg, obj_reg); 5476 orq(tmp_reg, r15_thread); 5477 xorq(tmp_reg, swap_reg); 5478 andq(tmp_reg, ~((int) markOopDesc::age_mask_in_place)); 5479 if (counters != NULL) { 5480 cond_inc32(Assembler::zero, 5481 ExternalAddress((address) counters->anonymously_biased_lock_entry_count_addr())); 5482 } 5483 jcc(Assembler::equal, done); 5484 5485 Label try_revoke_bias; 5486 Label try_rebias; 5487 5488 // At this point we know that the header has the bias pattern and 5489 // that we are not the bias owner in the current epoch. We need to 5490 // figure out more details about the state of the header in order to 5491 // know what operations can be legally performed on the object's 5492 // header. 5493 5494 // If the low three bits in the xor result aren't clear, that means 5495 // the prototype header is no longer biased and we have to revoke 5496 // the bias on this object. 5497 testq(tmp_reg, markOopDesc::biased_lock_mask_in_place); 5498 jcc(Assembler::notZero, try_revoke_bias); 5499 5500 // Biasing is still enabled for this data type. See whether the 5501 // epoch of the current bias is still valid, meaning that the epoch 5502 // bits of the mark word are equal to the epoch bits of the 5503 // prototype header. (Note that the prototype header's epoch bits 5504 // only change at a safepoint.) If not, attempt to rebias the object 5505 // toward the current thread. 
Note that we must be absolutely sure 5506 // that the current epoch is invalid in order to do this because 5507 // otherwise the manipulations it performs on the mark word are 5508 // illegal. 5509 testq(tmp_reg, markOopDesc::epoch_mask_in_place); 5510 jcc(Assembler::notZero, try_rebias); 5511 5512 // The epoch of the current bias is still valid but we know nothing 5513 // about the owner; it might be set or it might be clear. Try to 5514 // acquire the bias of the object using an atomic operation. If this 5515 // fails we will go in to the runtime to revoke the object's bias. 5516 // Note that we first construct the presumed unbiased header so we 5517 // don't accidentally blow away another thread's valid bias. 5518 andq(swap_reg, 5519 markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place); 5520 movq(tmp_reg, swap_reg); 5521 orq(tmp_reg, r15_thread); 5522 if (os::is_MP()) { 5523 lock(); 5524 } 5525 cmpxchgq(tmp_reg, Address(obj_reg, 0)); 5526 // If the biasing toward our thread failed, this means that 5527 // another thread succeeded in biasing it toward itself and we 5528 // need to revoke that bias. The revocation will occur in the 5529 // interpreter runtime in the slow case. 5530 if (counters != NULL) { 5531 cond_inc32(Assembler::zero, 5532 ExternalAddress((address) counters->anonymously_biased_lock_entry_count_addr())); 5533 } 5534 if (slow_case != NULL) { 5535 jcc(Assembler::notZero, *slow_case); 5536 } 5537 jmp(done); 5538 5539 bind(try_rebias); 5540 // At this point we know the epoch has expired, meaning that the 5541 // current "bias owner", if any, is actually invalid. Under these 5542 // circumstances _only_, we are allowed to use the current header's 5543 // value as the comparison value when doing the cas to acquire the 5544 // bias in the current epoch. In other words, we allow transfer of 5545 // the bias from one thread to another directly in this situation. 
5546 // 5547 // FIXME: due to a lack of registers we currently blow away the age 5548 // bits in this situation. Should attempt to preserve them. 5549 load_prototype_header(tmp_reg, obj_reg); 5550 orq(tmp_reg, r15_thread); 5551 if (os::is_MP()) { 5552 lock(); 5553 } 5554 cmpxchgq(tmp_reg, Address(obj_reg, 0)); 5555 // If the biasing toward our thread failed, then another thread 5556 // succeeded in biasing it toward itself and we need to revoke that 5557 // bias. The revocation will occur in the runtime in the slow case. 5558 if (counters != NULL) { 5559 cond_inc32(Assembler::zero, 5560 ExternalAddress((address) counters->rebiased_lock_entry_count_addr())); 5561 } 5562 if (slow_case != NULL) { 5563 jcc(Assembler::notZero, *slow_case); 5564 } 5565 jmp(done); 5566 5567 bind(try_revoke_bias); 5568 // The prototype mark in the klass doesn't have the bias bit set any 5569 // more, indicating that objects of this data type are not supposed 5570 // to be biased any more. We are going to try to reset the mark of 5571 // this object to the prototype value and fall through to the 5572 // CAS-based locking scheme. Note that if our CAS fails, it means 5573 // that another thread raced us for the privilege of revoking the 5574 // bias of this particular object, so it's okay to continue in the 5575 // normal locking code. 5576 // 5577 // FIXME: due to a lack of registers we currently blow away the age 5578 // bits in this situation. Should attempt to preserve them. 5579 load_prototype_header(tmp_reg, obj_reg); 5580 if (os::is_MP()) { 5581 lock(); 5582 } 5583 cmpxchgq(tmp_reg, Address(obj_reg, 0)); 5584 // Fall through to the normal CAS-based lock, because no matter what 5585 // the result of the above CAS, some thread must have succeeded in 5586 // removing the bias bit from the object's header. 
5587 if (counters != NULL) { 5588 cond_inc32(Assembler::zero, 5589 ExternalAddress((address) counters->revoked_lock_entry_count_addr())); 5590 } 5591 5592 bind(cas_label); 5593 5594 return null_check_offset; 5595 } 5596 5597 void MacroAssembler::call_VM_leaf_base(address entry_point, int num_args) { 5598 Label L, E; 5599 5600 #ifdef _WIN64 5601 // Windows always allocates space for it's register args 5602 assert(num_args <= 4, "only register arguments supported"); 5603 subq(rsp, frame::arg_reg_save_area_bytes); 5604 #endif 5605 5606 // Align stack if necessary 5607 testl(rsp, 15); 5608 jcc(Assembler::zero, L); 5609 5610 subq(rsp, 8); 5611 { 5612 call(RuntimeAddress(entry_point)); 5613 } 5614 addq(rsp, 8); 5615 jmp(E); 5616 5617 bind(L); 5618 { 5619 call(RuntimeAddress(entry_point)); 5620 } 5621 5622 bind(E); 5623 5624 #ifdef _WIN64 5625 // restore stack pointer 5626 addq(rsp, frame::arg_reg_save_area_bytes); 5627 #endif 5628 5629 } 5630 5631 void MacroAssembler::cmp64(Register src1, AddressLiteral src2) { 5632 assert(!src2.is_lval(), "should use cmpptr"); 5633 5634 if (reachable(src2)) { 5635 cmpq(src1, as_Address(src2)); 5636 } else { 5637 lea(rscratch1, src2); 5638 Assembler::cmpq(src1, Address(rscratch1, 0)); 5639 } 5640 } 5641 5642 int MacroAssembler::corrected_idivq(Register reg) { 5643 // Full implementation of Java ldiv and lrem; checks for special 5644 // case as described in JVM spec., p.243 & p.271. The function 5645 // returns the (pc) offset of the idivl instruction - may be needed 5646 // for implicit exceptions. 
5647 // 5648 // normal case special case 5649 // 5650 // input : rax: dividend min_long 5651 // reg: divisor (may not be eax/edx) -1 5652 // 5653 // output: rax: quotient (= rax idiv reg) min_long 5654 // rdx: remainder (= rax irem reg) 0 5655 assert(reg != rax && reg != rdx, "reg cannot be rax or rdx register"); 5656 static const int64_t min_long = 0x8000000000000000; 5657 Label normal_case, special_case; 5658 5659 // check for special case 5660 cmp64(rax, ExternalAddress((address) &min_long)); 5661 jcc(Assembler::notEqual, normal_case); 5662 xorl(rdx, rdx); // prepare rdx for possible special case (where 5663 // remainder = 0) 5664 cmpq(reg, -1); 5665 jcc(Assembler::equal, special_case); 5666 5667 // handle normal case 5668 bind(normal_case); 5669 cdqq(); 5670 int idivq_offset = offset(); 5671 idivq(reg); 5672 5673 // normal and special case exit 5674 bind(special_case); 5675 5676 return idivq_offset; 5677 } 5678 5679 void MacroAssembler::decrementq(Register reg, int value) { 5680 if (value == min_jint) { subq(reg, value); return; } 5681 if (value < 0) { incrementq(reg, -value); return; } 5682 if (value == 0) { ; return; } 5683 if (value == 1 && UseIncDec) { decq(reg) ; return; } 5684 /* else */ { subq(reg, value) ; return; } 5685 } 5686 5687 void MacroAssembler::decrementq(Address dst, int value) { 5688 if (value == min_jint) { subq(dst, value); return; } 5689 if (value < 0) { incrementq(dst, -value); return; } 5690 if (value == 0) { ; return; } 5691 if (value == 1 && UseIncDec) { decq(dst) ; return; } 5692 /* else */ { subq(dst, value) ; return; } 5693 } 5694 5695 void MacroAssembler::fat_nop() { 5696 // A 5 byte nop that is safe for patching (see patch_verified_entry) 5697 // Recommened sequence from 'Software Optimization Guide for the AMD 5698 // Hammer Processor' 5699 emit_byte(0x66); 5700 emit_byte(0x66); 5701 emit_byte(0x90); 5702 emit_byte(0x66); 5703 emit_byte(0x90); 5704 } 5705 5706 void MacroAssembler::incrementq(Register reg, int value) { 5707 if 
(value == min_jint) { addq(reg, value); return; } 5708 if (value < 0) { decrementq(reg, -value); return; } 5709 if (value == 0) { ; return; } 5710 if (value == 1 && UseIncDec) { incq(reg) ; return; } 5711 /* else */ { addq(reg, value) ; return; } 5712 } 5713 5714 void MacroAssembler::incrementq(Address dst, int value) { 5715 if (value == min_jint) { addq(dst, value); return; } 5716 if (value < 0) { decrementq(dst, -value); return; } 5717 if (value == 0) { ; return; } 5718 if (value == 1 && UseIncDec) { incq(dst) ; return; } 5719 /* else */ { addq(dst, value) ; return; } 5720 } 5721 5722 // 32bit can do a case table jump in one instruction but we no longer allow the base 5723 // to be installed in the Address class 5724 void MacroAssembler::jump(ArrayAddress entry) { 5725 lea(rscratch1, entry.base()); 5726 Address dispatch = entry.index(); 5727 assert(dispatch._base == noreg, "must be"); 5728 dispatch._base = rscratch1; 5729 jmp(dispatch); 5730 } 5731 5732 void MacroAssembler::lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo) { 5733 ShouldNotReachHere(); // 64bit doesn't use two regs 5734 cmpq(x_lo, y_lo); 5735 } 5736 5737 void MacroAssembler::lea(Register dst, AddressLiteral src) { 5738 mov_literal64(dst, (intptr_t)src.target(), src.rspec()); 5739 } 5740 5741 void MacroAssembler::lea(Address dst, AddressLiteral adr) { 5742 mov_literal64(rscratch1, (intptr_t)adr.target(), adr.rspec()); 5743 movptr(dst, rscratch1); 5744 } 5745 5746 void MacroAssembler::leave() { 5747 // %%% is this really better? Why not on 32bit too? 
5748 emit_byte(0xC9); // LEAVE 5749 } 5750 5751 void MacroAssembler::lneg(Register hi, Register lo) { 5752 ShouldNotReachHere(); // 64bit doesn't use two regs 5753 negq(lo); 5754 } 5755 5756 void MacroAssembler::movoop(Register dst, jobject obj) { 5757 mov_literal64(dst, (intptr_t)obj, oop_Relocation::spec_for_immediate()); 5758 } 5759 5760 void MacroAssembler::movoop(Address dst, jobject obj) { 5761 mov_literal64(rscratch1, (intptr_t)obj, oop_Relocation::spec_for_immediate()); 5762 movq(dst, rscratch1); 5763 } 5764 5765 void MacroAssembler::movptr(Register dst, AddressLiteral src) { 5766 if (src.is_lval()) { 5767 mov_literal64(dst, (intptr_t)src.target(), src.rspec()); 5768 } else { 5769 if (reachable(src)) { 5770 movq(dst, as_Address(src)); 5771 } else { 5772 lea(rscratch1, src); 5773 movq(dst, Address(rscratch1,0)); 5774 } 5775 } 5776 } 5777 5778 void MacroAssembler::movptr(ArrayAddress dst, Register src) { 5779 movq(as_Address(dst), src); 5780 } 5781 5782 void MacroAssembler::movptr(Register dst, ArrayAddress src) { 5783 movq(dst, as_Address(src)); 5784 } 5785 5786 // src should NEVER be a real pointer. 
Use AddressLiteral for true pointers 5787 void MacroAssembler::movptr(Address dst, intptr_t src) { 5788 mov64(rscratch1, src); 5789 movq(dst, rscratch1); 5790 } 5791 5792 // These are mostly for initializing NULL 5793 void MacroAssembler::movptr(Address dst, int32_t src) { 5794 movslq(dst, src); 5795 } 5796 5797 void MacroAssembler::movptr(Register dst, int32_t src) { 5798 mov64(dst, (intptr_t)src); 5799 } 5800 5801 void MacroAssembler::pushoop(jobject obj) { 5802 movoop(rscratch1, obj); 5803 push(rscratch1); 5804 } 5805 5806 void MacroAssembler::pushptr(AddressLiteral src) { 5807 lea(rscratch1, src); 5808 if (src.is_lval()) { 5809 push(rscratch1); 5810 } else { 5811 pushq(Address(rscratch1, 0)); 5812 } 5813 } 5814 5815 void MacroAssembler::reset_last_Java_frame(bool clear_fp, 5816 bool clear_pc) { 5817 // we must set sp to zero to clear frame 5818 movptr(Address(r15_thread, JavaThread::last_Java_sp_offset()), NULL_WORD); 5819 // must clear fp, so that compiled frames are not confused; it is 5820 // possible that we need it only for debugging 5821 if (clear_fp) { 5822 movptr(Address(r15_thread, JavaThread::last_Java_fp_offset()), NULL_WORD); 5823 } 5824 5825 if (clear_pc) { 5826 movptr(Address(r15_thread, JavaThread::last_Java_pc_offset()), NULL_WORD); 5827 } 5828 } 5829 5830 void MacroAssembler::set_last_Java_frame(Register last_java_sp, 5831 Register last_java_fp, 5832 address last_java_pc) { 5833 // determine last_java_sp register 5834 if (!last_java_sp->is_valid()) { 5835 last_java_sp = rsp; 5836 } 5837 5838 // last_java_fp is optional 5839 if (last_java_fp->is_valid()) { 5840 movptr(Address(r15_thread, JavaThread::last_Java_fp_offset()), 5841 last_java_fp); 5842 } 5843 5844 // last_java_pc is optional 5845 if (last_java_pc != NULL) { 5846 Address java_pc(r15_thread, 5847 JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()); 5848 lea(rscratch1, InternalAddress(last_java_pc)); 5849 movptr(java_pc, rscratch1); 5850 } 5851 5852 
  // store sp last: a non-zero last_Java_sp is what makes the anchor valid
  movptr(Address(r15_thread, JavaThread::last_Java_sp_offset()), last_java_sp);
}

// Move 'arg' into the first C argument register, unless already there.
static void pass_arg0(MacroAssembler* masm, Register arg) {
  if (c_rarg0 != arg ) {
    masm->mov(c_rarg0, arg);
  }
}

// Move 'arg' into the second C argument register, unless already there.
static void pass_arg1(MacroAssembler* masm, Register arg) {
  if (c_rarg1 != arg ) {
    masm->mov(c_rarg1, arg);
  }
}

// Move 'arg' into the third C argument register, unless already there.
static void pass_arg2(MacroAssembler* masm, Register arg) {
  if (c_rarg2 != arg ) {
    masm->mov(c_rarg2, arg);
  }
}

// Move 'arg' into the fourth C argument register, unless already there.
static void pass_arg3(MacroAssembler* masm, Register arg) {
  if (c_rarg3 != arg ) {
    masm->mov(c_rarg3, arg);
  }
}

// Emit code that halts execution with a message: saves all registers and
// calls debug64(msg, rip, regs), then hlt(). 64-bit version.
void MacroAssembler::stop(const char* msg) {
  address rip = pc();
  pusha(); // get regs on stack
  lea(c_rarg0, ExternalAddress((address) msg));
  lea(c_rarg1, InternalAddress(rip));
  movq(c_rarg2, rsp); // pass pointer to regs array
  andq(rsp, -16); // align stack as required by ABI
  call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug64)));
  hlt();
}

// Emit code that prints a warning via the VM's warning() routine without
// disturbing any register state. 64-bit version.
void MacroAssembler::warn(const char* msg) {
  push(rsp);
  andq(rsp, -16);     // align stack as required by push_CPU_state and call

  push_CPU_state();   // keeps alignment at 16 bytes
  lea(c_rarg0, ExternalAddress((address) msg));
  call_VM_leaf(CAST_FROM_FN_PTR(address, warning), c_rarg0);
  pop_CPU_state();
  pop(rsp);           // restore the rsp saved by the initial push
}

#ifndef PRODUCT
extern "C" void findpc(intptr_t x);
#endif

// Runtime target of stop() (64-bit): shows a message box (if enabled) and
// dumps the saved register array, which pusha() laid out in reverse
// push order (hence regs[15] == rax ... regs[0] == r15).
void MacroAssembler::debug64(char* msg, int64_t pc, int64_t regs[]) {
  // In order to get locks to work, we need to fake a in_VM state
  if (ShowMessageBoxOnError ) {
    JavaThread* thread = JavaThread::current();
    JavaThreadState saved_state = thread->thread_state();
    thread->set_thread_state(_thread_in_vm);
#ifndef PRODUCT
    if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
      ttyLocker ttyl;
      BytecodeCounter::print();
    }
#endif
    // To see where a verify_oop failed, get $ebx+40/X for this frame.
    // XXX correct this offset for amd64
    // This is the value of eip which points to where verify_oop will return.
    if (os::message_box(msg, "Execution stopped, print registers?")) {
      ttyLocker ttyl;
      tty->print_cr("rip = 0x%016lx", pc);
#ifndef PRODUCT
      tty->cr();
      findpc(pc);
      tty->cr();
#endif
      tty->print_cr("rax = 0x%016lx", regs[15]);
      tty->print_cr("rbx = 0x%016lx", regs[12]);
      tty->print_cr("rcx = 0x%016lx", regs[14]);
      tty->print_cr("rdx = 0x%016lx", regs[13]);
      tty->print_cr("rdi = 0x%016lx", regs[8]);
      tty->print_cr("rsi = 0x%016lx", regs[9]);
      tty->print_cr("rbp = 0x%016lx", regs[10]);
      tty->print_cr("rsp = 0x%016lx", regs[11]);
      tty->print_cr("r8  = 0x%016lx", regs[7]);
      tty->print_cr("r9  = 0x%016lx", regs[6]);
      tty->print_cr("r10 = 0x%016lx", regs[5]);
      tty->print_cr("r11 = 0x%016lx", regs[4]);
      tty->print_cr("r12 = 0x%016lx", regs[3]);
      tty->print_cr("r13 = 0x%016lx", regs[2]);
      tty->print_cr("r14 = 0x%016lx", regs[1]);
      tty->print_cr("r15 = 0x%016lx", regs[0]);
      BREAKPOINT;
    }
    ThreadStateTransition::transition(thread, _thread_in_vm, saved_state);
  } else {
    ttyLocker ttyl;
    ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n",
                    msg);
    assert(false, err_msg("DEBUG MESSAGE: %s", msg));
  }
}

#endif // _LP64

// Now versions that are common to 32/64 bit

void MacroAssembler::addptr(Register dst, int32_t imm32) {
  LP64_ONLY(addq(dst, imm32)) NOT_LP64(addl(dst, imm32));
}

void MacroAssembler::addptr(Register dst, Register src) {
  LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src));
}

void MacroAssembler::addptr(Address dst, Register src) {
  LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src));
}

// addsd with an AddressLiteral operand; falls back through rscratch1 when
// the literal is out of rip-relative reach.
void MacroAssembler::addsd(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    Assembler::addsd(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::addsd(dst, Address(rscratch1, 0));
  }
}

// addss with an AddressLiteral operand; same reachability dance as addsd.
void MacroAssembler::addss(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    addss(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    addss(dst, Address(rscratch1, 0));
  }
}

// Pad the code stream with nops until the current offset is a multiple of
// 'modulus'.
void MacroAssembler::align(int modulus) {
  if (offset() % modulus != 0) {
    nop(modulus - (offset() % modulus));
  }
}

void MacroAssembler::andpd(XMMRegister dst, AddressLiteral src) {
  // Used in sign-masking with aligned address.
  assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");
  if (reachable(src)) {
    Assembler::andpd(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::andpd(dst, Address(rscratch1, 0));
  }
}

void MacroAssembler::andps(XMMRegister dst, AddressLiteral src) {
  // Used in sign-masking with aligned address.
  assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");
  if (reachable(src)) {
    Assembler::andps(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::andps(dst, Address(rscratch1, 0));
  }
}

void MacroAssembler::andptr(Register dst, int32_t imm32) {
  LP64_ONLY(andq(dst, imm32)) NOT_LP64(andl(dst, imm32));
}

// Atomically increment a 32-bit counter in memory; flags are saved and
// restored around the locked increment so callers' condition codes survive.
void MacroAssembler::atomic_incl(AddressLiteral counter_addr) {
  pushf();
  if (os::is_MP())
    lock();
  incrementl(counter_addr);
  popf();
}

// Writes to stack successive pages until offset reached to check for
// stack overflow + shadow pages.  This clobbers tmp.
void MacroAssembler::bang_stack_size(Register size, Register tmp) {
  movptr(tmp, rsp);
  // Bang stack for total size given plus shadow page size.
  // Bang one page at a time because large size can bang beyond yellow and
  // red zones.
  Label loop;
  bind(loop);
  movl(Address(tmp, (-os::vm_page_size())), size );
  subptr(tmp, os::vm_page_size());
  subl(size, os::vm_page_size());
  jcc(Assembler::greater, loop);

  // Bang down shadow pages too.
  // The -1 because we already subtracted 1 page.
  for (int i = 0; i< StackShadowPages-1; i++) {
    // this could be any sized move but this can be a debugging crumb
    // so the bigger the better.
    movptr(Address(tmp, (-i*os::vm_page_size())), size );
  }
}

// Fast-path biased unlock: branch to 'done' when the header still carries
// the bias pattern (unlocking a biased lock is a no-op).
void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) {
  assert(UseBiasedLocking, "why call this otherwise?");

  // Check for biased locking unlock case, which is a no-op
  // Note: we do not have to check the thread ID for two reasons.
  // First, the interpreter checks for IllegalMonitorStateException at
  // a higher level. Second, if the bias was revoked while we held the
  // lock, the object could not be rebiased toward another thread, so
  // the bias bit would be clear.
  movptr(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
  andptr(temp_reg, markOopDesc::biased_lock_mask_in_place);
  cmpptr(temp_reg, markOopDesc::biased_lock_pattern);
  jcc(Assembler::equal, done);
}

void MacroAssembler::c2bool(Register x) {
  // implements x == 0 ? 0 : 1
  // note: must only look at least-significant byte of x
  //       since C-style booleans are stored in one byte
  //       only! (was bug)
  andl(x, 0xFF);
  setb(Assembler::notZero, x);
}

// Wouldn't need if AddressLiteral version had new name
void MacroAssembler::call(Label& L, relocInfo::relocType rtype) {
  Assembler::call(L, rtype);
}

void MacroAssembler::call(Register entry) {
  Assembler::call(entry);
}

// Call through an AddressLiteral; uses an indirect call via rscratch1 when
// the target is beyond 32-bit displacement reach.
void MacroAssembler::call(AddressLiteral entry) {
  if (reachable(entry)) {
    Assembler::call_literal(entry.target(), entry.rspec());
  } else {
    lea(rscratch1, entry);
    Assembler::call(rscratch1);
  }
}

// Implementation of call_VM versions
//
// Each no-last_java_sp variant wraps the real work in a local call (bind C)
// so that a return address sits on the stack for the frame anchor; the jmp
// to E skips the out-of-line sequence during normal flow.

void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             bool check_exceptions) {
  Label C, E;
  call(C, relocInfo::none);
  jmp(E);

  bind(C);
  call_VM_helper(oop_result, entry_point, 0, check_exceptions);
  ret(0);

  bind(E);
}

void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             Register arg_1,
                             bool check_exceptions) {
  Label C, E;
  call(C, relocInfo::none);
  jmp(E);

  bind(C);
  pass_arg1(this, arg_1);
  call_VM_helper(oop_result, entry_point, 1, check_exceptions);
  ret(0);

  bind(E);
}

void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             bool check_exceptions) {
  Label C, E;
  call(C, relocInfo::none);
  jmp(E);

  bind(C);

  // args are passed right-to-left so later pass_arg calls don't smash
  // registers still holding earlier arguments
  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));

  pass_arg2(this, arg_2);
  pass_arg1(this, arg_1);
  call_VM_helper(oop_result, entry_point, 2, check_exceptions);
  ret(0);

  bind(E);
}

void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             Register arg_3,
                             bool check_exceptions) {
  Label C, E;
  call(C, relocInfo::none);
  jmp(E);

  bind(C);

  LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"));
  LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"));
  pass_arg3(this, arg_3);

  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);

  pass_arg1(this, arg_1);
  call_VM_helper(oop_result, entry_point, 3, check_exceptions);
  ret(0);

  bind(E);
}

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             int number_of_arguments,
                             bool check_exceptions) {
  Register thread = LP64_ONLY(r15_thread) NOT_LP64(noreg);
  call_VM_base(oop_result, thread, last_java_sp, entry_point, number_of_arguments, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             bool check_exceptions) {
  pass_arg1(this, arg_1);
  call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             bool check_exceptions) {

  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);
  pass_arg1(this, arg_1);
  call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             Register arg_3,
                             bool check_exceptions) {
  LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"));
  LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"));
  pass_arg3(this, arg_3);
  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);
  pass_arg1(this, arg_1);
  call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions);
}

// super_call_VM: like call_VM but always dispatches to
// MacroAssembler::call_VM_base, bypassing any virtual override.
void MacroAssembler::super_call_VM(Register oop_result,
                                   Register last_java_sp,
                                   address entry_point,
                                   int number_of_arguments,
                                   bool check_exceptions) {
  Register thread = LP64_ONLY(r15_thread) NOT_LP64(noreg);
  MacroAssembler::call_VM_base(oop_result, thread, last_java_sp, entry_point, number_of_arguments, check_exceptions);
}

void MacroAssembler::super_call_VM(Register oop_result,
                                   Register last_java_sp,
                                   address entry_point,
                                   Register arg_1,
                                   bool check_exceptions) {
  pass_arg1(this, arg_1);
  super_call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions);
}

void MacroAssembler::super_call_VM(Register oop_result,
                                   Register last_java_sp,
                                   address entry_point,
                                   Register arg_1,
                                   Register arg_2,
                                   bool check_exceptions) {

  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);
  pass_arg1(this, arg_1);
  super_call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions);
}

void MacroAssembler::super_call_VM(Register oop_result,
                                   Register last_java_sp,
                                   address entry_point,
                                   Register arg_1,
                                   Register arg_2,
                                   Register arg_3,
                                   bool check_exceptions) {
  LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"));
  LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"));
  pass_arg3(this, arg_3);
  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);
  pass_arg1(this, arg_1);
  super_call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions);
}

// Workhorse for all call_VM variants: sets the last-Java-frame anchor,
// calls the VM entry point with the thread as first argument, restores
// the thread/anchor, forwards pending exceptions, and fetches an oop
// result if requested.
void MacroAssembler::call_VM_base(Register oop_result,
                                  Register java_thread,
                                  Register last_java_sp,
                                  address  entry_point,
                                  int      number_of_arguments,
                                  bool     check_exceptions) {
  // determine java_thread register
  if (!java_thread->is_valid()) {
#ifdef _LP64
    java_thread = r15_thread;
#else
    java_thread = rdi;
    get_thread(java_thread);
#endif // LP64
  }
  // determine last_java_sp register
  if (!last_java_sp->is_valid()) {
    last_java_sp = rsp;
  }
  // debugging support
  assert(number_of_arguments >= 0   , "cannot have negative number of arguments");
  LP64_ONLY(assert(java_thread == r15_thread, "unexpected register"));
#ifdef ASSERT
  LP64_ONLY(if (UseCompressedOops) verify_heapbase("call_VM_base");)
#endif // ASSERT

  assert(java_thread != oop_result  , "cannot use the same register for java_thread & oop_result");
  assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp");

  // push java thread (becomes first argument of C function)

  NOT_LP64(push(java_thread); number_of_arguments++);
  LP64_ONLY(mov(c_rarg0, r15_thread));

  // set last Java frame before call
  assert(last_java_sp != rbp, "can't use ebp/rbp");

  // Only interpreter should have to set fp
  set_last_Java_frame(java_thread, last_java_sp, rbp, NULL);

  // do the call, remove parameters
  MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments);

  // restore the thread (cannot use the pushed argument since arguments
  // may be overwritten by C code generated by an optimizing compiler);
  // however can use the register value directly if it is callee saved.
  if (LP64_ONLY(true ||) java_thread == rdi || java_thread == rsi) {
    // rdi & rsi (also r15) are callee saved -> nothing to do
#ifdef ASSERT
    // verify the callee-saved assumption actually held across the call
    guarantee(java_thread != rax, "change this code");
    push(rax);
    { Label L;
      get_thread(rax);
      cmpptr(java_thread, rax);
      jcc(Assembler::equal, L);
      stop("MacroAssembler::call_VM_base: rdi not callee saved?");
      bind(L);
    }
    pop(rax);
#endif
  } else {
    get_thread(java_thread);
  }
  // reset last Java frame
  // Only interpreter should have to clear fp
  reset_last_Java_frame(java_thread, true, false);

#ifndef CC_INTERP
   // C++ interp handles this in the interpreter
  check_and_handle_popframe(java_thread);
  check_and_handle_earlyret(java_thread);
#endif /* CC_INTERP */

  if (check_exceptions) {
    // check for pending exceptions (java_thread is set upon return)
    cmpptr(Address(java_thread, Thread::pending_exception_offset()), (int32_t) NULL_WORD);
#ifndef _LP64
    jump_cc(Assembler::notEqual,
            RuntimeAddress(StubRoutines::forward_exception_entry()));
#else
    // This used to conditionally jump to forward_exception however it is
    // possible if we relocate that the branch will not reach. So we must jump
    // around so we can always reach

    Label ok;
    jcc(Assembler::equal, ok);
    jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
    bind(ok);
#endif // LP64
  }

  // get oop result if there is one and reset the value in the thread
  if (oop_result->is_valid()) {
    movptr(oop_result, Address(java_thread, JavaThread::vm_result_offset()));
    movptr(Address(java_thread, JavaThread::vm_result_offset()), NULL_WORD);
    verify_oop(oop_result, "broken oop in call_VM_base");
  }
}

// Compute last_Java_sp for the call_VM variants that enter through an
// intermediate local call, then delegate to call_VM_base.
void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) {

  // Calculate the value for last_Java_sp
  // somewhat subtle. call_VM does an intermediate call
  // which places a return address on the stack just under the
  // stack pointer as the user finished with it. This allows
  // us to retrieve last_Java_pc from last_Java_sp[-1].
  // On 32bit we then have to push additional args on the stack to accomplish
  // the actual requested call. On 64bit call_VM only can use register args
  // so the only extra space is the return address that call_VM created.
  // This hopefully explains the calculations here.

#ifdef _LP64
  // We've pushed one address, correct last_Java_sp
  lea(rax, Address(rsp, wordSize));
#else
  lea(rax, Address(rsp, (1 + number_of_arguments) * wordSize));
#endif // LP64

  call_VM_base(oop_result, noreg, rax, entry_point, number_of_arguments, check_exceptions);

}

void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) {
  call_VM_leaf_base(entry_point, number_of_arguments);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) {
  pass_arg0(this, arg_0);
  call_VM_leaf(entry_point, 1);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {

  // args are passed right-to-left so later pass_arg calls don't smash
  // registers still holding earlier arguments
  LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
  pass_arg1(this, arg_1);
  pass_arg0(this, arg_0);
  call_VM_leaf(entry_point, 2);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) {
  LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg"));
  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);
  LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
  pass_arg1(this, arg_1);
  pass_arg0(this, arg_0);
  call_VM_leaf(entry_point, 3);
}

// super_call_VM_leaf: like call_VM_leaf but always dispatches to
// MacroAssembler::call_VM_leaf_base, bypassing any virtual override.
void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0) {
  pass_arg0(this, arg_0);
  MacroAssembler::call_VM_leaf_base(entry_point, 1);
}

void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {

  LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
  pass_arg1(this, arg_1);
  pass_arg0(this, arg_0);
  MacroAssembler::call_VM_leaf_base(entry_point, 2);
}

void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) {
  LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg"));
  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);
  LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
  pass_arg1(this, arg_1);
  pass_arg0(this, arg_0);
  MacroAssembler::call_VM_leaf_base(entry_point, 3);
}

void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3) {
  LP64_ONLY(assert(arg_0 != c_rarg3, "smashed arg"));
  LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"));
  LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"));
  pass_arg3(this, arg_3);
  LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg"));
  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);
  LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
  pass_arg1(this, arg_1);
  pass_arg0(this, arg_0);
  MacroAssembler::call_VM_leaf_base(entry_point, 4);
}

// Default no-op hooks; interpreter-specific subclasses override these.
void MacroAssembler::check_and_handle_earlyret(Register java_thread) {
}

void MacroAssembler::check_and_handle_popframe(Register java_thread) {
}

// Compare the 32-bit value at src1 against an immediate; clobbers rscratch1
// when src1 is out of rip-relative reach.
void MacroAssembler::cmp32(AddressLiteral src1, int32_t imm) {
  if (reachable(src1)) {
    cmpl(as_Address(src1), imm);
  } else {
    lea(rscratch1, src1);
    cmpl(Address(rscratch1, 0), imm);
  }
}

void MacroAssembler::cmp32(Register src1, AddressLiteral src2) {
  assert(!src2.is_lval(), "use cmpptr");
  if (reachable(src2)) {
    cmpl(src1, as_Address(src2));
  } else {
    lea(rscratch1, src2);
    cmpl(src1, Address(rscratch1, 0));
  }
}

void MacroAssembler::cmp32(Register src1, int32_t imm) {
  Assembler::cmpl(src1, imm);
}

void MacroAssembler::cmp32(Register src1, Address src2) {
  Assembler::cmpl(src1, src2);
}

// Compare two doubles and materialize -1/0/+1 in dst, with the given
// ordering for NaN (parity flag set => unordered).
void MacroAssembler::cmpsd2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less) {
  ucomisd(opr1, opr2);

  Label L;
  if (unordered_is_less) {
    movl(dst, -1);
    jcc(Assembler::parity, L);
    jcc(Assembler::below , L);
    movl(dst, 0);
    jcc(Assembler::equal , L);
    increment(dst);
  } else { // unordered is greater
    movl(dst, 1);
    jcc(Assembler::parity, L);
    jcc(Assembler::above , L);
    movl(dst, 0);
    jcc(Assembler::equal , L);
    decrementl(dst);
  }
  bind(L);
}

// Float flavor of cmpsd2int; identical flag logic using ucomiss.
void MacroAssembler::cmpss2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less) {
  ucomiss(opr1, opr2);

  Label L;
  if (unordered_is_less) {
    movl(dst, -1);
    jcc(Assembler::parity, L);
    jcc(Assembler::below , L);
    movl(dst, 0);
    jcc(Assembler::equal , L);
    increment(dst);
  } else { // unordered is greater
    movl(dst, 1);
    jcc(Assembler::parity, L);
    jcc(Assembler::above , L);
    movl(dst, 0);
    jcc(Assembler::equal , L);
    decrementl(dst);
  }
  bind(L);
}


// Compare the byte at src1 against an immediate; clobbers rscratch1 when
// out of rip-relative reach.
void MacroAssembler::cmp8(AddressLiteral src1, int imm) {
  if (reachable(src1)) {
    cmpb(as_Address(src1), imm);
  } else {
    lea(rscratch1, src1);
    cmpb(Address(rscratch1, 0), imm);
  }
}

// Pointer-sized compare of src1 against either the literal's address (lval)
// or the value it points at; may clobber rscratch1 on 64 bit.
void MacroAssembler::cmpptr(Register src1, AddressLiteral src2) {
#ifdef _LP64
  if (src2.is_lval()) {
    movptr(rscratch1, src2);
    Assembler::cmpq(src1, rscratch1);
  } else if (reachable(src2)) {
    cmpq(src1, as_Address(src2));
  } else {
    lea(rscratch1, src2);
    Assembler::cmpq(src1, Address(rscratch1, 0));
  }
#else
  if (src2.is_lval()) {
    cmp_literal32(src1, (int32_t) src2.target(), src2.rspec());
  } else {
    cmpl(src1, as_Address(src2));
  }
#endif // _LP64
}

void MacroAssembler::cmpptr(Address src1, AddressLiteral src2) {
  assert(src2.is_lval(), "not a mem-mem compare");
#ifdef _LP64
  // moves src2's literal address
  movptr(rscratch1, src2);
  Assembler::cmpq(src1, rscratch1);
#else
  cmp_literal32(src1,
(int32_t) src2.target(), src2.rspec()); 6568 #endif // _LP64 6569 } 6570 6571 void MacroAssembler::locked_cmpxchgptr(Register reg, AddressLiteral adr) { 6572 if (reachable(adr)) { 6573 if (os::is_MP()) 6574 lock(); 6575 cmpxchgptr(reg, as_Address(adr)); 6576 } else { 6577 lea(rscratch1, adr); 6578 if (os::is_MP()) 6579 lock(); 6580 cmpxchgptr(reg, Address(rscratch1, 0)); 6581 } 6582 } 6583 6584 void MacroAssembler::cmpxchgptr(Register reg, Address adr) { 6585 LP64_ONLY(cmpxchgq(reg, adr)) NOT_LP64(cmpxchgl(reg, adr)); 6586 } 6587 6588 void MacroAssembler::comisd(XMMRegister dst, AddressLiteral src) { 6589 if (reachable(src)) { 6590 Assembler::comisd(dst, as_Address(src)); 6591 } else { 6592 lea(rscratch1, src); 6593 Assembler::comisd(dst, Address(rscratch1, 0)); 6594 } 6595 } 6596 6597 void MacroAssembler::comiss(XMMRegister dst, AddressLiteral src) { 6598 if (reachable(src)) { 6599 Assembler::comiss(dst, as_Address(src)); 6600 } else { 6601 lea(rscratch1, src); 6602 Assembler::comiss(dst, Address(rscratch1, 0)); 6603 } 6604 } 6605 6606 6607 void MacroAssembler::cond_inc32(Condition cond, AddressLiteral counter_addr) { 6608 Condition negated_cond = negate_condition(cond); 6609 Label L; 6610 jcc(negated_cond, L); 6611 atomic_incl(counter_addr); 6612 bind(L); 6613 } 6614 6615 int MacroAssembler::corrected_idivl(Register reg) { 6616 // Full implementation of Java idiv and irem; checks for 6617 // special case as described in JVM spec., p.243 & p.271. 6618 // The function returns the (pc) offset of the idivl 6619 // instruction - may be needed for implicit exceptions. 
6620 // 6621 // normal case special case 6622 // 6623 // input : rax,: dividend min_int 6624 // reg: divisor (may not be rax,/rdx) -1 6625 // 6626 // output: rax,: quotient (= rax, idiv reg) min_int 6627 // rdx: remainder (= rax, irem reg) 0 6628 assert(reg != rax && reg != rdx, "reg cannot be rax, or rdx register"); 6629 const int min_int = 0x80000000; 6630 Label normal_case, special_case; 6631 6632 // check for special case 6633 cmpl(rax, min_int); 6634 jcc(Assembler::notEqual, normal_case); 6635 xorl(rdx, rdx); // prepare rdx for possible special case (where remainder = 0) 6636 cmpl(reg, -1); 6637 jcc(Assembler::equal, special_case); 6638 6639 // handle normal case 6640 bind(normal_case); 6641 cdql(); 6642 int idivl_offset = offset(); 6643 idivl(reg); 6644 6645 // normal and special case exit 6646 bind(special_case); 6647 6648 return idivl_offset; 6649 } 6650 6651 6652 6653 void MacroAssembler::decrementl(Register reg, int value) { 6654 if (value == min_jint) {subl(reg, value) ; return; } 6655 if (value < 0) { incrementl(reg, -value); return; } 6656 if (value == 0) { ; return; } 6657 if (value == 1 && UseIncDec) { decl(reg) ; return; } 6658 /* else */ { subl(reg, value) ; return; } 6659 } 6660 6661 void MacroAssembler::decrementl(Address dst, int value) { 6662 if (value == min_jint) {subl(dst, value) ; return; } 6663 if (value < 0) { incrementl(dst, -value); return; } 6664 if (value == 0) { ; return; } 6665 if (value == 1 && UseIncDec) { decl(dst) ; return; } 6666 /* else */ { subl(dst, value) ; return; } 6667 } 6668 6669 void MacroAssembler::division_with_shift (Register reg, int shift_value) { 6670 assert (shift_value > 0, "illegal shift value"); 6671 Label _is_positive; 6672 testl (reg, reg); 6673 jcc (Assembler::positive, _is_positive); 6674 int offset = (1 << shift_value) - 1 ; 6675 6676 if (offset == 1) { 6677 incrementl(reg); 6678 } else { 6679 addl(reg, offset); 6680 } 6681 6682 bind (_is_positive); 6683 sarl(reg, shift_value); 6684 } 6685 6686 void 
MacroAssembler::divsd(XMMRegister dst, AddressLiteral src) { 6687 if (reachable(src)) { 6688 Assembler::divsd(dst, as_Address(src)); 6689 } else { 6690 lea(rscratch1, src); 6691 Assembler::divsd(dst, Address(rscratch1, 0)); 6692 } 6693 } 6694 6695 void MacroAssembler::divss(XMMRegister dst, AddressLiteral src) { 6696 if (reachable(src)) { 6697 Assembler::divss(dst, as_Address(src)); 6698 } else { 6699 lea(rscratch1, src); 6700 Assembler::divss(dst, Address(rscratch1, 0)); 6701 } 6702 } 6703 6704 // !defined(COMPILER2) is because of stupid core builds 6705 #if !defined(_LP64) || defined(COMPILER1) || !defined(COMPILER2) 6706 void MacroAssembler::empty_FPU_stack() { 6707 if (VM_Version::supports_mmx()) { 6708 emms(); 6709 } else { 6710 for (int i = 8; i-- > 0; ) ffree(i); 6711 } 6712 } 6713 #endif // !LP64 || C1 || !C2 6714 6715 6716 // Defines obj, preserves var_size_in_bytes 6717 void MacroAssembler::eden_allocate(Register obj, 6718 Register var_size_in_bytes, 6719 int con_size_in_bytes, 6720 Register t1, 6721 Label& slow_case) { 6722 assert(obj == rax, "obj must be in rax, for cmpxchg"); 6723 assert_different_registers(obj, var_size_in_bytes, t1); 6724 if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) { 6725 jmp(slow_case); 6726 } else { 6727 Register end = t1; 6728 Label retry; 6729 bind(retry); 6730 ExternalAddress heap_top((address) Universe::heap()->top_addr()); 6731 movptr(obj, heap_top); 6732 if (var_size_in_bytes == noreg) { 6733 lea(end, Address(obj, con_size_in_bytes)); 6734 } else { 6735 lea(end, Address(obj, var_size_in_bytes, Address::times_1)); 6736 } 6737 // if end < obj then we wrapped around => object too long => slow case 6738 cmpptr(end, obj); 6739 jcc(Assembler::below, slow_case); 6740 cmpptr(end, ExternalAddress((address) Universe::heap()->end_addr())); 6741 jcc(Assembler::above, slow_case); 6742 // Compare obj with the top addr, and if still equal, store the new top addr in 6743 // end at the address of the top addr 
pointer. Sets ZF if was equal, and clears 6744 // it otherwise. Use lock prefix for atomicity on MPs. 6745 locked_cmpxchgptr(end, heap_top); 6746 jcc(Assembler::notEqual, retry); 6747 } 6748 } 6749 6750 void MacroAssembler::enter() { 6751 push(rbp); 6752 mov(rbp, rsp); 6753 } 6754 6755 void MacroAssembler::fcmp(Register tmp) { 6756 fcmp(tmp, 1, true, true); 6757 } 6758 6759 void MacroAssembler::fcmp(Register tmp, int index, bool pop_left, bool pop_right) { 6760 assert(!pop_right || pop_left, "usage error"); 6761 if (VM_Version::supports_cmov()) { 6762 assert(tmp == noreg, "unneeded temp"); 6763 if (pop_left) { 6764 fucomip(index); 6765 } else { 6766 fucomi(index); 6767 } 6768 if (pop_right) { 6769 fpop(); 6770 } 6771 } else { 6772 assert(tmp != noreg, "need temp"); 6773 if (pop_left) { 6774 if (pop_right) { 6775 fcompp(); 6776 } else { 6777 fcomp(index); 6778 } 6779 } else { 6780 fcom(index); 6781 } 6782 // convert FPU condition into eflags condition via rax, 6783 save_rax(tmp); 6784 fwait(); fnstsw_ax(); 6785 sahf(); 6786 restore_rax(tmp); 6787 } 6788 // condition codes set as follows: 6789 // 6790 // CF (corresponds to C0) if x < y 6791 // PF (corresponds to C2) if unordered 6792 // ZF (corresponds to C3) if x = y 6793 } 6794 6795 void MacroAssembler::fcmp2int(Register dst, bool unordered_is_less) { 6796 fcmp2int(dst, unordered_is_less, 1, true, true); 6797 } 6798 6799 void MacroAssembler::fcmp2int(Register dst, bool unordered_is_less, int index, bool pop_left, bool pop_right) { 6800 fcmp(VM_Version::supports_cmov() ? 
noreg : dst, index, pop_left, pop_right); 6801 Label L; 6802 if (unordered_is_less) { 6803 movl(dst, -1); 6804 jcc(Assembler::parity, L); 6805 jcc(Assembler::below , L); 6806 movl(dst, 0); 6807 jcc(Assembler::equal , L); 6808 increment(dst); 6809 } else { // unordered is greater 6810 movl(dst, 1); 6811 jcc(Assembler::parity, L); 6812 jcc(Assembler::above , L); 6813 movl(dst, 0); 6814 jcc(Assembler::equal , L); 6815 decrementl(dst); 6816 } 6817 bind(L); 6818 } 6819 6820 void MacroAssembler::fld_d(AddressLiteral src) { 6821 fld_d(as_Address(src)); 6822 } 6823 6824 void MacroAssembler::fld_s(AddressLiteral src) { 6825 fld_s(as_Address(src)); 6826 } 6827 6828 void MacroAssembler::fld_x(AddressLiteral src) { 6829 Assembler::fld_x(as_Address(src)); 6830 } 6831 6832 void MacroAssembler::fldcw(AddressLiteral src) { 6833 Assembler::fldcw(as_Address(src)); 6834 } 6835 6836 void MacroAssembler::fpop() { 6837 ffree(); 6838 fincstp(); 6839 } 6840 6841 void MacroAssembler::fremr(Register tmp) { 6842 save_rax(tmp); 6843 { Label L; 6844 bind(L); 6845 fprem(); 6846 fwait(); fnstsw_ax(); 6847 #ifdef _LP64 6848 testl(rax, 0x400); 6849 jcc(Assembler::notEqual, L); 6850 #else 6851 sahf(); 6852 jcc(Assembler::parity, L); 6853 #endif // _LP64 6854 } 6855 restore_rax(tmp); 6856 // Result is in ST0. 
6857 // Note: fxch & fpop to get rid of ST1 6858 // (otherwise FPU stack could overflow eventually) 6859 fxch(1); 6860 fpop(); 6861 } 6862 6863 6864 void MacroAssembler::incrementl(AddressLiteral dst) { 6865 if (reachable(dst)) { 6866 incrementl(as_Address(dst)); 6867 } else { 6868 lea(rscratch1, dst); 6869 incrementl(Address(rscratch1, 0)); 6870 } 6871 } 6872 6873 void MacroAssembler::incrementl(ArrayAddress dst) { 6874 incrementl(as_Address(dst)); 6875 } 6876 6877 void MacroAssembler::incrementl(Register reg, int value) { 6878 if (value == min_jint) {addl(reg, value) ; return; } 6879 if (value < 0) { decrementl(reg, -value); return; } 6880 if (value == 0) { ; return; } 6881 if (value == 1 && UseIncDec) { incl(reg) ; return; } 6882 /* else */ { addl(reg, value) ; return; } 6883 } 6884 6885 void MacroAssembler::incrementl(Address dst, int value) { 6886 if (value == min_jint) {addl(dst, value) ; return; } 6887 if (value < 0) { decrementl(dst, -value); return; } 6888 if (value == 0) { ; return; } 6889 if (value == 1 && UseIncDec) { incl(dst) ; return; } 6890 /* else */ { addl(dst, value) ; return; } 6891 } 6892 6893 void MacroAssembler::jump(AddressLiteral dst) { 6894 if (reachable(dst)) { 6895 jmp_literal(dst.target(), dst.rspec()); 6896 } else { 6897 lea(rscratch1, dst); 6898 jmp(rscratch1); 6899 } 6900 } 6901 6902 void MacroAssembler::jump_cc(Condition cc, AddressLiteral dst) { 6903 if (reachable(dst)) { 6904 InstructionMark im(this); 6905 relocate(dst.reloc()); 6906 const int short_size = 2; 6907 const int long_size = 6; 6908 int offs = (intptr_t)dst.target() - ((intptr_t)_code_pos); 6909 if (dst.reloc() == relocInfo::none && is8bit(offs - short_size)) { 6910 // 0111 tttn #8-bit disp 6911 emit_byte(0x70 | cc); 6912 emit_byte((offs - short_size) & 0xFF); 6913 } else { 6914 // 0000 1111 1000 tttn #32-bit disp 6915 emit_byte(0x0F); 6916 emit_byte(0x80 | cc); 6917 emit_long(offs - long_size); 6918 } 6919 } else { 6920 #ifdef ASSERT 6921 warning("reversing conditional 
branch"); 6922 #endif /* ASSERT */ 6923 Label skip; 6924 jccb(reverse[cc], skip); 6925 lea(rscratch1, dst); 6926 Assembler::jmp(rscratch1); 6927 bind(skip); 6928 } 6929 } 6930 6931 void MacroAssembler::ldmxcsr(AddressLiteral src) { 6932 if (reachable(src)) { 6933 Assembler::ldmxcsr(as_Address(src)); 6934 } else { 6935 lea(rscratch1, src); 6936 Assembler::ldmxcsr(Address(rscratch1, 0)); 6937 } 6938 } 6939 6940 int MacroAssembler::load_signed_byte(Register dst, Address src) { 6941 int off; 6942 if (LP64_ONLY(true ||) VM_Version::is_P6()) { 6943 off = offset(); 6944 movsbl(dst, src); // movsxb 6945 } else { 6946 off = load_unsigned_byte(dst, src); 6947 shll(dst, 24); 6948 sarl(dst, 24); 6949 } 6950 return off; 6951 } 6952 6953 // Note: load_signed_short used to be called load_signed_word. 6954 // Although the 'w' in x86 opcodes refers to the term "word" in the assembler 6955 // manual, which means 16 bits, that usage is found nowhere in HotSpot code. 6956 // The term "word" in HotSpot means a 32- or 64-bit machine word. 6957 int MacroAssembler::load_signed_short(Register dst, Address src) { 6958 int off; 6959 if (LP64_ONLY(true ||) VM_Version::is_P6()) { 6960 // This is dubious to me since it seems safe to do a signed 16 => 64 bit 6961 // version but this is what 64bit has always done. This seems to imply 6962 // that users are only using 32bits worth. 6963 off = offset(); 6964 movswl(dst, src); // movsxw 6965 } else { 6966 off = load_unsigned_short(dst, src); 6967 shll(dst, 16); 6968 sarl(dst, 16); 6969 } 6970 return off; 6971 } 6972 6973 int MacroAssembler::load_unsigned_byte(Register dst, Address src) { 6974 // According to Intel Doc. AP-526, "Zero-Extension of Short", p.16, 6975 // and "3.9 Partial Register Penalties", p. 22). 
6976 int off; 6977 if (LP64_ONLY(true || ) VM_Version::is_P6() || src.uses(dst)) { 6978 off = offset(); 6979 movzbl(dst, src); // movzxb 6980 } else { 6981 xorl(dst, dst); 6982 off = offset(); 6983 movb(dst, src); 6984 } 6985 return off; 6986 } 6987 6988 // Note: load_unsigned_short used to be called load_unsigned_word. 6989 int MacroAssembler::load_unsigned_short(Register dst, Address src) { 6990 // According to Intel Doc. AP-526, "Zero-Extension of Short", p.16, 6991 // and "3.9 Partial Register Penalties", p. 22). 6992 int off; 6993 if (LP64_ONLY(true ||) VM_Version::is_P6() || src.uses(dst)) { 6994 off = offset(); 6995 movzwl(dst, src); // movzxw 6996 } else { 6997 xorl(dst, dst); 6998 off = offset(); 6999 movw(dst, src); 7000 } 7001 return off; 7002 } 7003 7004 void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2) { 7005 switch (size_in_bytes) { 7006 #ifndef _LP64 7007 case 8: 7008 assert(dst2 != noreg, "second dest register required"); 7009 movl(dst, src); 7010 movl(dst2, src.plus_disp(BytesPerInt)); 7011 break; 7012 #else 7013 case 8: movq(dst, src); break; 7014 #endif 7015 case 4: movl(dst, src); break; 7016 case 2: is_signed ? load_signed_short(dst, src) : load_unsigned_short(dst, src); break; 7017 case 1: is_signed ? 
load_signed_byte( dst, src) : load_unsigned_byte( dst, src); break; 7018 default: ShouldNotReachHere(); 7019 } 7020 } 7021 7022 void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2) { 7023 switch (size_in_bytes) { 7024 #ifndef _LP64 7025 case 8: 7026 assert(src2 != noreg, "second source register required"); 7027 movl(dst, src); 7028 movl(dst.plus_disp(BytesPerInt), src2); 7029 break; 7030 #else 7031 case 8: movq(dst, src); break; 7032 #endif 7033 case 4: movl(dst, src); break; 7034 case 2: movw(dst, src); break; 7035 case 1: movb(dst, src); break; 7036 default: ShouldNotReachHere(); 7037 } 7038 } 7039 7040 void MacroAssembler::mov32(AddressLiteral dst, Register src) { 7041 if (reachable(dst)) { 7042 movl(as_Address(dst), src); 7043 } else { 7044 lea(rscratch1, dst); 7045 movl(Address(rscratch1, 0), src); 7046 } 7047 } 7048 7049 void MacroAssembler::mov32(Register dst, AddressLiteral src) { 7050 if (reachable(src)) { 7051 movl(dst, as_Address(src)); 7052 } else { 7053 lea(rscratch1, src); 7054 movl(dst, Address(rscratch1, 0)); 7055 } 7056 } 7057 7058 // C++ bool manipulation 7059 7060 void MacroAssembler::movbool(Register dst, Address src) { 7061 if(sizeof(bool) == 1) 7062 movb(dst, src); 7063 else if(sizeof(bool) == 2) 7064 movw(dst, src); 7065 else if(sizeof(bool) == 4) 7066 movl(dst, src); 7067 else 7068 // unsupported 7069 ShouldNotReachHere(); 7070 } 7071 7072 void MacroAssembler::movbool(Address dst, bool boolconst) { 7073 if(sizeof(bool) == 1) 7074 movb(dst, (int) boolconst); 7075 else if(sizeof(bool) == 2) 7076 movw(dst, (int) boolconst); 7077 else if(sizeof(bool) == 4) 7078 movl(dst, (int) boolconst); 7079 else 7080 // unsupported 7081 ShouldNotReachHere(); 7082 } 7083 7084 void MacroAssembler::movbool(Address dst, Register src) { 7085 if(sizeof(bool) == 1) 7086 movb(dst, src); 7087 else if(sizeof(bool) == 2) 7088 movw(dst, src); 7089 else if(sizeof(bool) == 4) 7090 movl(dst, src); 7091 else 7092 // 
unsupported 7093 ShouldNotReachHere(); 7094 } 7095 7096 void MacroAssembler::movbyte(ArrayAddress dst, int src) { 7097 movb(as_Address(dst), src); 7098 } 7099 7100 void MacroAssembler::movdbl(XMMRegister dst, AddressLiteral src) { 7101 if (reachable(src)) { 7102 if (UseXmmLoadAndClearUpper) { 7103 movsd (dst, as_Address(src)); 7104 } else { 7105 movlpd(dst, as_Address(src)); 7106 } 7107 } else { 7108 lea(rscratch1, src); 7109 if (UseXmmLoadAndClearUpper) { 7110 movsd (dst, Address(rscratch1, 0)); 7111 } else { 7112 movlpd(dst, Address(rscratch1, 0)); 7113 } 7114 } 7115 } 7116 7117 void MacroAssembler::movflt(XMMRegister dst, AddressLiteral src) { 7118 if (reachable(src)) { 7119 movss(dst, as_Address(src)); 7120 } else { 7121 lea(rscratch1, src); 7122 movss(dst, Address(rscratch1, 0)); 7123 } 7124 } 7125 7126 void MacroAssembler::movptr(Register dst, Register src) { 7127 LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src)); 7128 } 7129 7130 void MacroAssembler::movptr(Register dst, Address src) { 7131 LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src)); 7132 } 7133 7134 // src should NEVER be a real pointer. 
Use AddressLiteral for true pointers 7135 void MacroAssembler::movptr(Register dst, intptr_t src) { 7136 LP64_ONLY(mov64(dst, src)) NOT_LP64(movl(dst, src)); 7137 } 7138 7139 void MacroAssembler::movptr(Address dst, Register src) { 7140 LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src)); 7141 } 7142 7143 void MacroAssembler::movsd(XMMRegister dst, AddressLiteral src) { 7144 if (reachable(src)) { 7145 Assembler::movsd(dst, as_Address(src)); 7146 } else { 7147 lea(rscratch1, src); 7148 Assembler::movsd(dst, Address(rscratch1, 0)); 7149 } 7150 } 7151 7152 void MacroAssembler::movss(XMMRegister dst, AddressLiteral src) { 7153 if (reachable(src)) { 7154 Assembler::movss(dst, as_Address(src)); 7155 } else { 7156 lea(rscratch1, src); 7157 Assembler::movss(dst, Address(rscratch1, 0)); 7158 } 7159 } 7160 7161 void MacroAssembler::mulsd(XMMRegister dst, AddressLiteral src) { 7162 if (reachable(src)) { 7163 Assembler::mulsd(dst, as_Address(src)); 7164 } else { 7165 lea(rscratch1, src); 7166 Assembler::mulsd(dst, Address(rscratch1, 0)); 7167 } 7168 } 7169 7170 void MacroAssembler::mulss(XMMRegister dst, AddressLiteral src) { 7171 if (reachable(src)) { 7172 Assembler::mulss(dst, as_Address(src)); 7173 } else { 7174 lea(rscratch1, src); 7175 Assembler::mulss(dst, Address(rscratch1, 0)); 7176 } 7177 } 7178 7179 void MacroAssembler::null_check(Register reg, int offset) { 7180 if (needs_explicit_null_check(offset)) { 7181 // provoke OS NULL exception if reg = NULL by 7182 // accessing M[reg] w/o changing any (non-CC) registers 7183 // NOTE: cmpl is plenty here to provoke a segv 7184 cmpptr(rax, Address(reg, 0)); 7185 // Note: should probably use testl(rax, Address(reg, 0)); 7186 // may be shorter code (however, this version of 7187 // testl needs to be implemented first) 7188 } else { 7189 // nothing to do, (later) access of M[reg + offset] 7190 // will provoke OS NULL exception if reg = NULL 7191 } 7192 } 7193 7194 void MacroAssembler::os_breakpoint() { 7195 // instead of directly 
emitting a breakpoint, call os:breakpoint for better debugability 7196 // (e.g., MSVC can't call ps() otherwise) 7197 call(RuntimeAddress(CAST_FROM_FN_PTR(address, os::breakpoint))); 7198 } 7199 7200 void MacroAssembler::pop_CPU_state() { 7201 pop_FPU_state(); 7202 pop_IU_state(); 7203 } 7204 7205 void MacroAssembler::pop_FPU_state() { 7206 NOT_LP64(frstor(Address(rsp, 0));) 7207 LP64_ONLY(fxrstor(Address(rsp, 0));) 7208 addptr(rsp, FPUStateSizeInWords * wordSize); 7209 } 7210 7211 void MacroAssembler::pop_IU_state() { 7212 popa(); 7213 LP64_ONLY(addq(rsp, 8)); 7214 popf(); 7215 } 7216 7217 // Save Integer and Float state 7218 // Warning: Stack must be 16 byte aligned (64bit) 7219 void MacroAssembler::push_CPU_state() { 7220 push_IU_state(); 7221 push_FPU_state(); 7222 } 7223 7224 void MacroAssembler::push_FPU_state() { 7225 subptr(rsp, FPUStateSizeInWords * wordSize); 7226 #ifndef _LP64 7227 fnsave(Address(rsp, 0)); 7228 fwait(); 7229 #else 7230 fxsave(Address(rsp, 0)); 7231 #endif // LP64 7232 } 7233 7234 void MacroAssembler::push_IU_state() { 7235 // Push flags first because pusha kills them 7236 pushf(); 7237 // Make sure rsp stays 16-byte aligned 7238 LP64_ONLY(subq(rsp, 8)); 7239 pusha(); 7240 } 7241 7242 void MacroAssembler::reset_last_Java_frame(Register java_thread, bool clear_fp, bool clear_pc) { 7243 // determine java_thread register 7244 if (!java_thread->is_valid()) { 7245 java_thread = rdi; 7246 get_thread(java_thread); 7247 } 7248 // we must set sp to zero to clear frame 7249 movptr(Address(java_thread, JavaThread::last_Java_sp_offset()), NULL_WORD); 7250 if (clear_fp) { 7251 movptr(Address(java_thread, JavaThread::last_Java_fp_offset()), NULL_WORD); 7252 } 7253 7254 if (clear_pc) 7255 movptr(Address(java_thread, JavaThread::last_Java_pc_offset()), NULL_WORD); 7256 7257 } 7258 7259 void MacroAssembler::restore_rax(Register tmp) { 7260 if (tmp == noreg) pop(rax); 7261 else if (tmp != rax) mov(rax, tmp); 7262 } 7263 7264 void 
MacroAssembler::round_to(Register reg, int modulus) { 7265 addptr(reg, modulus - 1); 7266 andptr(reg, -modulus); 7267 } 7268 7269 void MacroAssembler::save_rax(Register tmp) { 7270 if (tmp == noreg) push(rax); 7271 else if (tmp != rax) mov(tmp, rax); 7272 } 7273 7274 // Write serialization page so VM thread can do a pseudo remote membar. 7275 // We use the current thread pointer to calculate a thread specific 7276 // offset to write to within the page. This minimizes bus traffic 7277 // due to cache line collision. 7278 void MacroAssembler::serialize_memory(Register thread, Register tmp) { 7279 movl(tmp, thread); 7280 shrl(tmp, os::get_serialize_page_shift_count()); 7281 andl(tmp, (os::vm_page_size() - sizeof(int))); 7282 7283 Address index(noreg, tmp, Address::times_1); 7284 ExternalAddress page(os::get_memory_serialize_page()); 7285 7286 // Size of store must match masking code above 7287 movl(as_Address(ArrayAddress(page, index)), tmp); 7288 } 7289 7290 // Calls to C land 7291 // 7292 // When entering C land, the rbp, & rsp of the last Java frame have to be recorded 7293 // in the (thread-local) JavaThread object. When leaving C land, the last Java fp 7294 // has to be reset to 0. This is required to allow proper stack traversal. 
7295 void MacroAssembler::set_last_Java_frame(Register java_thread, 7296 Register last_java_sp, 7297 Register last_java_fp, 7298 address last_java_pc) { 7299 // determine java_thread register 7300 if (!java_thread->is_valid()) { 7301 java_thread = rdi; 7302 get_thread(java_thread); 7303 } 7304 // determine last_java_sp register 7305 if (!last_java_sp->is_valid()) { 7306 last_java_sp = rsp; 7307 } 7308 7309 // last_java_fp is optional 7310 7311 if (last_java_fp->is_valid()) { 7312 movptr(Address(java_thread, JavaThread::last_Java_fp_offset()), last_java_fp); 7313 } 7314 7315 // last_java_pc is optional 7316 7317 if (last_java_pc != NULL) { 7318 lea(Address(java_thread, 7319 JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()), 7320 InternalAddress(last_java_pc)); 7321 7322 } 7323 movptr(Address(java_thread, JavaThread::last_Java_sp_offset()), last_java_sp); 7324 } 7325 7326 void MacroAssembler::shlptr(Register dst, int imm8) { 7327 LP64_ONLY(shlq(dst, imm8)) NOT_LP64(shll(dst, imm8)); 7328 } 7329 7330 void MacroAssembler::shrptr(Register dst, int imm8) { 7331 LP64_ONLY(shrq(dst, imm8)) NOT_LP64(shrl(dst, imm8)); 7332 } 7333 7334 void MacroAssembler::sign_extend_byte(Register reg) { 7335 if (LP64_ONLY(true ||) (VM_Version::is_P6() && reg->has_byte_register())) { 7336 movsbl(reg, reg); // movsxb 7337 } else { 7338 shll(reg, 24); 7339 sarl(reg, 24); 7340 } 7341 } 7342 7343 void MacroAssembler::sign_extend_short(Register reg) { 7344 if (LP64_ONLY(true ||) VM_Version::is_P6()) { 7345 movswl(reg, reg); // movsxw 7346 } else { 7347 shll(reg, 16); 7348 sarl(reg, 16); 7349 } 7350 } 7351 7352 void MacroAssembler::testl(Register dst, AddressLiteral src) { 7353 assert(reachable(src), "Address should be reachable"); 7354 testl(dst, as_Address(src)); 7355 } 7356 7357 void MacroAssembler::sqrtsd(XMMRegister dst, AddressLiteral src) { 7358 if (reachable(src)) { 7359 Assembler::sqrtsd(dst, as_Address(src)); 7360 } else { 7361 lea(rscratch1, src); 7362 
Assembler::sqrtsd(dst, Address(rscratch1, 0)); 7363 } 7364 } 7365 7366 void MacroAssembler::sqrtss(XMMRegister dst, AddressLiteral src) { 7367 if (reachable(src)) { 7368 Assembler::sqrtss(dst, as_Address(src)); 7369 } else { 7370 lea(rscratch1, src); 7371 Assembler::sqrtss(dst, Address(rscratch1, 0)); 7372 } 7373 } 7374 7375 void MacroAssembler::subsd(XMMRegister dst, AddressLiteral src) { 7376 if (reachable(src)) { 7377 Assembler::subsd(dst, as_Address(src)); 7378 } else { 7379 lea(rscratch1, src); 7380 Assembler::subsd(dst, Address(rscratch1, 0)); 7381 } 7382 } 7383 7384 void MacroAssembler::subss(XMMRegister dst, AddressLiteral src) { 7385 if (reachable(src)) { 7386 Assembler::subss(dst, as_Address(src)); 7387 } else { 7388 lea(rscratch1, src); 7389 Assembler::subss(dst, Address(rscratch1, 0)); 7390 } 7391 } 7392 7393 void MacroAssembler::ucomisd(XMMRegister dst, AddressLiteral src) { 7394 if (reachable(src)) { 7395 Assembler::ucomisd(dst, as_Address(src)); 7396 } else { 7397 lea(rscratch1, src); 7398 Assembler::ucomisd(dst, Address(rscratch1, 0)); 7399 } 7400 } 7401 7402 void MacroAssembler::ucomiss(XMMRegister dst, AddressLiteral src) { 7403 if (reachable(src)) { 7404 Assembler::ucomiss(dst, as_Address(src)); 7405 } else { 7406 lea(rscratch1, src); 7407 Assembler::ucomiss(dst, Address(rscratch1, 0)); 7408 } 7409 } 7410 7411 void MacroAssembler::xorpd(XMMRegister dst, AddressLiteral src) { 7412 // Used in sign-bit flipping with aligned address. 7413 assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes"); 7414 if (reachable(src)) { 7415 Assembler::xorpd(dst, as_Address(src)); 7416 } else { 7417 lea(rscratch1, src); 7418 Assembler::xorpd(dst, Address(rscratch1, 0)); 7419 } 7420 } 7421 7422 void MacroAssembler::xorps(XMMRegister dst, AddressLiteral src) { 7423 // Used in sign-bit flipping with aligned address. 
  assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");
  if (reachable(src)) {
    Assembler::xorps(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::xorps(dst, Address(rscratch1, 0));
  }
}

// AVX 3-operands instructions
//
// Each wrapper below uses the literal address directly when it is
// rip-reachable; otherwise the address is materialized in rscratch1 first.

void MacroAssembler::vaddsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
  if (reachable(src)) {
    vaddsd(dst, nds, as_Address(src));
  } else {
    lea(rscratch1, src);
    vaddsd(dst, nds, Address(rscratch1, 0));
  }
}

void MacroAssembler::vaddss(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
  if (reachable(src)) {
    vaddss(dst, nds, as_Address(src));
  } else {
    lea(rscratch1, src);
    vaddss(dst, nds, Address(rscratch1, 0));
  }
}

void MacroAssembler::vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
  if (reachable(src)) {
    vandpd(dst, nds, as_Address(src));
  } else {
    lea(rscratch1, src);
    vandpd(dst, nds, Address(rscratch1, 0));
  }
}

void MacroAssembler::vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
  if (reachable(src)) {
    vandps(dst, nds, as_Address(src));
  } else {
    lea(rscratch1, src);
    vandps(dst, nds, Address(rscratch1, 0));
  }
}

void MacroAssembler::vdivsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
  if (reachable(src)) {
    vdivsd(dst, nds, as_Address(src));
  } else {
    lea(rscratch1, src);
    vdivsd(dst, nds, Address(rscratch1, 0));
  }
}

void MacroAssembler::vdivss(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
  if (reachable(src)) {
    vdivss(dst, nds, as_Address(src));
  } else {
    lea(rscratch1, src);
    vdivss(dst, nds, Address(rscratch1, 0));
  }
}

void MacroAssembler::vmulsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
  if (reachable(src)) {
    vmulsd(dst, nds, as_Address(src));
  } else {
    lea(rscratch1, src);
    vmulsd(dst, nds, Address(rscratch1, 0));
  }
}

void MacroAssembler::vmulss(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
  if (reachable(src)) {
    vmulss(dst, nds, as_Address(src));
  } else {
    lea(rscratch1, src);
    vmulss(dst, nds, Address(rscratch1, 0));
  }
}

void MacroAssembler::vsubsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
  if (reachable(src)) {
    vsubsd(dst, nds, as_Address(src));
  } else {
    lea(rscratch1, src);
    vsubsd(dst, nds, Address(rscratch1, 0));
  }
}

void MacroAssembler::vsubss(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
  if (reachable(src)) {
    vsubss(dst, nds, as_Address(src));
  } else {
    lea(rscratch1, src);
    vsubss(dst, nds, Address(rscratch1, 0));
  }
}

void MacroAssembler::vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
  if (reachable(src)) {
    vxorpd(dst, nds, as_Address(src));
  } else {
    lea(rscratch1, src);
    vxorpd(dst, nds, Address(rscratch1, 0));
  }
}

void MacroAssembler::vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
  if (reachable(src)) {
    vxorps(dst, nds, as_Address(src));
  } else {
    lea(rscratch1, src);
    vxorps(dst, nds, Address(rscratch1, 0));
  }
}


//////////////////////////////////////////////////////////////////////////////////
#ifndef SERIALGC

// G1 SATB pre-barrier: record the previous value of a reference field in the
// thread's SATB mark queue when concurrent marking is active.  If the queue
// is full, falls into the runtime (SharedRuntime::g1_wb_pre).
void MacroAssembler::g1_write_barrier_pre(Register obj,
                                          Register pre_val,
                                          Register thread,
                                          Register tmp,
                                          bool tosca_live,
                                          bool expand_call) {

  // If expand_call is true then we expand the call_VM_leaf macro
  // directly to skip generating the check by
  // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp.

#ifdef _LP64
  assert(thread == r15_thread, "must be");
#endif // _LP64

  Label done;
  Label runtime;

  assert(pre_val != noreg, "check this code");

  if (obj != noreg) {
    assert_different_registers(obj, pre_val, tmp);
    assert(pre_val != rax, "check this code");
  }

  Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
                                       PtrQueue::byte_offset_of_active()));
  Address index(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
                                       PtrQueue::byte_offset_of_index()));
  Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
                                       PtrQueue::byte_offset_of_buf()));


  // Is marking active?
  if (in_bytes(PtrQueue::byte_width_of_active()) == 4) {
    cmpl(in_progress, 0);
  } else {
    assert(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption");
    cmpb(in_progress, 0);
  }
  jcc(Assembler::equal, done);

  // Do we need to load the previous value?
  if (obj != noreg) {
    load_heap_oop(pre_val, Address(obj, 0));
  }

  // Is the previous value null?
  cmpptr(pre_val, (int32_t) NULL_WORD);
  jcc(Assembler::equal, done);

  // Can we store original value in the thread's buffer?
  // Is index == 0?
  // (The index field is typed as size_t.)

  movptr(tmp, index);                   // tmp := *index_adr
  cmpptr(tmp, 0);                       // tmp == 0?
  jcc(Assembler::equal, runtime);       // If yes, goto runtime

  subptr(tmp, wordSize);                // tmp := tmp - wordSize
  movptr(index, tmp);                   // *index_adr := tmp
  addptr(tmp, buffer);                  // tmp := tmp + *buffer_adr

  // Record the previous value
  movptr(Address(tmp, 0), pre_val);
  jmp(done);

  bind(runtime);
  // save the live input values
  if(tosca_live) push(rax);

  if (obj != noreg && obj != rax)
    push(obj);

  if (pre_val != rax)
    push(pre_val);

  // Calling the runtime using the regular call_VM_leaf mechanism generates
  // code (generated by InterpreterMacroAssember::call_VM_leaf_base)
  // that checks that the *(ebp+frame::interpreter_frame_last_sp) == NULL.
  //
  // If we care generating the pre-barrier without a frame (e.g. in the
  // intrinsified Reference.get() routine) then ebp might be pointing to
  // the caller frame and so this check will most likely fail at runtime.
  //
  // Expanding the call directly bypasses the generation of the check.
  // So when we do not have have a full interpreter frame on the stack
  // expand_call should be passed true.

  NOT_LP64( push(thread); )

  if (expand_call) {
    LP64_ONLY( assert(pre_val != c_rarg1, "smashed arg"); )
    pass_arg1(this, thread);
    pass_arg0(this, pre_val);
    MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), 2);
  } else {
    call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), pre_val, thread);
  }

  NOT_LP64( pop(thread); )

  // restore the live input values (pushed in reverse order above)
  if (pre_val != rax)
    pop(pre_val);

  if (obj != noreg && obj != rax)
    pop(obj);

  if(tosca_live) pop(rax);

  bind(done);
}

// G1 post-barrier: filter out same-region and NULL stores, then dirty the
// card for the store address and enqueue it on the thread's dirty-card
// queue.  Falls into the runtime (SharedRuntime::g1_wb_post) when the
// queue is full.
void MacroAssembler::g1_write_barrier_post(Register store_addr,
                                           Register new_val,
                                           Register thread,
                                           Register tmp,
                                           Register tmp2) {
#ifdef _LP64
  assert(thread == r15_thread, "must be");
#endif // _LP64

  Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
                                       PtrQueue::byte_offset_of_index()));
  Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
                                       PtrQueue::byte_offset_of_buf()));

  BarrierSet* bs = Universe::heap()->barrier_set();
  CardTableModRefBS* ct = (CardTableModRefBS*)bs;
  Label done;
  Label runtime;

  // Does store cross heap regions?
  // (xor of the two addresses has a non-zero bit above the region-size
  // granularity iff they are in different regions)

  movptr(tmp, store_addr);
  xorptr(tmp, new_val);
  shrptr(tmp, HeapRegion::LogOfHRGrainBytes);
  jcc(Assembler::equal, done);

  // crosses regions, storing NULL?

  cmpptr(new_val, (int32_t) NULL_WORD);
  jcc(Assembler::equal, done);

  // storing region crossing non-NULL, is card already dirty?

  ExternalAddress cardtable((address) ct->byte_map_base);
  assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
#ifdef _LP64
  const Register card_addr = tmp;

  movq(card_addr, store_addr);
  shrq(card_addr, CardTableModRefBS::card_shift);

  lea(tmp2, cardtable);

  // get the address of the card
  addq(card_addr, tmp2);
#else
  const Register card_index = tmp;

  movl(card_index, store_addr);
  shrl(card_index, CardTableModRefBS::card_shift);

  Address index(noreg, card_index, Address::times_1);
  const Register card_addr = tmp;
  lea(card_addr, as_Address(ArrayAddress(cardtable, index)));
#endif
  cmpb(Address(card_addr, 0), 0);
  jcc(Assembler::equal, done);

  // storing a region crossing, non-NULL oop, card is clean.
  // dirty card and log.

  movb(Address(card_addr, 0), 0);

  cmpl(queue_index, 0);
  jcc(Assembler::equal, runtime);
  subl(queue_index, wordSize);
  movptr(tmp2, buffer);
#ifdef _LP64
  movslq(rscratch1, queue_index);
  addq(tmp2, rscratch1);
  movq(Address(tmp2, 0), card_addr);
#else
  addl(tmp2, queue_index);
  movl(Address(tmp2, 0), card_index);
#endif
  jmp(done);

  bind(runtime);
  // save the live input values
  push(store_addr);
  push(new_val);
#ifdef _LP64
  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, r15_thread);
#else
  push(thread);
  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, thread);
  pop(thread);
#endif
  pop(new_val);
  pop(store_addr);

  bind(done);
}

#endif // SERIALGC
//////////////////////////////////////////////////////////////////////////////////


void MacroAssembler::store_check(Register obj) {
  // Does a store check for the oop in register obj. The content of
  // register obj is destroyed afterwards.
  store_check_part_1(obj);
  store_check_part_2(obj);
}

void MacroAssembler::store_check(Register obj, Address dst) {
  // The destination address is not needed by the card-table barrier;
  // only the (card-shifted) object address matters.
  store_check(obj);
}


// split the store check operation so that other instructions can be scheduled inbetween
void MacroAssembler::store_check_part_1(Register obj) {
  BarrierSet* bs = Universe::heap()->barrier_set();
  assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
  // obj := card index for obj
  shrptr(obj, CardTableModRefBS::card_shift);
}

void MacroAssembler::store_check_part_2(Register obj) {
  BarrierSet* bs = Universe::heap()->barrier_set();
  assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
  CardTableModRefBS* ct = (CardTableModRefBS*)bs;
  assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");

  // The calculation for byte_map_base is as follows:
  // byte_map_base = _byte_map - (uintptr_t(low_bound) >> card_shift);
  // So this essentially converts an address to a displacement and
  // it will never need to be relocated. On 64bit however the value may be too
  // large for a 32bit displacement

  intptr_t disp = (intptr_t) ct->byte_map_base;
  if (is_simm32(disp)) {
    Address cardtable(noreg, obj, Address::times_1, disp);
    movb(cardtable, 0);
  } else {
    // By doing it as an ExternalAddress disp could be converted to a rip-relative
    // displacement and done in a single instruction given favorable mapping and
    // a smarter version of as_Address. Worst case it is two instructions which
    // is no worse off then loading disp into a register and doing as a simple
    // Address() as above.
    // We can't do as ExternalAddress as the only style since if disp == 0 we'll
    // assert since NULL isn't acceptable in a reloci (see 6644928). In any case
    // in some cases we'll get a single instruction version.

    ExternalAddress cardtable((address)disp);
    Address index(noreg, obj, Address::times_1);
    movb(as_Address(ArrayAddress(cardtable, index)), 0);
  }
}

void MacroAssembler::subptr(Register dst, int32_t imm32) {
  LP64_ONLY(subq(dst, imm32)) NOT_LP64(subl(dst, imm32));
}

void MacroAssembler::subptr(Register dst, Register src) {
  LP64_ONLY(subq(dst, src)) NOT_LP64(subl(dst, src));
}

// C++ bool manipulation
// Sets the condition codes from a register holding a C++ bool; the test
// emitted depends on the platform's sizeof(bool).
void MacroAssembler::testbool(Register dst) {
  if(sizeof(bool) == 1)
    testb(dst, 0xff);
  else if(sizeof(bool) == 2) {
    // testw implementation needed for two byte bools
    ShouldNotReachHere();
  } else if(sizeof(bool) == 4)
    testl(dst, dst);
  else
    // unsupported
    ShouldNotReachHere();
}

void MacroAssembler::testptr(Register dst, Register src) {
  LP64_ONLY(testq(dst, src)) NOT_LP64(testl(dst, src));
}

// Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes.
// Bump-pointer allocation from the thread-local allocation buffer (TLAB).
// obj := tlab top; jumps to slow_case if there is not enough room.
// Size is either the constant con_size_in_bytes or the register
// var_size_in_bytes (when var_size_in_bytes != noreg).
void MacroAssembler::tlab_allocate(Register obj,
                                   Register var_size_in_bytes,
                                   int con_size_in_bytes,
                                   Register t1,
                                   Register t2,
                                   Label& slow_case) {
  assert_different_registers(obj, t1, t2);
  assert_different_registers(obj, var_size_in_bytes, t1);
  Register end = t2;
  Register thread = NOT_LP64(t1) LP64_ONLY(r15_thread);

  verify_tlab();

  NOT_LP64(get_thread(thread));

  movptr(obj, Address(thread, JavaThread::tlab_top_offset()));
  if (var_size_in_bytes == noreg) {
    lea(end, Address(obj, con_size_in_bytes));
  } else {
    lea(end, Address(obj, var_size_in_bytes, Address::times_1));
  }
  cmpptr(end, Address(thread, JavaThread::tlab_end_offset()));
  jcc(Assembler::above, slow_case);

  // update the tlab top pointer
  movptr(Address(thread, JavaThread::tlab_top_offset()), end);

  // recover var_size_in_bytes if necessary
  if (var_size_in_bytes == end) {
    subptr(var_size_in_bytes, obj);
  }
  verify_tlab();
}

// Preserves rbx, and rdx.
Register MacroAssembler::tlab_refill(Label& retry,
                                     Label& try_eden,
                                     Label& slow_case) {
  Register top = rax;
  Register t1  = rcx;
  Register t2  = rsi;
  Register thread_reg = NOT_LP64(rdi) LP64_ONLY(r15_thread);
  assert_different_registers(top, thread_reg, t1, t2, /* preserve: */ rbx, rdx);
  Label do_refill, discard_tlab;

  if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) {
    // No allocation in the shared eden.
    jmp(slow_case);
  }

  NOT_LP64(get_thread(thread_reg));

  movptr(top, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())));
  movptr(t1,  Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())));

  // calculate amount of free space (in HeapWords)
  subptr(t1, top);
  shrptr(t1, LogHeapWordSize);

  // Retain tlab and allocate object in shared space if
  // the amount free in the tlab is too large to discard.
  cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset())));
  jcc(Assembler::lessEqual, discard_tlab);

  // Retain
  // %%% yuck as movptr...
  movptr(t2, (int32_t) ThreadLocalAllocBuffer::refill_waste_limit_increment());
  addptr(Address(thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset())), t2);
  if (TLABStats) {
    // increment number of slow_allocations
    addl(Address(thread_reg, in_bytes(JavaThread::tlab_slow_allocations_offset())), 1);
  }
  jmp(try_eden);

  bind(discard_tlab);
  if (TLABStats) {
    // increment number of refills
    addl(Address(thread_reg, in_bytes(JavaThread::tlab_number_of_refills_offset())), 1);
    // accumulate wastage -- t1 is amount free in tlab
    addl(Address(thread_reg, in_bytes(JavaThread::tlab_fast_refill_waste_offset())), t1);
  }

  // if tlab is currently allocated (top or end != null) then
  // fill [top, end + alignment_reserve) with array object
  testptr(top, top);
  jcc(Assembler::zero, do_refill);

  // set up the mark word
  movptr(Address(top, oopDesc::mark_offset_in_bytes()), (intptr_t)markOopDesc::prototype()->copy_set_hash(0x2));
  // set the length to the remaining space
  subptr(t1, typeArrayOopDesc::header_size(T_INT));
  addptr(t1, (int32_t)ThreadLocalAllocBuffer::alignment_reserve());
  shlptr(t1, log2_intptr(HeapWordSize/sizeof(jint)));
  movl(Address(top, arrayOopDesc::length_offset_in_bytes()), t1);
  // set klass to intArrayKlass
  // dubious reloc why not an oop reloc?
  movptr(t1, ExternalAddress((address)Universe::intArrayKlassObj_addr()));
  // store klass last.  concurrent gcs assumes klass length is valid if
  // klass field is not null.
  store_klass(top, t1);

  movptr(t1, top);
  subptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_start_offset())));
  incr_allocated_bytes(thread_reg, t1, 0);

  // refill the tlab with an eden allocation
  bind(do_refill);
  movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_size_offset())));
  shlptr(t1, LogHeapWordSize);
  // allocate new tlab, address returned in top
  eden_allocate(top, t1, 0, t2, slow_case);

  // Check that t1 was preserved in eden_allocate.
#ifdef ASSERT
  if (UseTLAB) {
    Label ok;
    Register tsize = rsi;
    assert_different_registers(tsize, thread_reg, t1);
    push(tsize);
    movptr(tsize, Address(thread_reg, in_bytes(JavaThread::tlab_size_offset())));
    shlptr(tsize, LogHeapWordSize);
    cmpptr(t1, tsize);
    jcc(Assembler::equal, ok);
    stop("assert(t1 != tlab size)");
    should_not_reach_here();

    bind(ok);
    pop(tsize);
  }
#endif
  // Install the new buffer: start = top = new allocation,
  // end = start + size - alignment_reserve.
  movptr(Address(thread_reg, in_bytes(JavaThread::tlab_start_offset())), top);
  movptr(Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())), top);
  addptr(top, t1);
  subptr(top, (int32_t)ThreadLocalAllocBuffer::alignment_reserve_in_bytes());
  movptr(Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())), top);
  verify_tlab();
  jmp(retry);

  return thread_reg; // for use by caller
}

// Adds an allocation size (register or constant) to the per-thread
// allocated-bytes counter.  On 32-bit the counter is a 64-bit value
// updated with an add/adc pair; t1 is only used there when no thread
// register was supplied.
void MacroAssembler::incr_allocated_bytes(Register thread,
                                          Register var_size_in_bytes,
                                          int con_size_in_bytes,
                                          Register t1) {
#ifdef _LP64
  if (var_size_in_bytes->is_valid()) {
    addq(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), var_size_in_bytes);
  } else {
    addq(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), con_size_in_bytes);
  }
#else
  if (!thread->is_valid()) {
    assert(t1->is_valid(), "need temp reg");
    thread = t1;
    get_thread(thread);
  }

  if (var_size_in_bytes->is_valid()) {
    addl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), var_size_in_bytes);
  } else {
    addl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), con_size_in_bytes);
  }
  // propagate carry into the high 32 bits of the 64-bit counter
  adcl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())+4), 0);
#endif
}

static const double pi_4 = 0.7853981633974483;

// Emits sin/cos/tan ('s'/'c'/'t') of the x87 top-of-stack value.  Uses the
// hardware instruction when |x| <= pi/4, otherwise spills FPU state and
// calls the SharedRuntime dsin/dcos/dtan routines.  Result is left in F-TOS.
void MacroAssembler::trigfunc(char trig, int num_fpu_regs_in_use) {
  // A hand-coded argument reduction for values in fabs(pi/4, pi/2)
  // was attempted in this code; unfortunately it appears that the
  // switch to 80-bit precision and back causes this to be
  // unprofitable compared with simply performing a runtime call if
  // the argument is out of the (-pi/4, pi/4) range.

  Register tmp = noreg;
  if (!VM_Version::supports_cmov()) {
    // fcmp needs a temporary so preserve rbx,
    tmp = rbx;
    push(tmp);
  }

  Label slow_case, done;

  ExternalAddress pi4_adr = (address)&pi_4;
  if (reachable(pi4_adr)) {
    // x ?<= pi/4
    fld_d(pi4_adr);
    fld_s(1);                // Stack:  X  PI/4  X
    fabs();                  // Stack: |X| PI/4  X
    fcmp(tmp);
    jcc(Assembler::above, slow_case);

    // fastest case: -pi/4 <= x <= pi/4
    switch(trig) {
    case 's':
      fsin();
      break;
    case 'c':
      fcos();
      break;
    case 't':
      ftan();
      break;
    default:
      assert(false, "bad intrinsic");
      break;
    }
    jmp(done);
  }

  // slow case: runtime call
  bind(slow_case);
  // Preserve registers across runtime call
  pusha();
  int incoming_argument_and_return_value_offset = -1;
  if (num_fpu_regs_in_use > 1) {
    // Must preserve all other FPU regs (could alternatively convert
    // SharedRuntime::dsin and dcos into assembly routines known not to trash
    // FPU state, but can not trust C compiler)
    NEEDS_CLEANUP;
    // NOTE that in this case we also push the incoming argument to
    // the stack and restore it later; we also use this stack slot to
    // hold the return value from dsin or dcos.
    for (int i = 0; i < num_fpu_regs_in_use; i++) {
      subptr(rsp, sizeof(jdouble));
      fstp_d(Address(rsp, 0));
    }
    incoming_argument_and_return_value_offset = sizeof(jdouble)*(num_fpu_regs_in_use-1);
    fld_d(Address(rsp, incoming_argument_and_return_value_offset));
  }
  subptr(rsp, sizeof(jdouble));
  fstp_d(Address(rsp, 0));
#ifdef _LP64
  movdbl(xmm0, Address(rsp, 0));
#endif // _LP64

  // NOTE: we must not use call_VM_leaf here because that requires a
  // complete interpreter frame in debug mode -- same bug as 4387334
  // MacroAssembler::call_VM_leaf_base is perfectly safe and will
  // do proper 64bit abi

  NEEDS_CLEANUP;
  // Need to add stack banging before this runtime call if it needs to
  // be taken; however, there is no generic stack banging routine at
  // the MacroAssembler level
  switch(trig) {
  case 's':
    {
      MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::dsin), 0);
    }
    break;
  case 'c':
    {
      MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::dcos), 0);
    }
    break;
  case 't':
    {
      MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::dtan), 0);
    }
    break;
  default:
    assert(false, "bad intrinsic");
    break;
  }
#ifdef _LP64
  movsd(Address(rsp, 0), xmm0);
  fld_d(Address(rsp, 0));
#endif // _LP64
  addptr(rsp, sizeof(jdouble));
  if (num_fpu_regs_in_use > 1) {
    // Must save return value to stack and then restore entire FPU stack
    fstp_d(Address(rsp, incoming_argument_and_return_value_offset));
    for (int i = 0; i < num_fpu_regs_in_use; i++) {
      fld_d(Address(rsp, 0));
      addptr(rsp, sizeof(jdouble));
    }
  }
  popa();

  // Come here with result in F-TOS
  bind(done);

  if (tmp != noreg) {
    pop(tmp);
  }
}


// Look up the method for a
// megamorphic invokeinterface call.
// The target method is determined by <intf_klass, itable_index>.
// The receiver klass is in recv_klass.
// On success, the result will be in method_result, and execution falls through.
// On failure, execution transfers to the given label.
void MacroAssembler::lookup_interface_method(Register recv_klass,
                                             Register intf_klass,
                                             RegisterOrConstant itable_index,
                                             Register method_result,
                                             Register scan_temp,
                                             Label& L_no_such_interface) {
  assert_different_registers(recv_klass, intf_klass, method_result, scan_temp);
  assert(itable_index.is_constant() || itable_index.as_register() == method_result,
         "caller must use same register for non-constant itable index as for method");

  // Compute start of first itableOffsetEntry (which is at the end of the vtable)
  int vtable_base = instanceKlass::vtable_start_offset() * wordSize;
  int itentry_off = itableMethodEntry::method_offset_in_bytes();
  int scan_step   = itableOffsetEntry::size() * wordSize;
  int vte_size    = vtableEntry::size() * wordSize;
  Address::ScaleFactor times_vte_scale = Address::times_ptr;
  assert(vte_size == wordSize, "else adjust times_vte_scale");

  movl(scan_temp, Address(recv_klass, instanceKlass::vtable_length_offset() * wordSize));

  // %%% Could store the aligned, prescaled offset in the klassoop.
  lea(scan_temp, Address(recv_klass, scan_temp, times_vte_scale, vtable_base));
  if (HeapWordsPerLong > 1) {
    // Round up to align_object_offset boundary
    // see code for instanceKlass::start_of_itable!
    round_to(scan_temp, BytesPerLong);
  }

  // Adjust recv_klass by scaled itable_index, so we can free itable_index.
  assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below");
  lea(recv_klass, Address(recv_klass, itable_index, Address::times_ptr, itentry_off));

  // for (scan = klass->itable(); scan->interface() != NULL; scan += scan_step) {
  //   if (scan->interface() == intf) {
  //     result = (klass + scan->offset() + itable_index);
  //   }
  // }
  Label search, found_method;

  // One loop iteration is peeled so the common (first-entry hit) case
  // falls straight through to found_method.
  for (int peel = 1; peel >= 0; peel--) {
    movptr(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes()));
    cmpptr(intf_klass, method_result);

    if (peel) {
      jccb(Assembler::equal, found_method);
    } else {
      jccb(Assembler::notEqual, search);
      // (invert the test to fall through to found_method...)
    }

    if (!peel)  break;

    bind(search);

    // Check that the previous entry is non-null.  A null entry means that
    // the receiver class doesn't implement the interface, and wasn't the
    // same as when the caller was compiled.
    testptr(method_result, method_result);
    jcc(Assembler::zero, L_no_such_interface);
    addptr(scan_temp, scan_step);
  }

  bind(found_method);

  // Got a hit.
  movl(scan_temp, Address(scan_temp, itableOffsetEntry::offset_offset_in_bytes()));
  movptr(method_result, Address(recv_klass, scan_temp, Address::times_1));
}


// Convenience wrapper: branch to L_success if sub_klass is a subtype of
// super_klass; fall through (via L_failure) otherwise.
void MacroAssembler::check_klass_subtype(Register sub_klass,
                           Register super_klass,
                           Register temp_reg,
                           Label& L_success) {
  Label L_failure;
  check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg,        &L_success, &L_failure, NULL);
  check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, noreg, &L_success, NULL);
  bind(L_failure);
}


void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
                                                   Register super_klass,
                                                   Register temp_reg,
                                                   Label* L_success,
                                                   Label* L_failure,
                                                   Label* L_slow_path,
                                        RegisterOrConstant super_check_offset) {
  assert_different_registers(sub_klass, super_klass, temp_reg);
  bool must_load_sco = (super_check_offset.constant_or_zero() == -1);
  if (super_check_offset.is_register()) {
    assert_different_registers(sub_klass, super_klass,
                               super_check_offset.as_register());
  } else if (must_load_sco) {
    assert(temp_reg != noreg, "supply either a temp or a register offset");
  }

  Label L_fallthrough;
  int label_nulls = 0;
  if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
  if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
  if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; }
  assert(label_nulls <= 1, "at most one NULL in the batch");

  int sc_offset = (klassOopDesc::header_size() * HeapWordSize +
                   Klass::secondary_super_cache_offset_in_bytes());
  int sco_offset = (klassOopDesc::header_size() * HeapWordSize +
                    Klass::super_check_offset_offset_in_bytes());
  Address super_check_offset_addr(super_klass, sco_offset);

  // Hacked jcc, which "knows" that L_fallthrough, at least, is in
  // range of a jccb.  If this routine grows larger, reconsider at
  // least some of these.
#define local_jcc(assembler_cond, label)                                \
  if (&(label) == &L_fallthrough)  jccb(assembler_cond, label);         \
  else                             jcc( assembler_cond, label) /*omit semi*/

  // Hacked jmp, which may only be used just before L_fallthrough.
#define final_jmp(label)                                                \
  if (&(label) == &L_fallthrough) { /*do nothing*/ }                    \
  else                            jmp(label)                /*omit semi*/

  // If the pointers are equal, we are done (e.g., String[] elements).
  // This self-check enables sharing of secondary supertype arrays among
  // non-primary types such as array-of-interface.  Otherwise, each such
  // type would need its own customized SSA.
  // We move this check to the front of the fast path because many
  // type checks are in fact trivially successful in this manner,
  // so we get a nicely predicted branch right at the start of the check.
  cmpptr(sub_klass, super_klass);
  local_jcc(Assembler::equal, *L_success);

  // Check the supertype display:
  if (must_load_sco) {
    // Positive movl does right thing on LP64.
    movl(temp_reg, super_check_offset_addr);
    super_check_offset = RegisterOrConstant(temp_reg);
  }
  Address super_check_addr(sub_klass, super_check_offset, Address::times_1, 0);
  cmpptr(super_klass, super_check_addr); // load displayed supertype

  // This check has worked decisively for primary supers.
  // Secondary supers are sought in the super_cache ('super_cache_addr').
  // (Secondary supers are interfaces and very deeply nested subtypes.)
  // This works in the same check above because of a tricky aliasing
  // between the super_cache and the primary super display elements.
  // (The 'super_check_addr' can address either, as the case requires.)
  // Note that the cache is updated below if it does not help us find
  // what we need immediately.
  // So if it was a primary super, we can just fail immediately.
  // Otherwise, it's the slow path for us (no success at this point).

  if (super_check_offset.is_register()) {
    local_jcc(Assembler::equal, *L_success);
    cmpl(super_check_offset.as_register(), sc_offset);
    if (L_failure == &L_fallthrough) {
      local_jcc(Assembler::equal, *L_slow_path);
    } else {
      local_jcc(Assembler::notEqual, *L_failure);
      final_jmp(*L_slow_path);
    }
  } else if (super_check_offset.as_constant() == sc_offset) {
    // Need a slow path; fast failure is impossible.
    if (L_slow_path == &L_fallthrough) {
      local_jcc(Assembler::equal, *L_success);
    } else {
      local_jcc(Assembler::notEqual, *L_slow_path);
      final_jmp(*L_success);
    }
  } else {
    // No slow path; it's a fast decision.
    if (L_failure == &L_fallthrough) {
      local_jcc(Assembler::equal, *L_success);
    } else {
      local_jcc(Assembler::notEqual, *L_failure);
      final_jmp(*L_success);
    }
  }

  bind(L_fallthrough);

#undef local_jcc
#undef final_jmp
}


void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
                                                   Register super_klass,
                                                   Register temp_reg,
                                                   Register temp2_reg,
                                                   Label* L_success,
                                                   Label* L_failure,
                                                   bool set_cond_codes) {
  assert_different_registers(sub_klass, super_klass, temp_reg);
  if (temp2_reg != noreg)
    assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg);
#define IS_A_TEMP(reg) ((reg) == temp_reg || (reg) == temp2_reg)

  Label L_fallthrough;
  int label_nulls = 0;
  if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
  if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
  assert(label_nulls <= 1, "at most one NULL in the batch");

  // a couple of useful fields in sub_klass:
  int ss_offset = (klassOopDesc::header_size() * HeapWordSize +
                   Klass::secondary_supers_offset_in_bytes());
  int sc_offset = (klassOopDesc::header_size() * HeapWordSize +
                   Klass::secondary_super_cache_offset_in_bytes());
  Address secondary_supers_addr(sub_klass, ss_offset);
  Address super_cache_addr(     sub_klass, sc_offset);

  // Do a linear scan of the secondary super-klass chain.
  // This code is rarely used, so simplicity is a virtue here.
  // The repne_scan instruction uses fixed registers, which we must spill.
  // Don't worry too much about pre-existing connections with the input regs.

  assert(sub_klass != rax, "killed reg"); // killed by mov(rax, super)
  assert(sub_klass != rcx, "killed reg"); // killed by lea(rcx, &pst_counter)

  // Get super_klass value into rax (even if it was in rdi or rcx).
  bool pushed_rax = false, pushed_rcx = false, pushed_rdi = false;
  if (super_klass != rax || UseCompressedOops) {
    if (!IS_A_TEMP(rax)) { push(rax); pushed_rax = true; }
    mov(rax, super_klass);
  }
  if (!IS_A_TEMP(rcx)) { push(rcx); pushed_rcx = true; }
  if (!IS_A_TEMP(rdi)) { push(rdi); pushed_rdi = true; }

#ifndef PRODUCT
  int* pst_counter = &SharedRuntime::_partial_subtype_ctr;
  ExternalAddress pst_counter_addr((address) pst_counter);
  NOT_LP64(  incrementl(pst_counter_addr) );
  LP64_ONLY( lea(rcx, pst_counter_addr) );
  LP64_ONLY( incrementl(Address(rcx, 0)) );
#endif //PRODUCT

  // We will consult the secondary-super array.
  movptr(rdi, secondary_supers_addr);
  // Load the array length.  (Positive movl does right thing on LP64.)
  movl(rcx, Address(rdi, arrayOopDesc::length_offset_in_bytes()));
  // Skip to start of data.
  addptr(rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT));

  // Scan RCX words at [RDI] for an occurrence of RAX.
  // Set NZ/Z based on last compare.
  // Z flag value will not be set by 'repne' if RCX == 0 since 'repne' does
  // not change flags (only scas instruction which is repeated sets flags).
  // Set Z = 0 (not equal) before 'repne' to indicate that class was not found.
#ifdef _LP64
  // This part is tricky, as values in supers array could be 32 or 64 bit wide
  // and we store values in objArrays always encoded, thus we need to encode
  // the value of rax before repne.  Note that rax is dead after the repne.
  if (UseCompressedOops) {
    encode_heap_oop_not_null(rax); // Changes flags.
    // The superclass is never null; it would be a basic system error if a null
    // pointer were to sneak in here.  Note that we have already loaded the
    // Klass::super_check_offset from the super_klass in the fast path,
    // so if there is a null in that register, we are already in the afterlife.
    testl(rax,rax); // Set Z = 0
    repne_scanl();
  } else
#endif // _LP64
  {
    testptr(rax,rax); // Set Z = 0
    repne_scan();
  }
  // Unspill the temp. registers:
  if (pushed_rdi)  pop(rdi);
  if (pushed_rcx)  pop(rcx);
  if (pushed_rax)  pop(rax);

  if (set_cond_codes) {
    // Special hack for the AD files:  rdi is guaranteed non-zero.
    assert(!pushed_rdi, "rdi must be left non-NULL");
    // Also, the condition codes are properly set Z/NZ on succeed/failure.
  }

  if (L_failure == &L_fallthrough)
        jccb(Assembler::notEqual, *L_failure);
  else  jcc(Assembler::notEqual, *L_failure);

  // Success.  Cache the super we found and proceed in triumph.
  movptr(super_cache_addr, super_klass);

  if (L_success != &L_fallthrough) {
    jmp(*L_success);
  }

#undef IS_A_TEMP

  bind(L_fallthrough);
}


// Conditional move; emulated with a short branch on pre-CMOV hardware.
void MacroAssembler::cmov32(Condition cc, Register dst, Address src) {
  if (VM_Version::supports_cmov()) {
    cmovl(cc, dst, src);
  } else {
    Label L;
    jccb(negate_condition(cc), L);
    movl(dst, src);
    bind(L);
  }
}

void MacroAssembler::cmov32(Condition cc, Register dst, Register src) {
  if (VM_Version::supports_cmov()) {
    cmovl(cc, dst, src);
  } else {
    Label L;
    jccb(negate_condition(cc), L);
    movl(dst, src);
    bind(L);
  }
}

void MacroAssembler::verify_oop(Register reg, const char* s) {
  if (!VerifyOops) return;

  // Pass register number to verify_oop_subroutine
  // NOTE: 'b' must outlive the generated code (its address is embedded
  // below), so it is never freed.
  char* b = new char[strlen(s) + 50];
  sprintf(b, "verify_oop: %s: %s", reg->name(), s);
#ifdef _LP64
  push(rscratch1);                    // save r10, trashed by movptr()
#endif
  push(rax);                          // save rax,
  push(reg);                          // pass register argument
  ExternalAddress buffer((address) b);
  // avoid using pushptr, as it modifies scratch registers
  // and our contract is not to modify anything
  movptr(rax, buffer.addr());
  push(rax);
  // call indirectly to solve generation ordering problem
  movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));
  call(rax);
  // Caller pops the arguments (oop, message) and restores rax, r10
}


RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr,
                                                      Register tmp,
                                                      int offset) {
  // If the delayed value is already known, fold it into a constant now.
  intptr_t value = *delayed_value_addr;
  if (value != 0)
    return RegisterOrConstant(value + offset);

  // load indirectly to solve generation ordering problem
  movptr(tmp, ExternalAddress((address) delayed_value_addr));

#ifdef ASSERT
  { Label L;
    testptr(tmp, tmp);
    if (WizardMode) {
      jcc(Assembler::notZero, L);
      char* buf = new char[40];
      sprintf(buf, "DelayedValue="INTPTR_FORMAT, delayed_value_addr[1]);
      stop(buf);
    } else {
      jccb(Assembler::notZero, L);
      hlt();
    }
    bind(L);
  }
#endif

  if (offset != 0)
    addptr(tmp, offset);

  return RegisterOrConstant(tmp);
}


// registers on entry:
//  - rax ('check' register): required MethodType
//  - rcx: method handle
//  - rdx, rsi, or ?: killable temp
void MacroAssembler::check_method_handle_type(Register mtype_reg, Register mh_reg,
                                              Register temp_reg,
                                              Label& wrong_method_type) {
  Address type_addr(mh_reg, delayed_value(java_lang_invoke_MethodHandle::type_offset_in_bytes, temp_reg));
  // compare method type against that of the receiver
  if (UseCompressedOops) {
    load_heap_oop(temp_reg, type_addr);
    cmpptr(mtype_reg, temp_reg);
  } else {
    cmpptr(mtype_reg, type_addr);
  }
  jcc(Assembler::notEqual, wrong_method_type);
}


// A method handle has a "vmslots" field which gives the size of its
// argument list in JVM stack slots.  This field is either located directly
// in every method handle, or else is indirectly accessed through the
// method handle's MethodType.  This macro hides the distinction.
// Load mh.type.form.vmslots (a 32-bit int) into vmslots_reg by chasing the
// MethodHandle -> MethodType -> MethodTypeForm oop chain.  vmslots_reg is
// reused as the intermediate oop holder; temp_reg is consumed by the
// delayed_value() lookups.
void MacroAssembler::load_method_handle_vmslots(Register vmslots_reg, Register mh_reg,
                                                Register temp_reg) {
  assert_different_registers(vmslots_reg, mh_reg, temp_reg);
  // load mh.type.form.vmslots
  Register temp2_reg = vmslots_reg;
  load_heap_oop(temp2_reg, Address(mh_reg,    delayed_value(java_lang_invoke_MethodHandle::type_offset_in_bytes, temp_reg)));
  load_heap_oop(temp2_reg, Address(temp2_reg, delayed_value(java_lang_invoke_MethodType::form_offset_in_bytes, temp_reg)));
  movl(vmslots_reg, Address(temp2_reg, delayed_value(java_lang_invoke_MethodTypeForm::vmslots_offset_in_bytes, temp_reg)));
}


// registers on entry:
//  - rcx: method handle
//  - rdx: killable temp (interpreted only)
//  - rax: killable temp (compiled only)
void MacroAssembler::jump_to_method_handle_entry(Register mh_reg, Register temp_reg) {
  assert(mh_reg == rcx, "caller must put MH object in rcx");
  assert_different_registers(mh_reg, temp_reg);

  // pick out the interpreted side of the handler
  // NOTE: vmentry is not an oop!
  movptr(temp_reg, Address(mh_reg, delayed_value(java_lang_invoke_MethodHandle::vmentry_offset_in_bytes, temp_reg)));

  // off we go...
  jmp(Address(temp_reg, MethodHandleEntry::from_interpreted_entry_offset_in_bytes()));

  // for the various stubs which take control at this point,
  // see MethodHandles::generate_method_handle_stub
}


// Compute the address of an interpreter expression-stack argument slot.
// arg_slot may be a compile-time constant (folded into the displacement) or
// a register (used as a scaled index).
Address MacroAssembler::argument_address(RegisterOrConstant arg_slot,
                                         int extra_slot_offset) {
  // cf. TemplateTable::prepare_invoke(), if (load_receiver).
  int stackElementSize = Interpreter::stackElementSize;
  int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0);
#ifdef ASSERT
  // Sanity-check that consecutive slots are exactly stackElementSize apart.
  int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1);
  assert(offset1 - offset == stackElementSize, "correct arithmetic");
#endif
  Register             scale_reg    = noreg;
  Address::ScaleFactor scale_factor = Address::no_scale;
  if (arg_slot.is_constant()) {
    offset += arg_slot.as_constant() * stackElementSize;
  } else {
    scale_reg    = arg_slot.as_register();
    scale_factor = Address::times(stackElementSize);
  }
  offset += wordSize;           // return PC is on stack
  return Address(rsp, scale_reg, scale_factor, offset);
}


// Like verify_oop(), but the oop to check lives in memory at 'addr' rather
// than in a register.
void MacroAssembler::verify_oop_addr(Address addr, const char* s) {
  if (!VerifyOops) return;

  // Address adjust(addr.base(), addr.index(), addr.scale(), addr.disp() + BytesPerWord);
  // Pass register number to verify_oop_subroutine
  // NOTE: 'b' is intentionally leaked; the generated code embeds its address.
  char* b = new char[strlen(s) + 50];
  sprintf(b, "verify_oop_addr: %s", s);

#ifdef _LP64
  push(rscratch1); // save r10, trashed by movptr()
#endif
  push(rax); // save rax,
  // addr may contain rsp so we will have to adjust it based on the push
  // we just did (and on 64 bit we do two pushes)
  // NOTE: 64bit seemed to have had a bug in that it did movq(addr, rax); which
  // stores rax into addr which is backwards of what was intended.
  if (addr.uses(rsp)) {
    // Compensate for the push(es) above: one word on 32-bit, two on 64-bit.
    lea(rax, addr);
    pushptr(Address(rax, LP64_ONLY(2 *) BytesPerWord));
  } else {
    pushptr(addr);
  }

  ExternalAddress buffer((address) b);
  // pass msg argument
  // avoid using pushptr, as it modifies scratch registers
  // and our contract is not to modify anything
  movptr(rax, buffer.addr());
  push(rax);

  // call indirectly to solve generation ordering problem
  movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));
  call(rax);
  // Caller pops the arguments (addr, message) and restores rax, r10.
}

// Debug-only sanity checks of the current thread's TLAB:
// asserts start <= top <= end, stopping the VM on violation.
void MacroAssembler::verify_tlab() {
#ifdef ASSERT
  if (UseTLAB && VerifyOops) {
    Label next, ok;
    Register t1 = rsi;
    Register thread_reg = NOT_LP64(rbx) LP64_ONLY(r15_thread);

    push(t1);
    NOT_LP64(push(thread_reg));
    NOT_LP64(get_thread(thread_reg));

    movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())));
    cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_start_offset())));
    jcc(Assembler::aboveEqual, next);
    stop("assert(top >= start)");
    should_not_reach_here();

    bind(next);
    movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())));
    cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())));
    jcc(Assembler::aboveEqual, ok);
    stop("assert(top <= end)");
    should_not_reach_here();

    bind(ok);
    NOT_LP64(pop(thread_reg));
    pop(t1);
  }
#endif
}

// Decoded view of the x87 FPU control word (used by the FPU-state
// debugging helpers below).
class ControlWord {
 public:
  int32_t _value;

  int  rounding_control() const        { return  (_value >> 10) & 3      ; }
  int  precision_control() const       { return  (_value >>  8) & 3      ; }
  bool precision() const               { return ((_value >>  5) & 1) != 0; }
  bool underflow() const               { return ((_value >>  4) & 1) != 0; }
  bool overflow() const                { return ((_value >>  3) & 1) != 0; }
  bool zero_divide() const             { return ((_value >>  2) & 1) != 0; }
  bool denormalized() const            { return ((_value >>  1) & 1) != 0; }
  bool invalid() const                 { return ((_value >>  0) & 1) != 0; }

  void print() const {
    // rounding control
    const char* rc;
    switch (rounding_control()) {
      case 0: rc = "round near"; break;
      case 1: rc = "round down"; break;
      case 2: rc = "round up  "; break;
      case 3: rc = "chop      "; break;
    };
    // precision control
    const char* pc;
    switch (precision_control()) {
      case 0: pc = "24 bits "; break;
      case 1: pc = "reserved"; break;
      case 2: pc = "53 bits "; break;
      case 3: pc = "64 bits "; break;
    };
    // flags: uppercase letter = mask bit set, lowercase = clear
    char f[9];
    f[0] = ' ';
    f[1] = ' ';
    f[2] = (precision   ()) ? 'P' : 'p';
    f[3] = (underflow   ()) ? 'U' : 'u';
    f[4] = (overflow    ()) ? 'O' : 'o';
    f[5] = (zero_divide ()) ? 'Z' : 'z';
    f[6] = (denormalized()) ? 'D' : 'd';
    f[7] = (invalid     ()) ? 'I' : 'i';
    f[8] = '\x0';
    // output
    printf("%04x  masks = %s, %s, %s", _value & 0xFFFF, f, rc, pc);
  }

};

// Decoded view of the x87 FPU status word.
class StatusWord {
 public:
  int32_t _value;

  bool busy() const                    { return ((_value >> 15) & 1) != 0; }
  bool C3() const                      { return ((_value >> 14) & 1) != 0; }
  bool C2() const                      { return ((_value >> 10) & 1) != 0; }
  bool C1() const                      { return ((_value >>  9) & 1) != 0; }
  bool C0() const                      { return ((_value >>  8) & 1) != 0; }
  int  top() const                     { return  (_value >> 11) & 7      ; }
  bool error_status() const            { return ((_value >>  7) & 1) != 0; }
  bool stack_fault() const             { return ((_value >>  6) & 1) != 0; }
  bool precision() const               { return ((_value >>  5) & 1) != 0; }
  bool underflow() const               { return ((_value >>  4) & 1) != 0; }
  bool overflow() const                { return ((_value >>  3) & 1) != 0; }
  bool zero_divide() const             { return ((_value >>  2) & 1) != 0; }
  bool denormalized() const            { return ((_value >>  1) & 1) != 0; }
  bool invalid() const                 { return ((_value >>  0) & 1) != 0; }

  void print() const {
    // condition codes
    char c[5];
    c[0] = (C3()) ? '3' : '-';
    c[1] = (C2()) ? '2' : '-';
    c[2] = (C1()) ? '1' : '-';
    c[3] = (C0()) ? '0' : '-';
    c[4] = '\x0';
    // flags
    char f[9];
    f[0] = (error_status()) ? 'E' : '-';
    f[1] = (stack_fault ()) ? 'S' : '-';
    f[2] = (precision   ()) ? 'P' : '-';
    f[3] = (underflow   ()) ? 'U' : '-';
    f[4] = (overflow    ()) ? 'O' : '-';
    f[5] = (zero_divide ()) ? 'Z' : '-';
    f[6] = (denormalized()) ? 'D' : '-';
    f[7] = (invalid     ()) ? 'I' : '-';
    f[8] = '\x0';
    // output
    printf("%04x  flags = %s, cc =  %s, top = %d", _value & 0xFFFF, f, c, top());
  }

};

// Decoded view of the x87 FPU tag word (2 tag bits per register).
class TagWord {
 public:
  int32_t _value;

  int tag_at(int i) const              { return (_value >> (i*2)) & 3; }

  void print() const {
    printf("%04x", _value & 0xFFFF);
  }

};

// Raw 80-bit x87 register image: 64-bit mantissa (_m1:_m0) plus
// 16-bit sign/exponent (_ex).
class FPU_Register {
 public:
  int32_t _m0;
  int32_t _m1;
  int16_t _ex;

  bool is_indefinite() const           {
    return _ex == -1 && _m1 == (int32_t)0xC0000000 && _m0 == 0;
  }

  void print() const {
    char  sign = (_ex < 0) ? '-' : '+';
    const char* kind = (_ex == 0x7FFF || _ex == (int16_t)-1) ? "NaN" : "   ";
    printf("%c%04hx.%08x%08x  %s", sign, _ex, _m1, _m0, kind);
  };

};

// In-memory snapshot of the full x87 FPU state, as saved by the
// push_CPU_state()/pop_CPU_state() pair used by the debug helpers.
class FPU_State {
 public:
  enum {
    register_size       = 10,
    number_of_registers =  8,
    register_mask       =  7
  };

  ControlWord  _control_word;
  StatusWord   _status_word;
  TagWord      _tag_word;
  int32_t      _error_offset;
  int32_t      _error_selector;
  int32_t      _data_offset;
  int32_t      _data_selector;
  int8_t       _register[register_size * number_of_registers];

  // Tag of logical stack slot ST(i), accounting for the TOP pointer.
  int tag_for_st(int i) const          { return _tag_word.tag_at((_status_word.top() + i) & register_mask); }
  FPU_Register* st(int i) const        { return (FPU_Register*)&_register[register_size * i]; }

  const char* tag_as_string(int tag) const {
    switch (tag) {
      case 0: return "valid";
      case 1: return "zero";
      case 2: return "special";
      case 3: return "empty";
    }
    ShouldNotReachHere();
    return NULL;
  }

  void print() const {
    // print computation registers
    { int t = _status_word.top();
      for (int i = 0; i < number_of_registers; i++) {
        int j = (i - t) & register_mask;
        printf("%c r%d = ST%d = ", (j == 0 ? '*' : ' '), i, j);
        st(j)->print();
        printf(" %s\n", tag_as_string(_tag_word.tag_at(i)));
      }
    }
    printf("\n");
    // print control registers
    printf("ctrl = "); _control_word.print(); printf("\n");
    printf("stat = "); _status_word .print(); printf("\n");
    printf("tags = "); _tag_word    .print(); printf("\n");
  }

};

// Decoded view of the EFLAGS register.
class Flag_Register {
 public:
  int32_t _value;

  bool overflow() const                { return ((_value >> 11) & 1) != 0; }
  bool direction() const               { return ((_value >> 10) & 1) != 0; }
  bool sign() const                    { return ((_value >>  7) & 1) != 0; }
  bool zero() const                    { return ((_value >>  6) & 1) != 0; }
  bool auxiliary_carry() const         { return ((_value >>  4) & 1) != 0; }
  bool parity() const                  { return ((_value >>  2) & 1) != 0; }
  bool carry() const                   { return ((_value >>  0) & 1) != 0; }

  void print() const {
    // flags
    char f[8];
    f[0] = (overflow       ()) ? 'O' : '-';
    f[1] = (direction      ()) ? 'D' : '-';
    f[2] = (sign           ()) ? 'S' : '-';
    f[3] = (zero           ()) ? 'Z' : '-';
    f[4] = (auxiliary_carry()) ? 'A' : '-';
    f[5] = (parity         ()) ? 'P' : '-';
    f[6] = (carry          ()) ? 'C' : '-';
    f[7] = '\x0';
    // output
    printf("%08x  flags = %s", _value, f);
  }

};

// A single saved integer register.
class IU_Register {
 public:
  int32_t _value;

  void print() const {
    printf("%08x  %11d", _value, _value);
  }

};

// Saved integer-unit state: EFLAGS plus the eight general registers, in the
// order they are laid out by push_CPU_state().
class IU_State {
 public:
  Flag_Register _eflags;
  IU_Register   _rdi;
  IU_Register   _rsi;
  IU_Register   _rbp;
  IU_Register   _rsp;
  IU_Register   _rbx;
  IU_Register   _rdx;
  IU_Register   _rcx;
  IU_Register   _rax;

  void print() const {
    // computation registers
    printf("rax,  = "); _rax.print(); printf("\n");
    printf("rbx,  = "); _rbx.print(); printf("\n");
    printf("rcx  = "); _rcx.print(); printf("\n");
    printf("rdx  = "); _rdx.print(); printf("\n");
    printf("rdi  = "); _rdi.print(); printf("\n");
    printf("rsi  = "); _rsi.print(); printf("\n");
    printf("rbp,  = "); _rbp.print(); printf("\n");
    printf("rsp  = "); _rsp.print(); printf("\n");
    printf("\n");
    // control registers
    printf("flgs = "); _eflags.print(); printf("\n");
  }
};

// Full CPU snapshot: FPU state followed by integer-unit state, matching the
// stack layout produced by push_CPU_state().
class CPU_State {
 public:
  FPU_State _fpu_state;
  IU_State  _iu_state;

  void print() const {
    printf("--------------------------------------------------\n");
    _iu_state .print();
    printf("\n");
    _fpu_state.print();
    printf("--------------------------------------------------\n");
  }

};


// C entry point called from generated code by print_CPU_state().
static void _print_CPU_state(CPU_State* state) {
  state->print();
};


// Emit code that dumps the complete CPU state to stdout (debugging aid).
void MacroAssembler::print_CPU_state() {
  push_CPU_state();
  push(rsp);                // pass CPU state
  call(RuntimeAddress(CAST_FROM_FN_PTR(address, _print_CPU_state)));
  addptr(rsp, wordSize);       // discard argument
  pop_CPU_state();
}


// C entry point called from generated code by verify_FPU(); checks that the
// saved x87 stack is contiguous and has the expected depth.  Returns false
// (after printing and asserting) on any violation.
static bool _verify_FPU(int stack_depth, char* s, CPU_State* state) {
  static int counter = 0;
  FPU_State* fs = &state->_fpu_state;
  counter++;
  // For leaf calls, only verify that the top few elements remain empty.
  // We only need 1 empty at the top for C2 code.
  if( stack_depth < 0 ) {
    if( fs->tag_for_st(7) != 3 ) {
      printf("FPR7 not empty\n");
      state->print();
      assert(false, "error");
      return false;
    }
    return true;                // All other stack states do not matter
  }

  assert((fs->_control_word._value & 0xffff) == StubRoutines::_fpu_cntrl_wrd_std,
         "bad FPU control word");

  // compute stack depth
  int i = 0;
  while (i < FPU_State::number_of_registers && fs->tag_for_st(i)  < 3) i++;
  int d = i;
  while (i < FPU_State::number_of_registers && fs->tag_for_st(i) == 3) i++;
  // verify findings
  if (i != FPU_State::number_of_registers) {
    // stack not contiguous
    printf("%s: stack not contiguous at ST%d\n", s, i);
    state->print();
    assert(false, "error");
    return false;
  }
  // check if computed stack depth corresponds to expected stack depth
  // NOTE(review): this branch is unreachable -- stack_depth < 0 already
  // returned above; kept as-is to preserve upstream behavior.
  if (stack_depth < 0) {
    // expected stack depth is -stack_depth or less
    if (d > -stack_depth) {
      // too many elements on the stack
      printf("%s: <= %d stack elements expected but found %d\n", s, -stack_depth, d);
      state->print();
      assert(false, "error");
      return false;
    }
  } else {
    // expected stack depth is stack_depth
    if (d != stack_depth) {
      // wrong stack depth
      printf("%s: %d stack elements expected but found %d\n", s, stack_depth, d);
      state->print();
      assert(false, "error");
      return false;
    }
  }
  // everything is cool
  return true;
}


// Emit a runtime check of the x87 FPU stack depth (debug builds with
// -XX:+VerifyFPU).  Breaks into the debugger (int3) if _verify_FPU fails.
void MacroAssembler::verify_FPU(int stack_depth, const char* s) {
  if (!VerifyFPU) return;
  push_CPU_state();
  push(rsp);                // pass CPU state
  ExternalAddress msg((address) s);
  // pass message string s
  pushptr(msg.addr());
  push(stack_depth);        // pass stack depth
  call(RuntimeAddress(CAST_FROM_FN_PTR(address, _verify_FPU)));
  addptr(rsp, 3 * wordSize);   // discard arguments
  // check for error
  { Label L;
    testl(rax, rax);
    jcc(Assembler::notZero, L);
    int3();                  // break if error condition
    bind(L);
  }
  pop_CPU_state();
}

// Load the klass of object 'src' into 'dst', decoding it when compressed
// oops are in use (klass field is a narrow oop in that case).
void MacroAssembler::load_klass(Register dst, Register src) {
#ifdef _LP64
  if (UseCompressedOops) {
    movl(dst, Address(src, oopDesc::klass_offset_in_bytes()));
    decode_heap_oop_not_null(dst);
  } else
#endif
    movptr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
}

// Load the prototype mark-word header from the klass of object 'src' into
// 'dst'.  With compressed oops the narrow klass is decoded inline (combined
// with the load where the scale factor permits) to save an instruction.
void MacroAssembler::load_prototype_header(Register dst, Register src) {
#ifdef _LP64
  if (UseCompressedOops) {
    assert (Universe::heap() != NULL, "java heap should be initialized");
    movl(dst, Address(src, oopDesc::klass_offset_in_bytes()));
    if (Universe::narrow_oop_shift() != 0) {
      assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
      if (LogMinObjAlignmentInBytes == Address::times_8) {
        // Fold the decode shift into the addressing-mode scale.
        movq(dst, Address(r12_heapbase, dst, Address::times_8, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
      } else {
        // OK to use shift since we don't need to preserve flags.
        shlq(dst, LogMinObjAlignmentInBytes);
        movq(dst, Address(r12_heapbase, dst, Address::times_1, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
      }
    } else {
      // Zero-based compressed oops: narrow klass is already the address.
      movq(dst, Address(dst, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
    }
  } else
#endif
  {
    movptr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
    movptr(dst, Address(dst, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
  }
}

// Store klass 'src' into object 'dst', encoding it first when compressed
// oops are in use.  NOTE: clobbers 'src' in the compressed case.
void MacroAssembler::store_klass(Register dst, Register src) {
#ifdef _LP64
  if (UseCompressedOops) {
    encode_heap_oop_not_null(src);
    movl(Address(dst, oopDesc::klass_offset_in_bytes()), src);
  } else
#endif
    movptr(Address(dst, oopDesc::klass_offset_in_bytes()), src);
}

// Load a (possibly null) heap oop from 'src' into 'dst', decoding if
// compressed oops are in use.
void MacroAssembler::load_heap_oop(Register dst, Address src) {
#ifdef _LP64
  if (UseCompressedOops) {
    movl(dst, src);
    decode_heap_oop(dst);
  } else
#endif
    movptr(dst, src);
}

// Doesn't do verfication, generates fixed size code
void MacroAssembler::load_heap_oop_not_null(Register dst, Address src) {
#ifdef _LP64
  if (UseCompressedOops) {
    movl(dst, src);
    decode_heap_oop_not_null(dst);
  } else
#endif
    movptr(dst, src);
}

// Store heap oop 'src' to 'dst', encoding first when compressed oops are in
// use.  NOTE: clobbers 'src' in the compressed case.
void MacroAssembler::store_heap_oop(Address dst, Register src) {
#ifdef _LP64
  if (UseCompressedOops) {
    assert(!dst.uses(src), "not enough registers");
    encode_heap_oop(src);
    movl(dst, src);
  } else
#endif
    movptr(dst, src);
}

// Used for storing NULLs.
9073 void MacroAssembler::store_heap_oop_null(Address dst) { 9074 #ifdef _LP64 9075 if (UseCompressedOops) { 9076 movl(dst, (int32_t)NULL_WORD); 9077 } else { 9078 movslq(dst, (int32_t)NULL_WORD); 9079 } 9080 #else 9081 movl(dst, (int32_t)NULL_WORD); 9082 #endif 9083 } 9084 9085 #ifdef _LP64 9086 void MacroAssembler::store_klass_gap(Register dst, Register src) { 9087 if (UseCompressedOops) { 9088 // Store to klass gap in destination 9089 movl(Address(dst, oopDesc::klass_gap_offset_in_bytes()), src); 9090 } 9091 } 9092 9093 #ifdef ASSERT 9094 void MacroAssembler::verify_heapbase(const char* msg) { 9095 assert (UseCompressedOops, "should be compressed"); 9096 assert (Universe::heap() != NULL, "java heap should be initialized"); 9097 if (CheckCompressedOops) { 9098 Label ok; 9099 push(rscratch1); // cmpptr trashes rscratch1 9100 cmpptr(r12_heapbase, ExternalAddress((address)Universe::narrow_oop_base_addr())); 9101 jcc(Assembler::equal, ok); 9102 stop(msg); 9103 bind(ok); 9104 pop(rscratch1); 9105 } 9106 } 9107 #endif 9108 9109 // Algorithm must match oop.inline.hpp encode_heap_oop. 
9110 void MacroAssembler::encode_heap_oop(Register r) { 9111 #ifdef ASSERT 9112 verify_heapbase("MacroAssembler::encode_heap_oop: heap base corrupted?"); 9113 #endif 9114 verify_oop(r, "broken oop in encode_heap_oop"); 9115 if (Universe::narrow_oop_base() == NULL) { 9116 if (Universe::narrow_oop_shift() != 0) { 9117 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); 9118 shrq(r, LogMinObjAlignmentInBytes); 9119 } 9120 return; 9121 } 9122 testq(r, r); 9123 cmovq(Assembler::equal, r, r12_heapbase); 9124 subq(r, r12_heapbase); 9125 shrq(r, LogMinObjAlignmentInBytes); 9126 } 9127 9128 void MacroAssembler::encode_heap_oop_not_null(Register r) { 9129 #ifdef ASSERT 9130 verify_heapbase("MacroAssembler::encode_heap_oop_not_null: heap base corrupted?"); 9131 if (CheckCompressedOops) { 9132 Label ok; 9133 testq(r, r); 9134 jcc(Assembler::notEqual, ok); 9135 stop("null oop passed to encode_heap_oop_not_null"); 9136 bind(ok); 9137 } 9138 #endif 9139 verify_oop(r, "broken oop in encode_heap_oop_not_null"); 9140 if (Universe::narrow_oop_base() != NULL) { 9141 subq(r, r12_heapbase); 9142 } 9143 if (Universe::narrow_oop_shift() != 0) { 9144 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); 9145 shrq(r, LogMinObjAlignmentInBytes); 9146 } 9147 } 9148 9149 void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) { 9150 #ifdef ASSERT 9151 verify_heapbase("MacroAssembler::encode_heap_oop_not_null2: heap base corrupted?"); 9152 if (CheckCompressedOops) { 9153 Label ok; 9154 testq(src, src); 9155 jcc(Assembler::notEqual, ok); 9156 stop("null oop passed to encode_heap_oop_not_null2"); 9157 bind(ok); 9158 } 9159 #endif 9160 verify_oop(src, "broken oop in encode_heap_oop_not_null2"); 9161 if (dst != src) { 9162 movq(dst, src); 9163 } 9164 if (Universe::narrow_oop_base() != NULL) { 9165 subq(dst, r12_heapbase); 9166 } 9167 if (Universe::narrow_oop_shift() != 0) { 9168 assert (LogMinObjAlignmentInBytes 
== Universe::narrow_oop_shift(), "decode alg wrong"); 9169 shrq(dst, LogMinObjAlignmentInBytes); 9170 } 9171 } 9172 9173 void MacroAssembler::decode_heap_oop(Register r) { 9174 #ifdef ASSERT 9175 verify_heapbase("MacroAssembler::decode_heap_oop: heap base corrupted?"); 9176 #endif 9177 if (Universe::narrow_oop_base() == NULL) { 9178 if (Universe::narrow_oop_shift() != 0) { 9179 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); 9180 shlq(r, LogMinObjAlignmentInBytes); 9181 } 9182 } else { 9183 Label done; 9184 shlq(r, LogMinObjAlignmentInBytes); 9185 jccb(Assembler::equal, done); 9186 addq(r, r12_heapbase); 9187 bind(done); 9188 } 9189 verify_oop(r, "broken oop in decode_heap_oop"); 9190 } 9191 9192 void MacroAssembler::decode_heap_oop_not_null(Register r) { 9193 // Note: it will change flags 9194 assert (UseCompressedOops, "should only be used for compressed headers"); 9195 assert (Universe::heap() != NULL, "java heap should be initialized"); 9196 // Cannot assert, unverified entry point counts instructions (see .ad file) 9197 // vtableStubs also counts instructions in pd_code_size_limit. 9198 // Also do not verify_oop as this is called by verify_oop. 9199 if (Universe::narrow_oop_shift() != 0) { 9200 assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); 9201 shlq(r, LogMinObjAlignmentInBytes); 9202 if (Universe::narrow_oop_base() != NULL) { 9203 addq(r, r12_heapbase); 9204 } 9205 } else { 9206 assert (Universe::narrow_oop_base() == NULL, "sanity"); 9207 } 9208 } 9209 9210 void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) { 9211 // Note: it will change flags 9212 assert (UseCompressedOops, "should only be used for compressed headers"); 9213 assert (Universe::heap() != NULL, "java heap should be initialized"); 9214 // Cannot assert, unverified entry point counts instructions (see .ad file) 9215 // vtableStubs also counts instructions in pd_code_size_limit. 
9216 // Also do not verify_oop as this is called by verify_oop. 9217 if (Universe::narrow_oop_shift() != 0) { 9218 assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); 9219 if (LogMinObjAlignmentInBytes == Address::times_8) { 9220 leaq(dst, Address(r12_heapbase, src, Address::times_8, 0)); 9221 } else { 9222 if (dst != src) { 9223 movq(dst, src); 9224 } 9225 shlq(dst, LogMinObjAlignmentInBytes); 9226 if (Universe::narrow_oop_base() != NULL) { 9227 addq(dst, r12_heapbase); 9228 } 9229 } 9230 } else { 9231 assert (Universe::narrow_oop_base() == NULL, "sanity"); 9232 if (dst != src) { 9233 movq(dst, src); 9234 } 9235 } 9236 } 9237 9238 void MacroAssembler::set_narrow_oop(Register dst, jobject obj) { 9239 assert (UseCompressedOops, "should only be used for compressed headers"); 9240 assert (Universe::heap() != NULL, "java heap should be initialized"); 9241 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); 9242 int oop_index = oop_recorder()->find_index(obj); 9243 RelocationHolder rspec = oop_Relocation::spec(oop_index); 9244 mov_narrow_oop(dst, oop_index, rspec); 9245 } 9246 9247 void MacroAssembler::set_narrow_oop(Address dst, jobject obj) { 9248 assert (UseCompressedOops, "should only be used for compressed headers"); 9249 assert (Universe::heap() != NULL, "java heap should be initialized"); 9250 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); 9251 int oop_index = oop_recorder()->find_index(obj); 9252 RelocationHolder rspec = oop_Relocation::spec(oop_index); 9253 mov_narrow_oop(dst, oop_index, rspec); 9254 } 9255 9256 void MacroAssembler::cmp_narrow_oop(Register dst, jobject obj) { 9257 assert (UseCompressedOops, "should only be used for compressed headers"); 9258 assert (Universe::heap() != NULL, "java heap should be initialized"); 9259 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); 9260 int oop_index = oop_recorder()->find_index(obj); 9261 RelocationHolder rspec 
= oop_Relocation::spec(oop_index); 9262 Assembler::cmp_narrow_oop(dst, oop_index, rspec); 9263 } 9264 9265 void MacroAssembler::cmp_narrow_oop(Address dst, jobject obj) { 9266 assert (UseCompressedOops, "should only be used for compressed headers"); 9267 assert (Universe::heap() != NULL, "java heap should be initialized"); 9268 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); 9269 int oop_index = oop_recorder()->find_index(obj); 9270 RelocationHolder rspec = oop_Relocation::spec(oop_index); 9271 Assembler::cmp_narrow_oop(dst, oop_index, rspec); 9272 } 9273 9274 void MacroAssembler::reinit_heapbase() { 9275 if (UseCompressedOops) { 9276 movptr(r12_heapbase, ExternalAddress((address)Universe::narrow_oop_base_addr())); 9277 } 9278 } 9279 #endif // _LP64 9280 9281 // IndexOf for constant substrings with size >= 8 chars 9282 // which don't need to be loaded through stack. 9283 void MacroAssembler::string_indexofC8(Register str1, Register str2, 9284 Register cnt1, Register cnt2, 9285 int int_cnt2, Register result, 9286 XMMRegister vec, Register tmp) { 9287 assert(UseSSE42Intrinsics, "SSE4.2 is required"); 9288 9289 // This method uses pcmpestri inxtruction with bound registers 9290 // inputs: 9291 // xmm - substring 9292 // rax - substring length (elements count) 9293 // mem - scanned string 9294 // rdx - string length (elements count) 9295 // 0xd - mode: 1100 (substring search) + 01 (unsigned shorts) 9296 // outputs: 9297 // rcx - matched index in string 9298 assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri"); 9299 9300 Label RELOAD_SUBSTR, SCAN_TO_SUBSTR, SCAN_SUBSTR, 9301 RET_FOUND, RET_NOT_FOUND, EXIT, FOUND_SUBSTR, 9302 MATCH_SUBSTR_HEAD, RELOAD_STR, FOUND_CANDIDATE; 9303 9304 // Note, inline_string_indexOf() generates checks: 9305 // if (substr.count > string.count) return -1; 9306 // if (substr.count == 0) return 0; 9307 assert(int_cnt2 >= 8, "this code isused only for cnt2 >= 8 chars"); 9308 9309 // Load substring. 
9310 movdqu(vec, Address(str2, 0)); 9311 movl(cnt2, int_cnt2); 9312 movptr(result, str1); // string addr 9313 9314 if (int_cnt2 > 8) { 9315 jmpb(SCAN_TO_SUBSTR); 9316 9317 // Reload substr for rescan, this code 9318 // is executed only for large substrings (> 8 chars) 9319 bind(RELOAD_SUBSTR); 9320 movdqu(vec, Address(str2, 0)); 9321 negptr(cnt2); // Jumped here with negative cnt2, convert to positive 9322 9323 bind(RELOAD_STR); 9324 // We came here after the beginning of the substring was 9325 // matched but the rest of it was not so we need to search 9326 // again. Start from the next element after the previous match. 9327 9328 // cnt2 is number of substring reminding elements and 9329 // cnt1 is number of string reminding elements when cmp failed. 9330 // Restored cnt1 = cnt1 - cnt2 + int_cnt2 9331 subl(cnt1, cnt2); 9332 addl(cnt1, int_cnt2); 9333 movl(cnt2, int_cnt2); // Now restore cnt2 9334 9335 decrementl(cnt1); // Shift to next element 9336 cmpl(cnt1, cnt2); 9337 jccb(Assembler::negative, RET_NOT_FOUND); // Left less then substring 9338 9339 addptr(result, 2); 9340 9341 } // (int_cnt2 > 8) 9342 9343 // Scan string for start of substr in 16-byte vectors 9344 bind(SCAN_TO_SUBSTR); 9345 pcmpestri(vec, Address(result, 0), 0x0d); 9346 jccb(Assembler::below, FOUND_CANDIDATE); // CF == 1 9347 subl(cnt1, 8); 9348 jccb(Assembler::lessEqual, RET_NOT_FOUND); // Scanned full string 9349 cmpl(cnt1, cnt2); 9350 jccb(Assembler::negative, RET_NOT_FOUND); // Left less then substring 9351 addptr(result, 16); 9352 jmpb(SCAN_TO_SUBSTR); 9353 9354 // Found a potential substr 9355 bind(FOUND_CANDIDATE); 9356 // Matched whole vector if first element matched (tmp(rcx) == 0). 
9357 if (int_cnt2 == 8) { 9358 jccb(Assembler::overflow, RET_FOUND); // OF == 1 9359 } else { // int_cnt2 > 8 9360 jccb(Assembler::overflow, FOUND_SUBSTR); 9361 } 9362 // After pcmpestri tmp(rcx) contains matched element index 9363 // Compute start addr of substr 9364 lea(result, Address(result, tmp, Address::times_2)); 9365 9366 // Make sure string is still long enough 9367 subl(cnt1, tmp); 9368 cmpl(cnt1, cnt2); 9369 if (int_cnt2 == 8) { 9370 jccb(Assembler::greaterEqual, SCAN_TO_SUBSTR); 9371 } else { // int_cnt2 > 8 9372 jccb(Assembler::greaterEqual, MATCH_SUBSTR_HEAD); 9373 } 9374 // Left less then substring. 9375 9376 bind(RET_NOT_FOUND); 9377 movl(result, -1); 9378 jmpb(EXIT); 9379 9380 if (int_cnt2 > 8) { 9381 // This code is optimized for the case when whole substring 9382 // is matched if its head is matched. 9383 bind(MATCH_SUBSTR_HEAD); 9384 pcmpestri(vec, Address(result, 0), 0x0d); 9385 // Reload only string if does not match 9386 jccb(Assembler::noOverflow, RELOAD_STR); // OF == 0 9387 9388 Label CONT_SCAN_SUBSTR; 9389 // Compare the rest of substring (> 8 chars). 9390 bind(FOUND_SUBSTR); 9391 // First 8 chars are already matched. 
    // Scan the tail of a long (> 8 chars) substring.  cnt2 is converted to
    // a negative index counting up toward zero so the tail can be addressed
    // relative to the substring end.
    negptr(cnt2);
    addptr(cnt2, 8);

    bind(SCAN_SUBSTR);
    subl(cnt1, 8);
    cmpl(cnt2, -8); // Do not read beyond substring
    jccb(Assembler::lessEqual, CONT_SCAN_SUBSTR);
    // Back-up strings to avoid reading beyond substring:
    // cnt1 = cnt1 - cnt2 + 8
    addl(cnt1, cnt2); // cnt2 is negative
    addl(cnt1, 8);
    movl(cnt2, 8); negptr(cnt2);
    bind(CONT_SCAN_SUBSTR);
    if (int_cnt2 < (int)G) {
      // int_cnt2*2 fits in the 32-bit displacement field.
      movdqu(vec, Address(str2, cnt2, Address::times_2, int_cnt2*2));
      pcmpestri(vec, Address(result, cnt2, Address::times_2, int_cnt2*2), 0x0d);
    } else {
      // calculate index in register to avoid integer overflow (int_cnt2*2)
      movl(tmp, int_cnt2);
      addptr(tmp, cnt2);
      movdqu(vec, Address(str2, tmp, Address::times_2, 0));
      pcmpestri(vec, Address(result, tmp, Address::times_2, 0), 0x0d);
    }
    // Need to reload strings pointers if not matched whole vector
    jccb(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0
    addptr(cnt2, 8);
    jccb(Assembler::negative, SCAN_SUBSTR);
    // Fall through if found full substring

  } // (int_cnt2 > 8)

  bind(RET_FOUND);
  // Found result if we matched full small substring.
  // Compute substr offset
  subptr(result, str1);
  shrl(result, 1); // index in chars (addresses are 2 bytes per char)
  bind(EXIT);

} // string_indexofC8

// Substring search (String.indexOf intrinsic) for a substring that is either
// small (1..7 chars, length known at compile time in int_cnt2) or of runtime
// length (int_cnt2 == -1, length in cnt2).  Characters are 2-byte (UTF-16)
// elements throughout (times_2 scaling, load_unsigned_short).
//   str1/cnt1 - address / char count of the string being scanned
//   str2/cnt2 - address / char count of the substring
//   result    - receives the match index in chars, or -1 if not found
//   vec       - XMM scratch holding the (head of the) substring
//   tmp       - GPR scratch; must be rcx (pcmpestri output register)
// Small strings are loaded through stack if they cross page boundary.
void MacroAssembler::string_indexof(Register str1, Register str2,
                                    Register cnt1, Register cnt2,
                                    int int_cnt2,  Register result,
                                    XMMRegister vec, Register tmp) {
  assert(UseSSE42Intrinsics, "SSE4.2 is required");
  //
  // int_cnt2 is length of small (< 8 chars) constant substring
  // or (-1) for non constant substring in which case its length
  // is in cnt2 register.
  //
  // Note, inline_string_indexOf() generates checks:
  // if (substr.count > string.count) return -1;
  // if (substr.count == 0) return 0;
  //
  assert(int_cnt2 == -1 || (0 < int_cnt2 && int_cnt2 < 8), "should be != 0");

  // This method uses pcmpestri instruction with bound registers
  //   inputs:
  //     xmm - substring
  //     rax - substring length (elements count)
  //     mem - scanned string
  //     rdx - string length (elements count)
  //     0xd - mode: 1100 (substring search) + 01 (unsigned shorts)
  //   outputs:
  //     rcx - matched index in string
  assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri");

  Label RELOAD_SUBSTR, SCAN_TO_SUBSTR, SCAN_SUBSTR, ADJUST_STR,
        RET_FOUND, RET_NOT_FOUND, CLEANUP, FOUND_SUBSTR,
        FOUND_CANDIDATE;

  { //========================================================
    // We don't know where these strings are located
    // and we can't read beyond them. Load them through stack.
    Label BIG_STRINGS, CHECK_STR, COPY_SUBSTR, COPY_STR;

    movptr(tmp, rsp); // save old SP

    if (int_cnt2 > 0) {     // small (< 8 chars) constant substring
      if (int_cnt2 == 1) {  // One char
        load_unsigned_short(result, Address(str2, 0));
        movdl(vec, result); // move 32 bits
      } else if (int_cnt2 == 2) { // Two chars
        movdl(vec, Address(str2, 0)); // move 32 bits
      } else if (int_cnt2 == 4) { // Four chars
        movq(vec, Address(str2, 0));  // move 64 bits
      } else { // cnt2 = { 3, 5, 6, 7 }
        // Load 16 bytes ending at the last substring char, then shift the
        // garbage prefix out.  Safe because the array header precedes the
        // chars:
        // Array header size is 12 bytes in 32-bit VM
        // + 6 bytes for 3 chars == 18 bytes,
        // enough space to load vec and shift.
        assert(HeapWordSize*typeArrayKlass::header_size() >= 12,"sanity");
        movdqu(vec, Address(str2, (int_cnt2*2)-16));
        psrldq(vec, 16-(int_cnt2*2));
      }
    } else { // not constant substring
      cmpl(cnt2, 8);
      jccb(Assembler::aboveEqual, BIG_STRINGS); // Both strings are big enough

      // We can read beyond string if str+16 does not cross page boundary
      // since heaps are aligned and mapped by pages.
      assert(os::vm_page_size() < (int)G, "default page should be small");
      movl(result, str2); // We need only low 32 bits
      andl(result, (os::vm_page_size()-1));
      cmpl(result, (os::vm_page_size()-16));
      jccb(Assembler::belowEqual, CHECK_STR);

      // Move small strings to stack to allow load 16 bytes into vec.
      subptr(rsp, 16);
      int stk_offset = wordSize-2;
      push(cnt2);

      // Copy substring chars onto the stack, last char first (cnt2 counts
      // down to zero).
      bind(COPY_SUBSTR);
      load_unsigned_short(result, Address(str2, cnt2, Address::times_2, -2));
      movw(Address(rsp, cnt2, Address::times_2, stk_offset), result);
      decrement(cnt2);
      jccb(Assembler::notZero, COPY_SUBSTR);

      pop(cnt2);
      movptr(str2, rsp);  // New substring address
    } // non constant

    bind(CHECK_STR);
    cmpl(cnt1, 8);
    jccb(Assembler::aboveEqual, BIG_STRINGS);

    // Check cross page boundary.
    movl(result, str1); // We need only low 32 bits
    andl(result, (os::vm_page_size()-1));
    cmpl(result, (os::vm_page_size()-16));
    jccb(Assembler::belowEqual, BIG_STRINGS);

    // Copy the short string onto the stack as well.
    subptr(rsp, 16);
    int stk_offset = -2;
    if (int_cnt2 < 0) { // not constant
      push(cnt2);
      stk_offset += wordSize;
    }
    movl(cnt2, cnt1);

    bind(COPY_STR);
    load_unsigned_short(result, Address(str1, cnt2, Address::times_2, -2));
    movw(Address(rsp, cnt2, Address::times_2, stk_offset), result);
    decrement(cnt2);
    jccb(Assembler::notZero, COPY_STR);

    if (int_cnt2 < 0) { // not constant
      pop(cnt2);
    }
    movptr(str1, rsp);  // New string address

    bind(BIG_STRINGS);
    // Load substring.
    if (int_cnt2 < 0) { // -1
      movdqu(vec, Address(str2, 0));
      push(cnt2);       // substr count
      push(str2);       // substr addr
      push(str1);       // string addr
    } else {
      // Small (< 8 chars) constant substrings are loaded already.
      movl(cnt2, int_cnt2);
    }
    push(tmp);  // original SP

  } // Finished loading

  //========================================================
  // Start search
  //

  movptr(result, str1); // string addr

  if (int_cnt2  < 0) {  // Only for non constant substring
    jmpb(SCAN_TO_SUBSTR);

    // Stack layout set up above:
    // SP saved at sp+0
    // String saved at sp+1*wordSize
    // Substr saved at sp+2*wordSize
    // Substr count saved at sp+3*wordSize

    // Reload substr for rescan, this code
    // is executed only for large substrings (> 8 chars)
    bind(RELOAD_SUBSTR);
    movptr(str2, Address(rsp, 2*wordSize));
    movl(cnt2, Address(rsp, 3*wordSize));
    movdqu(vec, Address(str2, 0));
    // We came here after the beginning of the substring was
    // matched but the rest of it was not so we need to search
    // again. Start from the next element after the previous match.
    subptr(str1, result); // Restore counter
    shrl(str1, 1);
    addl(cnt1, str1);
    decrementl(cnt1);     // Shift to next element
    cmpl(cnt1, cnt2);
    jccb(Assembler::negative, RET_NOT_FOUND);  // Left less than substring

    addptr(result, 2);
  } // non constant

  // Scan string for start of substr in 16-byte vectors
  bind(SCAN_TO_SUBSTR);
  assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri");
  pcmpestri(vec, Address(result, 0), 0x0d);
  jccb(Assembler::below, FOUND_CANDIDATE);   // CF == 1
  subl(cnt1, 8);
  jccb(Assembler::lessEqual, RET_NOT_FOUND); // Scanned full string
  cmpl(cnt1, cnt2);
  jccb(Assembler::negative, RET_NOT_FOUND);  // Left less than substring
  addptr(result, 16);

  bind(ADJUST_STR);
  cmpl(cnt1, 8); // Do not read beyond string
  jccb(Assembler::greaterEqual, SCAN_TO_SUBSTR);
  // Back-up string to avoid reading beyond string.
  lea(result, Address(result, cnt1, Address::times_2, -16));
  movl(cnt1, 8);
  jmpb(SCAN_TO_SUBSTR);

  // Found a potential substr
  bind(FOUND_CANDIDATE);
  // After pcmpestri tmp(rcx) contains matched element index

  // Make sure string is still long enough
  subl(cnt1, tmp);
  cmpl(cnt1, cnt2);
  jccb(Assembler::greaterEqual, FOUND_SUBSTR);
  // Left less than substring.

  bind(RET_NOT_FOUND);
  movl(result, -1);
  jmpb(CLEANUP);

  bind(FOUND_SUBSTR);
  // Compute start addr of substr
  lea(result, Address(result, tmp, Address::times_2));

  if (int_cnt2 > 0) { // Constant substring
    // Repeat search for small substring (< 8 chars)
    // from new point without reloading substring.
    // Have to check that we don't read beyond string.
    cmpl(tmp, 8-int_cnt2);
    jccb(Assembler::greater, ADJUST_STR);
    // Fall through if matched whole substring.
  } else { // non constant
    assert(int_cnt2 == -1, "should be != 0");

    addl(tmp, cnt2);
    // Found result if we matched whole substring.
    cmpl(tmp, 8);
    jccb(Assembler::lessEqual, RET_FOUND);

    // Repeat search for small substring (<= 8 chars)
    // from new point 'str1' without reloading substring.
    cmpl(cnt2, 8);
    // Have to check that we don't read beyond string.
    jccb(Assembler::lessEqual, ADJUST_STR);

    Label CHECK_NEXT, CONT_SCAN_SUBSTR, RET_FOUND_LONG;
    // Compare the rest of substring (> 8 chars).
    movptr(str1, result);

    cmpl(tmp, cnt2);
    // First 8 chars are already matched.
    jccb(Assembler::equal, CHECK_NEXT);

    bind(SCAN_SUBSTR);
    pcmpestri(vec, Address(str1, 0), 0x0d);
    // Need to reload strings pointers if not matched whole vector
    jcc(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0

    bind(CHECK_NEXT);
    subl(cnt2, 8);
    jccb(Assembler::lessEqual, RET_FOUND_LONG); // Found full substring
    addptr(str1, 16);
    addptr(str2, 16);
    subl(cnt1, 8);
    cmpl(cnt2, 8); // Do not read beyond substring
    jccb(Assembler::greaterEqual, CONT_SCAN_SUBSTR);
    // Back-up strings to avoid reading beyond substring.
    lea(str2, Address(str2, cnt2, Address::times_2, -16));
    lea(str1, Address(str1, cnt2, Address::times_2, -16));
    subl(cnt1, cnt2);
    movl(cnt2, 8);
    addl(cnt1, 8);
    bind(CONT_SCAN_SUBSTR);
    movdqu(vec, Address(str2, 0));
    jmpb(SCAN_SUBSTR);

    bind(RET_FOUND_LONG);
    movptr(str1, Address(rsp, wordSize));
  } // non constant

  bind(RET_FOUND);
  // Compute substr offset
  subptr(result, str1);
  shrl(result, 1);      // index in chars

  bind(CLEANUP);
  pop(rsp); // restore SP

} // string_indexof

// Compare strings.
// Lexicographic comparison of two UTF-16 strings (String.compareTo intrinsic).
//   str1/cnt1, str2/cnt2 - addresses and char counts of the two strings
//   result - receives (char1 - char2) for the first mismatching position,
//            or (cnt1 - cnt2) if the strings match over the shorter length
//   vec1   - XMM scratch for the SSE4.2 wide-vector path
// In the SSE4.2 path the pcmpestri register binding requires
// result == rax, cnt2 == rdx, cnt1 == rcx (asserted below).
void MacroAssembler::string_compare(Register str1, Register str2,
                                    Register cnt1, Register cnt2, Register result,
                                    XMMRegister vec1) {
  Label LENGTH_DIFF_LABEL, POP_LABEL, DONE_LABEL, WHILE_HEAD_LABEL;

  // Compute the minimum of the string lengths and the
  // difference of the string lengths (stack).
  // Do the conditional move stuff
  movl(result, cnt1);
  subl(cnt1, cnt2);
  push(cnt1);            // length difference saved on the stack
  cmov32(Assembler::lessEqual, cnt2, result);  // cnt2 = min(cnt1, cnt2)

  // Is the minimum length zero?
  testl(cnt2, cnt2);
  jcc(Assembler::zero, LENGTH_DIFF_LABEL);

  // Load first characters
  load_unsigned_short(result, Address(str1, 0));
  load_unsigned_short(cnt1, Address(str2, 0));

  // Compare first characters
  subl(result, cnt1);
  jcc(Assembler::notZero, POP_LABEL);
  decrementl(cnt2);
  jcc(Assembler::zero, LENGTH_DIFF_LABEL);

  {
    // Check after comparing first character to see if strings are equivalent
    Label LSkip2;
    // Check if the strings start at same location
    cmpptr(str1, str2);
    jccb(Assembler::notEqual, LSkip2);

    // Check if the length difference is zero (from stack)
    cmpl(Address(rsp, 0), 0x0);
    jcc(Assembler::equal, LENGTH_DIFF_LABEL);

    // Strings might not be equivalent
    bind(LSkip2);
  }

  Address::ScaleFactor scale = Address::times_2;
  int stride = 8;     // chars per 16-byte vector

  // Advance to next element (first char already compared above)
  addptr(str1, 16/stride);
  addptr(str2, 16/stride);

  if (UseSSE42Intrinsics) {
    Label COMPARE_WIDE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_TAIL;
    int pcmpmask = 0x19;
    // Setup to compare 16-byte vectors
    movl(result, cnt2);
    andl(cnt2, ~(stride - 1));   // cnt2 holds the vector count
    jccb(Assembler::zero, COMPARE_TAIL);

    // Point past the vectorizable region; scan with a negative index.
    lea(str1, Address(str1, result, scale));
    lea(str2, Address(str2, result, scale));
    negptr(result);

    // pcmpestri
    //   inputs:
    //     vec1- substring
    //     rax - negative string length (elements count)
    //     mem - scanned string
    //     rdx - string length (elements count)
    //     pcmpmask - cmp mode: 11000 (string compare with negated result)
    //               + 00 (unsigned bytes) or + 01 (unsigned shorts)
    //   outputs:
    //     rcx - first mismatched element index
    assert(result == rax && cnt2 == rdx && cnt1 == rcx, "pcmpestri");

    bind(COMPARE_WIDE_VECTORS);
    movdqu(vec1, Address(str1, result, scale));
    pcmpestri(vec1, Address(str2, result, scale), pcmpmask);
    // After pcmpestri cnt1(rcx) contains mismatched element index

    jccb(Assembler::below, VECTOR_NOT_EQUAL);  // CF==1
    addptr(result, stride);
    subptr(cnt2, stride);
    jccb(Assembler::notZero, COMPARE_WIDE_VECTORS);

    // compare wide vectors tail
    testl(result, result);
    jccb(Assembler::zero, LENGTH_DIFF_LABEL);

    // Re-compare the last full vector overlapping the tail chars.
    movl(cnt2, stride);
    movl(result, stride);
    negptr(result);
    movdqu(vec1, Address(str1, result, scale));
    pcmpestri(vec1, Address(str2, result, scale), pcmpmask);
    jccb(Assembler::aboveEqual, LENGTH_DIFF_LABEL);

    // Mismatched characters in the vectors
    bind(VECTOR_NOT_EQUAL);
    addptr(result, cnt1);
    movptr(cnt2, result);
    load_unsigned_short(result, Address(str1, cnt2, scale));
    load_unsigned_short(cnt1, Address(str2, cnt2, scale));
    subl(result, cnt1);
    jmpb(POP_LABEL);

    bind(COMPARE_TAIL); // limit is zero
    movl(cnt2, result);
    // Fallthru to tail compare
  }

  // Shift str2 and str1 to the end of the arrays, negate min
  lea(str1, Address(str1, cnt2, scale, 0));
  lea(str2, Address(str2, cnt2, scale, 0));
  negptr(cnt2);

  // Compare the rest of the elements one char at a time
  bind(WHILE_HEAD_LABEL);
  load_unsigned_short(result, Address(str1, cnt2, scale, 0));
  load_unsigned_short(cnt1, Address(str2, cnt2, scale, 0));
  subl(result, cnt1);
  jccb(Assembler::notZero, POP_LABEL);
  increment(cnt2);
  jccb(Assembler::notZero, WHILE_HEAD_LABEL);

  // Strings are equal up to min length.  Return the length difference.
  bind(LENGTH_DIFF_LABEL);
  pop(result);
  jmpb(DONE_LABEL);

  // Discard the stored length difference
  bind(POP_LABEL);
  pop(cnt1);

  // That's it
  bind(DONE_LABEL);
}

// Compare char[] arrays aligned to 4 bytes or substrings.
// Sets result to 1 if the ranges are equal, 0 otherwise.  When is_array_equ
// is true, also performs the null and length checks needed by
// Arrays.equals(char[], char[]) and uses array base/length offsets.
void MacroAssembler::char_arrays_equals(bool is_array_equ, Register ary1, Register ary2,
                                        Register limit, Register result, Register chr,
                                        XMMRegister vec1, XMMRegister vec2) {
  Label TRUE_LABEL, FALSE_LABEL, DONE, COMPARE_VECTORS, COMPARE_CHAR;

  int length_offset  = arrayOopDesc::length_offset_in_bytes();
  int base_offset    = arrayOopDesc::base_offset_in_bytes(T_CHAR);

  // Check the input args: same reference is trivially equal.
  cmpptr(ary1, ary2);
  jcc(Assembler::equal, TRUE_LABEL);

  if (is_array_equ) {
    // Need additional checks for arrays_equals.
    // Null checks: either array null (and they are not the same reference)
    // means not equal.
    testptr(ary1, ary1);
    jcc(Assembler::zero, FALSE_LABEL);
    testptr(ary2, ary2);
    jcc(Assembler::zero, FALSE_LABEL);

    // Check the lengths
    movl(limit, Address(ary1, length_offset));
    cmpl(limit, Address(ary2, length_offset));
    jcc(Assembler::notEqual, FALSE_LABEL);
  }

  // count == 0
  testl(limit, limit);
  jcc(Assembler::zero, TRUE_LABEL);

  if (is_array_equ) {
    // Load array address
    lea(ary1, Address(ary1, base_offset));
    lea(ary2, Address(ary2, base_offset));
  }

  shll(limit, 1);      // byte count != 0
  movl(result, limit); // copy

  if (UseSSE42Intrinsics) {
    // With SSE4.2, use double quad vector compare
    Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;

    // Compare 16-byte vectors
    andl(result, 0x0000000e);  //   tail count (in bytes)
    andl(limit, 0xfffffff0);   // vector count (in bytes)
    jccb(Assembler::zero, COMPARE_TAIL);

    // Point past the vector region and scan with a negative index.
    lea(ary1, Address(ary1, limit, Address::times_1));
    lea(ary2, Address(ary2, limit, Address::times_1));
    negptr(limit);

    bind(COMPARE_WIDE_VECTORS);
    movdqu(vec1, Address(ary1, limit, Address::times_1));
    movdqu(vec2, Address(ary2, limit, Address::times_1));
    pxor(vec1, vec2);      // xor is zero iff the 16 bytes are identical

    ptest(vec1, vec1);
    jccb(Assembler::notZero, FALSE_LABEL);
    addptr(limit, 16);
    jcc(Assembler::notZero, COMPARE_WIDE_VECTORS);

    testl(result, result);
    jccb(Assembler::zero, TRUE_LABEL);

    // Compare the tail with one final (overlapping) 16-byte vector.
    movdqu(vec1, Address(ary1, result, Address::times_1, -16));
    movdqu(vec2, Address(ary2, result, Address::times_1, -16));
    pxor(vec1, vec2);

    ptest(vec1, vec1);
    jccb(Assembler::notZero, FALSE_LABEL);
    jmpb(TRUE_LABEL);

    bind(COMPARE_TAIL); // limit is zero
    movl(limit, result);
    // Fallthru to tail compare
  }

  // Compare 4-byte vectors
  andl(limit, 0xfffffffc); // vector count (in bytes)
  jccb(Assembler::zero, COMPARE_CHAR);

  lea(ary1, Address(ary1, limit, Address::times_1));
  lea(ary2, Address(ary2, limit, Address::times_1));
  negptr(limit);

  bind(COMPARE_VECTORS);
  movl(chr, Address(ary1, limit, Address::times_1));
  cmpl(chr, Address(ary2, limit, Address::times_1));
  jccb(Assembler::notEqual, FALSE_LABEL);
  addptr(limit, 4);
  jcc(Assembler::notZero, COMPARE_VECTORS);

  // Compare trailing char (final 2 bytes), if any
  bind(COMPARE_CHAR);
  testl(result, 0x2);   // tail  char
  jccb(Assembler::zero, TRUE_LABEL);
  load_unsigned_short(chr, Address(ary1, 0));
  load_unsigned_short(limit, Address(ary2, 0));
  cmpl(chr, limit);
  jccb(Assembler::notEqual, FALSE_LABEL);

  bind(TRUE_LABEL);
  movl(result, 1);   // return true
  jmpb(DONE);

  bind(FALSE_LABEL);
  xorl(result, result); // return false

  // That's it
  bind(DONE);
}

// BLOCK_COMMENT/BIND emit label names as assembly block comments in
// non-product builds to make generated-code listings readable.
#ifdef PRODUCT
#define BLOCK_COMMENT(str) /* nothing */
#else
#define BLOCK_COMMENT(str) block_comment(str)
#endif

#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")

// Generate code filling 'count' elements of type t at 'to' with 'value'.
// 'value' is first replicated so that a 32-bit store writes it into every
// byte/short lane; 'shift' converts between element count and 4-byte units
// (1 << shift elements == 4 bytes).
void MacroAssembler::generate_fill(BasicType t, bool aligned,
                                   Register to, Register value, Register count,
                                   Register rtmp, XMMRegister xtmp) {
  assert_different_registers(to, value, count, rtmp);
  Label L_exit, L_skip_align1, L_skip_align2, L_fill_byte;
  Label L_fill_2_bytes, L_fill_4_bytes;

  int shift = -1;
  switch (t) {
    case T_BYTE:
      shift = 2;
      break;
    case T_SHORT:
      shift = 1;
      break;
    case T_INT:
      shift = 0;
      break;
    default: ShouldNotReachHere();
  }

  // Replicate value across all lanes of a 32-bit word.
  if (t == T_BYTE) {
    andl(value, 0xff);
    movl(rtmp, value);
    shll(rtmp, 8);
    orl(value, rtmp);
  }
  if (t == T_SHORT) {
    andl(value, 0xffff);
  }
  if (t == T_BYTE || t == T_SHORT) {
    movl(rtmp, value);
    shll(rtmp, 16);
    orl(value, rtmp);
  }

  cmpl(count, 2<<shift); // Short arrays (< 8 bytes) fill by element
  jcc(Assembler::below, L_fill_4_bytes); // use unsigned cmp
  if (!UseUnalignedLoadStores && !aligned && (t == T_BYTE || t == T_SHORT)) {
    // align source address at 4 bytes address boundary
    if (t == T_BYTE) {
      // One byte misalignment happens only for byte arrays
      testptr(to, 1);
      jccb(Assembler::zero, L_skip_align1);
      movb(Address(to, 0), value);
      increment(to);
      decrement(count);
      BIND(L_skip_align1);
    }
    // Two bytes misalignment happens only for byte and short (char) arrays
    testptr(to, 2);
    jccb(Assembler::zero, L_skip_align2);
    movw(Address(to, 0), value);
    addptr(to, 2);
    subl(count, 1<<(shift-1));  // consumed 2 bytes worth of elements
    BIND(L_skip_align2);
  }
  if (UseSSE < 2) {
    // Pre-SSE2 path: fill with plain 32-bit stores only.
    Label L_fill_32_bytes_loop, L_check_fill_8_bytes, L_fill_8_bytes_loop, L_fill_8_bytes;
    // Fill 32-byte chunks
    subl(count, 8 << shift);
    jcc(Assembler::less, L_check_fill_8_bytes);
    align(16);

    BIND(L_fill_32_bytes_loop);

    for (int i = 0; i < 32; i += 4) {
      movl(Address(to, i), value);
    }

    addptr(to, 32);
    subl(count, 8 << shift);
    jcc(Assembler::greaterEqual, L_fill_32_bytes_loop);
    BIND(L_check_fill_8_bytes);
    addl(count, 8 << shift);
    jccb(Assembler::zero, L_exit);
    jmpb(L_fill_8_bytes);

    //
    // length is too short, just fill qwords
    //
    BIND(L_fill_8_bytes_loop);
    movl(Address(to, 0), value);
    movl(Address(to, 4), value);
    addptr(to, 8);
    BIND(L_fill_8_bytes);
    subl(count, 1 << (shift + 1));
    jcc(Assembler::greaterEqual, L_fill_8_bytes_loop);
    // fall through to fill 4 bytes
  } else {
    Label L_fill_32_bytes;
    if (!UseUnalignedLoadStores) {
      // align to 8 bytes, we know we are 4 byte aligned to start
      testptr(to, 4);
      jccb(Assembler::zero, L_fill_32_bytes);
      movl(Address(to, 0), value);
      addptr(to, 4);
      subl(count, 1<<shift);
    }
    BIND(L_fill_32_bytes);
    {
      assert( UseSSE >= 2, "supported cpu only" );
      Label L_fill_32_bytes_loop, L_check_fill_8_bytes, L_fill_8_bytes_loop, L_fill_8_bytes;
      // Fill 32-byte chunks
      movdl(xtmp, value);
      pshufd(xtmp, xtmp, 0);  // broadcast the 32-bit pattern to all 4 lanes

      subl(count, 8 << shift);
      jcc(Assembler::less, L_check_fill_8_bytes);
      align(16);

      BIND(L_fill_32_bytes_loop);

      if (UseUnalignedLoadStores) {
        movdqu(Address(to, 0), xtmp);
        movdqu(Address(to, 16), xtmp);
      } else {
        movq(Address(to, 0), xtmp);
        movq(Address(to, 8), xtmp);
        movq(Address(to, 16), xtmp);
        movq(Address(to, 24), xtmp);
      }

      addptr(to, 32);
      subl(count, 8 << shift);
      jcc(Assembler::greaterEqual, L_fill_32_bytes_loop);
      BIND(L_check_fill_8_bytes);
      addl(count, 8 << shift);
      jccb(Assembler::zero, L_exit);
      jmpb(L_fill_8_bytes);

      //
      // length is too short, just fill qwords
      //
      BIND(L_fill_8_bytes_loop);
      movq(Address(to, 0), xtmp);
      addptr(to, 8);
      BIND(L_fill_8_bytes);
      subl(count, 1 << (shift + 1));
      jcc(Assembler::greaterEqual, L_fill_8_bytes_loop);
    }
  }
  // fill trailing 4 bytes
  BIND(L_fill_4_bytes);
  testl(count, 1<<shift);
  jccb(Assembler::zero, L_fill_2_bytes);
  movl(Address(to, 0), value);
  if (t == T_BYTE || t == T_SHORT) {
    addptr(to, 4);
    BIND(L_fill_2_bytes);
    // fill trailing 2 bytes
    testl(count, 1<<(shift-1));
    jccb(Assembler::zero, L_fill_byte);
    movw(Address(to, 0), value);
    if (t == T_BYTE) {
      addptr(to, 2);
      BIND(L_fill_byte);
      // fill trailing byte
      testl(count, 1);
      jccb(Assembler::zero, L_exit);
      movb(Address(to, 0), value);
    } else {
      BIND(L_fill_byte);
    }
  } else {
    BIND(L_fill_2_bytes);
  }
  BIND(L_exit);
}
#undef BIND
#undef BLOCK_COMMENT


// Return the condition code that is the logical negation of 'cond'.
Assembler::Condition MacroAssembler::negate_condition(Assembler::Condition cond) {
  switch (cond) {
    // Note some conditions are synonyms for others
    case Assembler::zero:         return Assembler::notZero;
    case Assembler::notZero:      return Assembler::zero;
    case Assembler::less:         return Assembler::greaterEqual;
    case Assembler::lessEqual:    return Assembler::greater;
    case Assembler::greater:      return Assembler::lessEqual;
    case Assembler::greaterEqual: return Assembler::less;
    case Assembler::below:        return Assembler::aboveEqual;
    case Assembler::belowEqual:   return Assembler::above;
    case Assembler::above:        return Assembler::belowEqual;
    case Assembler::aboveEqual:   return Assembler::below;
    case Assembler::overflow:     return Assembler::noOverflow;
    case Assembler::noOverflow:   return Assembler::overflow;
    case Assembler::negative:     return Assembler::positive;
    case Assembler::positive:     return Assembler::negative;
    case Assembler::parity:       return Assembler::noParity;
    case Assembler::noParity:     return Assembler::parity;
  }
  ShouldNotReachHere(); return Assembler::overflow;
}

// RAII helper: the constructor emits a compare of the byte at flag_addr
// against 'value' and a conditional jump over the enclosed code; the
// destructor binds the jump target, so code emitted within the scope is
// skipped at runtime when the flag equals 'value'.
SkipIfEqual::SkipIfEqual(
    MacroAssembler* masm, const bool* flag_addr, bool value) {
  _masm = masm;
  _masm->cmp8(ExternalAddress((address)flag_addr), value);
  _masm->jcc(Assembler::equal, _label);
}

SkipIfEqual::~SkipIfEqual() {
  _masm->bind(_label);
}