/*
 * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "assembler_x86.inline.hpp"
#include "gc_interface/collectedHeap.inline.hpp"
#include "interpreter/interpreter.hpp"
#include "memory/cardTableModRefBS.hpp"
#include "memory/resourceArea.hpp"
#include "prims/methodHandles.hpp"
#include "runtime/biasedLocking.hpp"
#include "runtime/interfaceSupport.hpp"
#include "runtime/objectMonitor.hpp"
#include "runtime/os.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/stubRoutines.hpp"
#ifndef SERIALGC
#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
#include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"
#include "gc_implementation/g1/heapRegion.hpp"
#endif

// Implementation of AddressLiteral

// Builds a literal for `target` (never an lvalue: _is_lval is cleared) and
// picks the relocation spec that matches rtype. oop_type and none leave
// _rspec untouched; any rtype not listed below is a programming error.
AddressLiteral::AddressLiteral(address target, relocInfo::relocType rtype) {
  _is_lval = false;
  _target = target;
  switch (rtype) {
  case relocInfo::oop_type:
    // Oops are a special case. Normally they would be their own section
    // but in cases like icBuffer they are literals in the code stream that
    // we don't have a section for. We use none so that we get a literal address
    // which is always patchable.
    break;
  case relocInfo::external_word_type:
    _rspec = external_word_Relocation::spec(target);
    break;
  case relocInfo::internal_word_type:
    _rspec = internal_word_Relocation::spec(target);
    break;
  case relocInfo::opt_virtual_call_type:
    _rspec = opt_virtual_call_Relocation::spec();
    break;
  case relocInfo::static_call_type:
    _rspec = static_call_Relocation::spec();
    break;
  case relocInfo::runtime_call_type:
    _rspec = runtime_call_Relocation::spec();
    break;
  case relocInfo::poll_type:
  case relocInfo::poll_return_type:
    _rspec = Relocation::spec_simple(rtype);
    break;
  case relocInfo::none:
    break;
  default:
    ShouldNotReachHere();
    break;
  }
}

// Implementation of Address

#ifdef _LP64

// 64-bit build: array addresses cannot be folded into a single Address, so
// reaching this function is treated as a fatal error.
Address Address::make_array(ArrayAddress adr) {
  // Not implementable on 64bit machines
  // Should have been handled higher up the call chain.
  ShouldNotReachHere();
  return Address();
}

// exceedingly dangerous constructor
// Creates an Address with no base and no index register: just `disp` plus a
// relocation spec chosen from rtype. `loc` is only consulted to build the
// external/internal word relocation specs.
Address::Address(int disp, address loc, relocInfo::relocType rtype) {
  _base  = noreg;
  _index = noreg;
  _scale = no_scale;
  _disp  = disp;
  switch (rtype) {
    case relocInfo::external_word_type:
      _rspec = external_word_Relocation::spec(loc);
      break;
    case relocInfo::internal_word_type:
      _rspec = internal_word_Relocation::spec(loc);
      break;
    case relocInfo::runtime_call_type:
      // HMM
      _rspec = runtime_call_Relocation::spec();
      break;
    case relocInfo::poll_type:
    case relocInfo::poll_return_type:
      _rspec = Relocation::spec_simple(rtype);
      break;
    case relocInfo::none:
      break;
    default:
      ShouldNotReachHere();
  }
}
#else // LP64

// 32-bit build: fold the array base literal into the displacement of the
// index address and carry over the base's relocation spec.
Address Address::make_array(ArrayAddress adr) {
  AddressLiteral base = adr.base();
  Address index = adr.index();
  assert(index._disp == 0, "must not have disp"); // maybe it can?
  Address array(index._base, index._index, index._scale, (intptr_t) base.target());
  array._rspec = base._rspec;
  return array;
}

// exceedingly dangerous constructor
// Creates an absolute Address: no base/index, `loc` stored as the
// displacement, relocation taken verbatim from `spec`.
Address::Address(address loc, RelocationHolder spec) {
  _base  = noreg;
  _index = noreg;
  _scale = no_scale;
  _disp  = (intptr_t) loc;
  _rspec = spec;
}

#endif // _LP64



// Convert the raw encoding form into the form expected by the constructor for
// Address.  An index of 4 (rsp) corresponds to having no index, so convert
// that to noreg for the Address constructor.
// When disp_is_oop is set the resulting Address carries an oop_type
// relocation; otherwise its relocation holder is default-constructed.
Address Address::make_raw(int base, int index, int scale, int disp, bool disp_is_oop) {
  RelocationHolder rspec;
  if (disp_is_oop) {
    rspec = Relocation::spec_simple(relocInfo::oop_type);
  }
  bool valid_index = index != rsp->encoding();
  if (valid_index) {
    Address madr(as_Register(base), as_Register(index), (Address::ScaleFactor)scale, in_ByteSize(disp));
    madr._rspec = rspec;
    return madr;
  } else {
    Address madr(as_Register(base), noreg, Address::no_scale, in_ByteSize(disp));
    madr._rspec = rspec;
    return madr;
  }
}

// Implementation of Assembler

// Byte value returned for filling code space: 0xF4, the hlt opcode.
int AbstractAssembler::code_fill_byte() {
  return (u_char)'\xF4'; // hlt
}

// make this go away someday
// Convenience overload: emits `data` as a plain 32-bit word when there is no
// relocation, otherwise wraps rtype in a simple relocation spec and defers to
// the RelocationHolder overload.
void Assembler::emit_data(jint data, relocInfo::relocType rtype, int format) {
  if (rtype == relocInfo::none)
        emit_long(data);
  else  emit_data(data, Relocation::spec_simple(rtype), format);
}

// Emits a 32-bit word embedded in the current instruction. If rspec carries a
// relocation, it is recorded against the start of the enclosing instruction
// (inst_mark()), which is why an InstructionMark must be active.
void Assembler::emit_data(jint data, RelocationHolder const& rspec, int format) {
  assert(imm_operand == 0, "default format must be immediate in this file");
  assert(inst_mark() != NULL, "must be inside InstructionMark");
  if (rspec.type() !=  relocInfo::none) {
    #ifdef ASSERT
      check_relocation(rspec, format);
    #endif
    // Do not use AbstractAssembler::relocate, which is not intended for
    // embedded words.  Instead, relocate to the enclosing instruction.

    // hack. call32 is too wide for mask so use disp32
    if (format == call32_operand)
      code_section()->relocate(inst_mark(), rspec, disp32_operand);
    else
      code_section()->relocate(inst_mark(), rspec, format);
  }
  emit_long(data);
}

// Register number as stored in a 3-bit ModRM/SIB field: encodings 8 and up
// are reduced by 8. (The dropped high bit is presumably supplied by a REX
// prefix emitted by the caller -- not visible in this function.)
static int encode(Register r) {
  int enc = r->encoding();
  if (enc >= 8) {
    enc -= 8;
  }
  return enc;
}

// Same reduction as encode(Register), for XMM registers.
static int encode(XMMRegister r) {
  int enc = r->encoding();
  if (enc >= 8) {
    enc -= 8;
  }
  return enc;
}

// Byte-sized register/immediate arithmetic: emits op1, then op2 combined
// with the register field, then the 8-bit immediate.
void Assembler::emit_arith_b(int op1, int op2, Register dst, int imm8) {
  assert(dst->has_byte_register(), "must have byte register");
  assert(isByte(op1) && isByte(op2), "wrong opcode");
  assert(isByte(imm8), "not a byte");
  assert((op1 & 0x01) == 0, "should be 8bit operation");
  emit_byte(op1);
  emit_byte(op2 | encode(dst));
  emit_byte(imm8);
}


// 32-bit register/immediate arithmetic. When imm32 passes is8bit() the short
// form is used (op1 with bit 0x02 set, followed by one immediate byte);
// otherwise op1 is emitted unchanged with a full 32-bit immediate.
void Assembler::emit_arith(int op1, int op2, Register dst, int32_t imm32) {
  assert(isByte(op1) && isByte(op2), "wrong opcode");
  assert((op1 & 0x01) == 1, "should be 32bit operation");
  assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
  if (is8bit(imm32)) {
    emit_byte(op1 | 0x02); // set sign bit
    emit_byte(op2 | encode(dst));
    emit_byte(imm32 & 0xFF);
  } else {
    emit_byte(op1);
    emit_byte(op2 | encode(dst));
    emit_long(imm32);
  }
}

// immediate-to-memory forms
// Same short/long immediate selection as emit_arith above, but the operand
// is a memory address. The last argument to emit_operand is the number of
// immediate bytes that follow the operand (1 or 4); emit_operand adds it
// when computing the end of the instruction for RIP-relative displacements.
void Assembler::emit_arith_operand(int op1, Register rm, Address adr, int32_t imm32) {
  assert((op1 & 0x01) == 1, "should be 32bit operation");
  assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
  if (is8bit(imm32)) {
    emit_byte(op1 | 0x02); // set sign bit
    emit_operand(rm, adr, 1);
    emit_byte(imm32 & 0xFF);
  } else {
    emit_byte(op1);
    emit_operand(rm, adr, 4);
    emit_long(imm32);
  }
}

// Register/oop-immediate arithmetic. Not valid on 64-bit builds; the object
// pointer is emitted as a 32-bit word with an oop_type relocation.
void Assembler::emit_arith(int op1, int op2, Register dst, jobject obj) {
  LP64_ONLY(ShouldNotReachHere());
  assert(isByte(op1) && isByte(op2), "wrong opcode");
  assert((op1 & 0x01) == 1, "should be 32bit operation");
  assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
  InstructionMark im(this);
  emit_byte(op1);
  emit_byte(op2 | encode(dst));
  emit_data((intptr_t)obj, relocInfo::oop_type, 0);
}


// Register/register arithmetic: opcode byte followed by a byte holding dst in
// bits 5..3 and src in bits 2..0, OR-ed into op2.
void Assembler::emit_arith(int op1, int op2, Register dst, Register src) {
  assert(isByte(op1) && isByte(op2), "wrong opcode");
  emit_byte(op1);
  emit_byte(op2 | encode(dst) << 3 | encode(src));
}


// Emits the addressing bytes (ModRM, optional SIB, optional displacement) for
// the memory operand [base + index*scale + disp], with `reg` placed in the
// reg field of the first byte. The shortest displacement form is chosen only
// when no relocation is attached; a relocated displacement is always emitted
// as a 32-bit word via emit_data. rip_relative_correction is the number of
// bytes the caller will emit after the displacement; it is used only in the
// relocated, register-less case below.
void Assembler::emit_operand(Register reg, Register base, Register index,
                             Address::ScaleFactor scale, int disp,
                             RelocationHolder const& rspec,
                             int rip_relative_correction) {
  relocInfo::relocType rtype = (relocInfo::relocType) rspec.type();

  // Encode the registers as needed in the fields they are used in

  int regenc = encode(reg) << 3;
  int indexenc = index->is_valid() ? encode(index) << 3 : 0;
  int baseenc = base->is_valid() ? encode(base) : 0;

  if (base->is_valid()) {
    if (index->is_valid()) {
      assert(scale != Address::no_scale, "inconsistent address");
      // [base + index*scale + disp]
      // (rbp/r13 as base are excluded from the no-displacement form)
      if (disp == 0 && rtype == relocInfo::none  &&
          base != rbp LP64_ONLY(&& base != r13)) {
        // [base + index*scale]
        // [00 reg 100][ss index base]
        assert(index != rsp, "illegal addressing mode");
        emit_byte(0x04 | regenc);
        emit_byte(scale << 6 | indexenc | baseenc);
      } else if (is8bit(disp) && rtype == relocInfo::none) {
        // [base + index*scale + imm8]
        // [01 reg 100][ss index base] imm8
        assert(index != rsp, "illegal addressing mode");
        emit_byte(0x44 | regenc);
        emit_byte(scale << 6 | indexenc | baseenc);
        emit_byte(disp & 0xFF);
      } else {
        // [base + index*scale + disp32]
        // [10 reg 100][ss index base] disp32
        assert(index != rsp, "illegal addressing mode");
        emit_byte(0x84 | regenc);
        emit_byte(scale << 6 | indexenc | baseenc);
        emit_data(disp, rspec, disp32_operand);
      }
    } else if (base == rsp LP64_ONLY(|| base == r12)) {
      // [rsp + disp]
      // (rsp/r12 as base always get the two-byte form with second byte 0x24)
      if (disp == 0 && rtype == relocInfo::none) {
        // [rsp]
        // [00 reg 100][00 100 100]
        emit_byte(0x04 | regenc);
        emit_byte(0x24);
      } else if (is8bit(disp) && rtype == relocInfo::none) {
        // [rsp + imm8]
        // [01 reg 100][00 100 100] disp8
        emit_byte(0x44 | regenc);
        emit_byte(0x24);
        emit_byte(disp & 0xFF);
      } else {
        // [rsp + imm32]
        // [10 reg 100][00 100 100] disp32
        emit_byte(0x84 | regenc);
        emit_byte(0x24);
        emit_data(disp, rspec, disp32_operand);
      }
    } else {
      // [base + disp]
      assert(base != rsp LP64_ONLY(&& base != r12), "illegal addressing mode");
      if (disp == 0 && rtype == relocInfo::none &&
          base != rbp LP64_ONLY(&& base != r13)) {
        // [base]
        // [00 reg base]
        emit_byte(0x00 | regenc | baseenc);
      } else if (is8bit(disp) && rtype == relocInfo::none) {
        // [base + disp8]
        // [01 reg base] disp8
        emit_byte(0x40 | regenc | baseenc);
        emit_byte(disp & 0xFF);
      } else {
        // [base + disp32]
        // [10 reg base] disp32
        emit_byte(0x80 | regenc | baseenc);
        emit_data(disp, rspec, disp32_operand);
      }
    }
  } else {
    if (index->is_valid()) {
      assert(scale != Address::no_scale, "inconsistent address");
      // [index*scale + disp]
      // [00 reg 100][ss index 101] disp32
      assert(index != rsp, "illegal addressing mode");
      emit_byte(0x04 | regenc);
      emit_byte(scale << 6 | indexenc | 0x05);
      emit_data(disp, rspec, disp32_operand);
    } else if (rtype != relocInfo::none ) {
      // [disp] (64bit) RIP-RELATIVE (32bit) abs
      // [00 000 101] disp32

      emit_byte(0x05 | regenc);
      // Note that the RIP-rel. correction applies to the generated
      // disp field, but _not_ to the target address in the rspec.

      // disp was created by converting the target address minus the pc
      // at the start of the instruction. That needs more correction here.
      // intptr_t disp = target - next_ip;
      assert(inst_mark() != NULL, "must be inside InstructionMark");
      // next_ip = end of the instruction: current pc + the disp32 about to be
      // emitted + whatever immediate bytes the caller emits afterwards.
      address next_ip = pc() + sizeof(int32_t) + rip_relative_correction;
      int64_t adjusted = disp;
      // Do rip-rel adjustment for 64bit
      LP64_ONLY(adjusted -=  (next_ip - inst_mark()));
      assert(is_simm32(adjusted),
             "must be 32bit offset (RIP relative address)");
      emit_data((int32_t) adjusted, rspec, disp32_operand);

    } else {
      // 32bit never did this, did everything as the rip-rel/disp code above
      // [disp] ABSOLUTE
      // [00 reg 100][00 100 101] disp32
      emit_byte(0x04 | regenc);
      emit_byte(0x25);
      emit_data(disp, rspec, disp32_operand);
    }
  }
}

// XMM flavour: reinterprets the XMM register as a Register and reuses the
// general encoder above.
void Assembler::emit_operand(XMMRegister reg, Register base, Register index,
                             Address::ScaleFactor scale, int disp,
                             RelocationHolder const& rspec) {
  emit_operand((Register)reg, base, index, scale, disp, rspec);
}

// Secret local extension to Assembler::WhichOperand:
#define end_pc_operand (_WhichOperand_limit)

// Decodes the instruction starting at `inst` far enough to find the operand
// selected by `which`, and returns that operand's address. With
// which == end_pc_operand the return value is the address just past the
// instruction. Only the opcodes listed in the switch are understood; anything
// else hits ShouldNotReachHere().
address Assembler::locate_operand(address inst, WhichOperand which) {
  // Decode the given instruction, and return the address of
  // an embedded 32-bit operand word.

  // If "which" is disp32_operand, selects the displacement portion
  // of an effective address specifier.
  // If "which" is imm64_operand, selects the trailing immediate constant.
  // If "which" is call32_operand, selects the displacement of a call or jump.
  // Caller is responsible for ensuring that there is such an operand,
  // and that it is 32/64 bits wide.

  // If "which" is end_pc_operand, find the end of the instruction.

  address ip = inst;
  bool is_64bit = false;     // set once a REX.W-class prefix has been seen

  debug_only(bool has_disp32 = false);
  int tail_size = 0; // other random bytes (#32, #16, etc.) at end of insn

  again_after_prefix:
  switch (0xFF & *ip++) {

  // These convenience macros generate groups of "case" labels for the switch.
  // NOTE(review): REP4 is defined here but, unlike REP8/REP16, is not
  // #undef'd at the end of the switch -- confirm whether that is intentional.
#define REP4(x) (x)+0: case (x)+1: case (x)+2: case (x)+3
#define REP8(x) (x)+0: case (x)+1: case (x)+2: case (x)+3: \
             case (x)+4: case (x)+5: case (x)+6: case (x)+7
#define REP16(x) REP8((x)+0): \
              case REP8((x)+8)

  case CS_segment:
  case SS_segment:
  case DS_segment:
  case ES_segment:
  case FS_segment:
  case GS_segment:
    // Seems dubious
    LP64_ONLY(assert(false, "shouldn't have that prefix"));
    assert(ip == inst+1, "only one prefix allowed");
    goto again_after_prefix;

  case 0x67:
  case REX:
  case REX_B:
  case REX_X:
  case REX_XB:
  case REX_R:
  case REX_RB:
  case REX_RX:
  case REX_RXB:
    NOT_LP64(assert(false, "64bit prefixes"));
    goto again_after_prefix;

  case REX_W:
  case REX_WB:
  case REX_WX:
  case REX_WXB:
  case REX_WR:
  case REX_WRB:
  case REX_WRX:
  case REX_WRXB:
    NOT_LP64(assert(false, "64bit prefixes"));
    is_64bit = true;
    goto again_after_prefix;

  case 0xFF: // pushq a; decl a; incl a; call a; jmp a
  case 0x88: // movb a, r
  case 0x89: // movl a, r
  case 0x8A: // movb r, a
  case 0x8B: // movl r, a
  case 0x8F: // popl a
    debug_only(has_disp32 = true);
    break;

  case 0x68: // pushq #32
    if (which == end_pc_operand) {
      return ip + 4;
    }
    assert(which == imm_operand && !is_64bit, "pushl has no disp32 or 64bit immediate");
    return ip;                  // not produced by emit_operand

  case 0x66: // movw ... (size prefix)
    again_after_size_prefix2:
    switch (0xFF & *ip++) {
    case REX:
    case REX_B:
    case REX_X:
    case REX_XB:
    case REX_R:
    case REX_RB:
    case REX_RX:
    case REX_RXB:
    case REX_W:
    case REX_WB:
    case REX_WX:
    case REX_WXB:
    case REX_WR:
    case REX_WRB:
    case REX_WRX:
    case REX_WRXB:
      NOT_LP64(assert(false, "64bit prefix found"));
      goto again_after_size_prefix2;
    case 0x8B: // movw r, a
    case 0x89: // movw a, r
      debug_only(has_disp32 = true);
      break;
    case 0xC7: // movw a, #16
      debug_only(has_disp32 = true);
      tail_size = 2;  // the imm16
      break;
    case 0x0F: // several SSE/SSE2 variants
      ip--;    // reparse the 0x0F
      goto again_after_prefix;
    default:
      ShouldNotReachHere();
    }
    break;

  case REP8(0xB8): // movl/q r, #32/#64(oop?)
    if (which == end_pc_operand)  return ip + (is_64bit ? 8 : 4);
    // these asserts are somewhat nonsensical
#ifndef _LP64
    assert(which == imm_operand || which == disp32_operand, "");
#else
    assert((which == call32_operand || which == imm_operand) && is_64bit ||
           which == narrow_oop_operand && !is_64bit, "");
#endif // _LP64
    return ip;

  case 0x69: // imul r, a, #32
  case 0xC7: // movl a, #32(oop?)
    tail_size = 4;
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0x0F: // movx..., etc.
    switch (0xFF & *ip++) {
    case 0x12: // movlps
    case 0x28: // movaps
    case 0x2E: // ucomiss
    case 0x2F: // comiss
    case 0x54: // andps
    case 0x55: // andnps
    case 0x56: // orps
    case 0x57: // xorps
    case 0x6E: // movd
    case 0x7E: // movd
    case 0xAE: // ldmxcsr   a
      // 64bit side says it these have both operands but that doesn't
      // appear to be true
      debug_only(has_disp32 = true);
      break;

    case 0xAD: // shrd r, a, %cl
    case 0xAF: // imul r, a
    case 0xBE: // movsbl r, a (movsxb)
    case 0xBF: // movswl r, a (movsxw)
    case 0xB6: // movzbl r, a (movzxb)
    case 0xB7: // movzwl r, a (movzxw)
    case REP16(0x40): // cmovl cc, r, a
    case 0xB0: // cmpxchgb
    case 0xB1: // cmpxchg
    case 0xC1: // xaddl
    case 0xC7: // cmpxchg8
    case REP16(0x90): // setcc a
      debug_only(has_disp32 = true);
      // fall out of the switch to decode the address
      break;

    case 0xAC: // shrd r, a, #8
      debug_only(has_disp32 = true);
      tail_size = 1;  // the imm8
      break;

    case REP16(0x80): // jcc rdisp32
      if (which == end_pc_operand)  return ip + 4;
      assert(which == call32_operand, "jcc has no disp32 or imm");
      return ip;
    default:
      ShouldNotReachHere();
    }
    break;

  case 0x81: // addl a, #32; addl r, #32
    // also: orl, adcl, sbbl, andl, subl, xorl, cmpl
    // on 32bit in the case of cmpl, the imm might be an oop
    tail_size = 4;
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0x83: // addl a, #8; addl r, #8
    // also: orl, adcl, sbbl, andl, subl, xorl, cmpl
    debug_only(has_disp32 = true); // has both kinds of operands!
    tail_size = 1;
    break;

  case 0x9B:
    switch (0xFF & *ip++) {
    case 0xD9: // fnstcw a
      debug_only(has_disp32 = true);
      break;
    default:
      ShouldNotReachHere();
    }
    break;

  case REP4(0x00): // addb a, r; addl a, r; addb r, a; addl r, a
  case REP4(0x10): // adc...
  case REP4(0x20): // and...
  case REP4(0x30): // xor...
  case REP4(0x08): // or...
  case REP4(0x18): // sbb...
  case REP4(0x28): // sub...
  case 0xF7: // mull a
  case 0x8D: // lea r, a
  case 0x87: // xchg r, a
  case REP4(0x38): // cmp...
  case 0x85: // test r, a
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0xC1: // sal a, #8; sar a, #8; shl a, #8; shr a, #8
  case 0xC6: // movb a, #8
  case 0x80: // cmpb a, #8
  case 0x6B: // imul r, a, #8
    debug_only(has_disp32 = true); // has both kinds of operands!
    tail_size = 1; // the imm8
    break;

  case 0xE8: // call rdisp32
  case 0xE9: // jmp  rdisp32
    if (which == end_pc_operand)  return ip + 4;
    assert(which == call32_operand, "call has no disp32 or imm");
    return ip;

  case 0xD1: // sal a, 1; sar a, 1; shl a, 1; shr a, 1
  case 0xD3: // sal a, %cl; sar a, %cl; shl a, %cl; shr a, %cl
  case 0xD9: // fld_s a; fst_s a; fstp_s a; fldcw a
  case 0xDD: // fld_d a; fst_d a; fstp_d a
  case 0xDB: // fild_s a; fistp_s a; fld_x a; fstp_x a
  case 0xDF: // fild_d a; fistp_d a
  case 0xD8: // fadd_s a; fsubr_s a; fmul_s a; fdivr_s a; fcomp_s a
  case 0xDC: // fadd_d a; fsubr_d a; fmul_d a; fdivr_d a; fcomp_d a
  case 0xDE: // faddp_d a; fsubrp_d a; fmulp_d a; fdivrp_d a; fcompp_d a
    debug_only(has_disp32 = true);
    break;

  case 0xF0:                    // Lock
    assert(os::is_MP(), "only on MP");
    goto again_after_prefix;

  case 0xF3:                    // For SSE
  case 0xF2:                    // For SSE2
    switch (0xFF & *ip++) {
    case REX:
    case REX_B:
    case REX_X:
    case REX_XB:
    case REX_R:
    case REX_RB:
    case REX_RX:
    case REX_RXB:
    case REX_W:
    case REX_WB:
    case REX_WX:
    case REX_WXB:
    case REX_WR:
    case REX_WRB:
    case REX_WRX:
    case REX_WRXB:
      NOT_LP64(assert(false, "found 64bit prefix"));
      ip++;
      // no break: a REX prefix costs one extra byte, then control falls
      // into the common one-byte skip below
    default:
      ip++;
    }
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  default:
    ShouldNotReachHere();

#undef REP8
#undef REP16
  }

  // Getting here means the opcode is followed by addressing bytes in the
  // layout produced by emit_operand.
  assert(which != call32_operand, "instruction is not a call, jmp, or jcc");
#ifdef _LP64
  assert(which != imm_operand, "instruction is not a movq reg, imm64");
#else
  // assert(which != imm_operand || has_imm32, "instruction has no imm32 field");
  assert(which != imm_operand || has_disp32, "instruction has no imm32 field");
#endif // LP64
  assert(which != disp32_operand || has_disp32, "instruction has no disp32 field");

  // parse the output of emit_operand
  int op2 = 0xFF & *ip++;
  int base = op2 & 0x07;
  int op3 = -1;
  const int b100 = 4;
  const int b101 = 5;
  // low bits == 100 with a memory mode (top two bits != 11) means a second
  // addressing byte follows
  if (base == b100 && (op2 >> 6) != 3) {
    op3 = 0xFF & *ip++;
    base = op3 & 0x07;   // refetch the base
  }
  // now ip points at the disp (if any)

  switch (op2 >> 6) {
  case 0:
    // [00 reg  100][ss index base]
    // [00 reg  100][00   100  esp]
    // [00 reg base]
    // [00 reg  100][ss index  101][disp32]
    // [00 reg  101]               [disp32]

    if (base == b101) {
      if (which == disp32_operand)
        return ip;              // caller wants the disp32
      ip += 4;                  // skip the disp32
    }
    break;

  case 1:
    // [01 reg  100][ss index base][disp8]
    // [01 reg  100][00   100  esp][disp8]
    // [01 reg base]               [disp8]
    ip += 1;                    // skip the disp8
    break;

  case 2:
    // [10 reg  100][ss index base][disp32]
    // [10 reg  100][00   100  esp][disp32]
    // [10 reg base]               [disp32]
    if (which == disp32_operand)
      return ip;                // caller wants the disp32
    ip += 4;                    // skip the disp32
    break;

  case 3:
    // [11 reg base]  (not a memory addressing mode)
    break;
  }

  if (which == end_pc_operand) {
    return ip + tail_size;
  }

  // Whatever is left at ip is the trailing immediate.
#ifdef _LP64
  assert(which == narrow_oop_operand && !is_64bit, "instruction is not a movl adr, imm32");
#else
  assert(which == imm_operand, "instruction has only an imm field");
#endif // LP64
  return ip;
}

// Returns the address of the instruction that follows the one at `inst`.
address Assembler::locate_next_instruction(address inst) {
  // Secretly share code with locate_operand:
  return locate_operand(inst, end_pc_operand);
}


#ifdef ASSERT
// Debug-only sanity check used by emit_data: for call and data relocations,
// the operand that locate_operand() finds in the instruction started at
// inst_mark() must sit exactly at the current emission point (pc()).
void Assembler::check_relocation(RelocationHolder const& rspec, int format) {
  address inst = inst_mark();
  assert(inst != NULL && inst < pc(), "must point to beginning of instruction");
  address opnd;

  Relocation* r = rspec.reloc();
  if (r->type() == relocInfo::none) {
    return;
  } else if (r->is_call() || format == call32_operand) {
    // assert(format == imm32_operand, "cannot specify a nonzero format");
    opnd = locate_operand(inst, call32_operand);
  } else if (r->is_data()) {
    assert(format == imm_operand || format == disp32_operand
           LP64_ONLY(|| format == narrow_oop_operand), "format ok");
    opnd = locate_operand(inst, (WhichOperand)format);
  } else {
    // neither a call nor a data relocation: nothing to locate
    assert(format == imm_operand, "cannot specify a format");
    return;
  }
  assert(opnd == pc(), "must put operand where relocs can find it");
}
#endif // ASSERT

// Operand emission restricted to registers with encodings below 8 (asserted
// for reg, and for the address's base and index).
void Assembler::emit_operand32(Register reg, Address adr) {
  assert(reg->encoding() < 8, "no extended registers");
  assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
  emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
               adr._rspec);
}

// Unpacks an Address into its components and forwards to the core encoder,
// passing rip_relative_correction through unchanged.
void Assembler::emit_operand(Register reg, Address adr,
                             int rip_relative_correction) {
  emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
               adr._rspec,
               rip_relative_correction);
}

void
Assembler::emit_operand(XMMRegister reg, Address adr) { 798 emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp, 799 adr._rspec); 800 } 801 802 // MMX operations 803 void Assembler::emit_operand(MMXRegister reg, Address adr) { 804 assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers"); 805 emit_operand((Register)reg, adr._base, adr._index, adr._scale, adr._disp, adr._rspec); 806 } 807 808 // work around gcc (3.2.1-7a) bug 809 void Assembler::emit_operand(Address adr, MMXRegister reg) { 810 assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers"); 811 emit_operand((Register)reg, adr._base, adr._index, adr._scale, adr._disp, adr._rspec); 812 } 813 814 815 void Assembler::emit_farith(int b1, int b2, int i) { 816 assert(isByte(b1) && isByte(b2), "wrong opcode"); 817 assert(0 <= i && i < 8, "illegal stack offset"); 818 emit_byte(b1); 819 emit_byte(b2 + i); 820 } 821 822 823 // Now the Assembler instructions (identical for 32/64 bits) 824 825 void Assembler::adcl(Address dst, int32_t imm32) { 826 InstructionMark im(this); 827 prefix(dst); 828 emit_arith_operand(0x81, rdx, dst, imm32); 829 } 830 831 void Assembler::adcl(Address dst, Register src) { 832 InstructionMark im(this); 833 prefix(dst, src); 834 emit_byte(0x11); 835 emit_operand(src, dst); 836 } 837 838 void Assembler::adcl(Register dst, int32_t imm32) { 839 prefix(dst); 840 emit_arith(0x81, 0xD0, dst, imm32); 841 } 842 843 void Assembler::adcl(Register dst, Address src) { 844 InstructionMark im(this); 845 prefix(src, dst); 846 emit_byte(0x13); 847 emit_operand(dst, src); 848 } 849 850 void Assembler::adcl(Register dst, Register src) { 851 (void) prefix_and_encode(dst->encoding(), src->encoding()); 852 emit_arith(0x13, 0xC0, dst, src); 853 } 854 855 void Assembler::addl(Address dst, int32_t imm32) { 856 InstructionMark im(this); 857 prefix(dst); 858 emit_arith_operand(0x81, rax, dst, imm32); 859 } 860 861 void Assembler::addl(Address dst, Register src) { 
862 InstructionMark im(this); 863 prefix(dst, src); 864 emit_byte(0x01); 865 emit_operand(src, dst); 866 } 867 868 void Assembler::addl(Register dst, int32_t imm32) { 869 prefix(dst); 870 emit_arith(0x81, 0xC0, dst, imm32); 871 } 872 873 void Assembler::addl(Register dst, Address src) { 874 InstructionMark im(this); 875 prefix(src, dst); 876 emit_byte(0x03); 877 emit_operand(dst, src); 878 } 879 880 void Assembler::addl(Register dst, Register src) { 881 (void) prefix_and_encode(dst->encoding(), src->encoding()); 882 emit_arith(0x03, 0xC0, dst, src); 883 } 884 885 void Assembler::addr_nop_4() { 886 // 4 bytes: NOP DWORD PTR [EAX+0] 887 emit_byte(0x0F); 888 emit_byte(0x1F); 889 emit_byte(0x40); // emit_rm(cbuf, 0x1, EAX_enc, EAX_enc); 890 emit_byte(0); // 8-bits offset (1 byte) 891 } 892 893 void Assembler::addr_nop_5() { 894 // 5 bytes: NOP DWORD PTR [EAX+EAX*0+0] 8-bits offset 895 emit_byte(0x0F); 896 emit_byte(0x1F); 897 emit_byte(0x44); // emit_rm(cbuf, 0x1, EAX_enc, 0x4); 898 emit_byte(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc); 899 emit_byte(0); // 8-bits offset (1 byte) 900 } 901 902 void Assembler::addr_nop_7() { 903 // 7 bytes: NOP DWORD PTR [EAX+0] 32-bits offset 904 emit_byte(0x0F); 905 emit_byte(0x1F); 906 emit_byte(0x80); // emit_rm(cbuf, 0x2, EAX_enc, EAX_enc); 907 emit_long(0); // 32-bits offset (4 bytes) 908 } 909 910 void Assembler::addr_nop_8() { 911 // 8 bytes: NOP DWORD PTR [EAX+EAX*0+0] 32-bits offset 912 emit_byte(0x0F); 913 emit_byte(0x1F); 914 emit_byte(0x84); // emit_rm(cbuf, 0x2, EAX_enc, 0x4); 915 emit_byte(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc); 916 emit_long(0); // 32-bits offset (4 bytes) 917 } 918 919 void Assembler::addsd(XMMRegister dst, XMMRegister src) { 920 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 921 emit_byte(0xF2); 922 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 923 emit_byte(0x0F); 924 emit_byte(0x58); 925 emit_byte(0xC0 | encode); 926 } 927 928 void Assembler::addsd(XMMRegister dst, 
Address src) { 929 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 930 InstructionMark im(this); 931 emit_byte(0xF2); 932 prefix(src, dst); 933 emit_byte(0x0F); 934 emit_byte(0x58); 935 emit_operand(dst, src); 936 } 937 938 void Assembler::addss(XMMRegister dst, XMMRegister src) { 939 NOT_LP64(assert(VM_Version::supports_sse(), "")); 940 emit_byte(0xF3); 941 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 942 emit_byte(0x0F); 943 emit_byte(0x58); 944 emit_byte(0xC0 | encode); 945 } 946 947 void Assembler::addss(XMMRegister dst, Address src) { 948 NOT_LP64(assert(VM_Version::supports_sse(), "")); 949 InstructionMark im(this); 950 emit_byte(0xF3); 951 prefix(src, dst); 952 emit_byte(0x0F); 953 emit_byte(0x58); 954 emit_operand(dst, src); 955 } 956 957 void Assembler::andl(Register dst, int32_t imm32) { 958 prefix(dst); 959 emit_arith(0x81, 0xE0, dst, imm32); 960 } 961 962 void Assembler::andl(Register dst, Address src) { 963 InstructionMark im(this); 964 prefix(src, dst); 965 emit_byte(0x23); 966 emit_operand(dst, src); 967 } 968 969 void Assembler::andl(Register dst, Register src) { 970 (void) prefix_and_encode(dst->encoding(), src->encoding()); 971 emit_arith(0x23, 0xC0, dst, src); 972 } 973 974 void Assembler::andpd(XMMRegister dst, Address src) { 975 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 976 InstructionMark im(this); 977 emit_byte(0x66); 978 prefix(src, dst); 979 emit_byte(0x0F); 980 emit_byte(0x54); 981 emit_operand(dst, src); 982 } 983 984 void Assembler::bsfl(Register dst, Register src) { 985 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 986 emit_byte(0x0F); 987 emit_byte(0xBC); 988 emit_byte(0xC0 | encode); 989 } 990 991 void Assembler::bsrl(Register dst, Register src) { 992 assert(!VM_Version::supports_lzcnt(), "encoding is treated as LZCNT"); 993 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 994 emit_byte(0x0F); 995 emit_byte(0xBD); 996 emit_byte(0xC0 | encode); 997 } 998 999 void 
Assembler::bswapl(Register reg) { // bswap 1000 int encode = prefix_and_encode(reg->encoding()); 1001 emit_byte(0x0F); 1002 emit_byte(0xC8 | encode); 1003 } 1004 1005 void Assembler::call(Label& L, relocInfo::relocType rtype) { 1006 // suspect disp32 is always good 1007 int operand = LP64_ONLY(disp32_operand) NOT_LP64(imm_operand); 1008 1009 if (L.is_bound()) { 1010 const int long_size = 5; 1011 int offs = (int)( target(L) - pc() ); 1012 assert(offs <= 0, "assembler error"); 1013 InstructionMark im(this); 1014 // 1110 1000 #32-bit disp 1015 emit_byte(0xE8); 1016 emit_data(offs - long_size, rtype, operand); 1017 } else { 1018 InstructionMark im(this); 1019 // 1110 1000 #32-bit disp 1020 L.add_patch_at(code(), locator()); 1021 1022 emit_byte(0xE8); 1023 emit_data(int(0), rtype, operand); 1024 } 1025 } 1026 1027 void Assembler::call(Register dst) { 1028 // This was originally using a 32bit register encoding 1029 // and surely we want 64bit! 1030 // this is a 32bit encoding but in 64bit mode the default 1031 // operand size is 64bit so there is no need for the 1032 // wide prefix. So prefix only happens if we use the 1033 // new registers. Much like push/pop. 1034 int x = offset(); 1035 // this may be true but dbx disassembles it as if it 1036 // were 32bits... 
1037 // int encode = prefix_and_encode(dst->encoding()); 1038 // if (offset() != x) assert(dst->encoding() >= 8, "what?"); 1039 int encode = prefixq_and_encode(dst->encoding()); 1040 1041 emit_byte(0xFF); 1042 emit_byte(0xD0 | encode); 1043 } 1044 1045 1046 void Assembler::call(Address adr) { 1047 InstructionMark im(this); 1048 prefix(adr); 1049 emit_byte(0xFF); 1050 emit_operand(rdx, adr); 1051 } 1052 1053 void Assembler::call_literal(address entry, RelocationHolder const& rspec) { 1054 assert(entry != NULL, "call most probably wrong"); 1055 InstructionMark im(this); 1056 emit_byte(0xE8); 1057 intptr_t disp = entry - (_code_pos + sizeof(int32_t)); 1058 assert(is_simm32(disp), "must be 32bit offset (call2)"); 1059 // Technically, should use call32_operand, but this format is 1060 // implied by the fact that we're emitting a call instruction. 1061 1062 int operand = LP64_ONLY(disp32_operand) NOT_LP64(call32_operand); 1063 emit_data((int) disp, rspec, operand); 1064 } 1065 1066 void Assembler::cdql() { 1067 emit_byte(0x99); 1068 } 1069 1070 void Assembler::cmovl(Condition cc, Register dst, Register src) { 1071 NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction")); 1072 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 1073 emit_byte(0x0F); 1074 emit_byte(0x40 | cc); 1075 emit_byte(0xC0 | encode); 1076 } 1077 1078 1079 void Assembler::cmovl(Condition cc, Register dst, Address src) { 1080 NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction")); 1081 prefix(src, dst); 1082 emit_byte(0x0F); 1083 emit_byte(0x40 | cc); 1084 emit_operand(dst, src); 1085 } 1086 1087 void Assembler::cmpb(Address dst, int imm8) { 1088 InstructionMark im(this); 1089 prefix(dst); 1090 emit_byte(0x80); 1091 emit_operand(rdi, dst, 1); 1092 emit_byte(imm8); 1093 } 1094 1095 void Assembler::cmpl(Address dst, int32_t imm32) { 1096 InstructionMark im(this); 1097 prefix(dst); 1098 emit_byte(0x81); 1099 emit_operand(rdi, dst, 4); 1100 emit_long(imm32); 
1101 } 1102 1103 void Assembler::cmpl(Register dst, int32_t imm32) { 1104 prefix(dst); 1105 emit_arith(0x81, 0xF8, dst, imm32); 1106 } 1107 1108 void Assembler::cmpl(Register dst, Register src) { 1109 (void) prefix_and_encode(dst->encoding(), src->encoding()); 1110 emit_arith(0x3B, 0xC0, dst, src); 1111 } 1112 1113 1114 void Assembler::cmpl(Register dst, Address src) { 1115 InstructionMark im(this); 1116 prefix(src, dst); 1117 emit_byte(0x3B); 1118 emit_operand(dst, src); 1119 } 1120 1121 void Assembler::cmpw(Address dst, int imm16) { 1122 InstructionMark im(this); 1123 assert(!dst.base_needs_rex() && !dst.index_needs_rex(), "no extended registers"); 1124 emit_byte(0x66); 1125 emit_byte(0x81); 1126 emit_operand(rdi, dst, 2); 1127 emit_word(imm16); 1128 } 1129 1130 // The 32-bit cmpxchg compares the value at adr with the contents of rax, 1131 // and stores reg into adr if so; otherwise, the value at adr is loaded into rax,. 1132 // The ZF is set if the compared values were equal, and cleared otherwise. 1133 void Assembler::cmpxchgl(Register reg, Address adr) { // cmpxchg 1134 if (Atomics & 2) { 1135 // caveat: no instructionmark, so this isn't relocatable. 1136 // Emit a synthetic, non-atomic, CAS equivalent. 1137 // Beware. The synthetic form sets all ICCs, not just ZF. 1138 // cmpxchg r,[m] is equivalent to rax, = CAS (m, rax, r) 1139 cmpl(rax, adr); 1140 movl(rax, adr); 1141 if (reg != rax) { 1142 Label L ; 1143 jcc(Assembler::notEqual, L); 1144 movl(adr, reg); 1145 bind(L); 1146 } 1147 } else { 1148 InstructionMark im(this); 1149 prefix(adr, reg); 1150 emit_byte(0x0F); 1151 emit_byte(0xB1); 1152 emit_operand(reg, adr); 1153 } 1154 } 1155 1156 void Assembler::comisd(XMMRegister dst, Address src) { 1157 // NOTE: dbx seems to decode this as comiss even though the 1158 // 0x66 is there. 
Strangly ucomisd comes out correct 1159 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1160 emit_byte(0x66); 1161 comiss(dst, src); 1162 } 1163 1164 void Assembler::comiss(XMMRegister dst, Address src) { 1165 NOT_LP64(assert(VM_Version::supports_sse(), "")); 1166 1167 InstructionMark im(this); 1168 prefix(src, dst); 1169 emit_byte(0x0F); 1170 emit_byte(0x2F); 1171 emit_operand(dst, src); 1172 } 1173 1174 void Assembler::cvtdq2pd(XMMRegister dst, XMMRegister src) { 1175 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1176 emit_byte(0xF3); 1177 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 1178 emit_byte(0x0F); 1179 emit_byte(0xE6); 1180 emit_byte(0xC0 | encode); 1181 } 1182 1183 void Assembler::cvtdq2ps(XMMRegister dst, XMMRegister src) { 1184 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1185 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 1186 emit_byte(0x0F); 1187 emit_byte(0x5B); 1188 emit_byte(0xC0 | encode); 1189 } 1190 1191 void Assembler::cvtsd2ss(XMMRegister dst, XMMRegister src) { 1192 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1193 emit_byte(0xF2); 1194 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 1195 emit_byte(0x0F); 1196 emit_byte(0x5A); 1197 emit_byte(0xC0 | encode); 1198 } 1199 1200 void Assembler::cvtsi2sdl(XMMRegister dst, Register src) { 1201 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1202 emit_byte(0xF2); 1203 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 1204 emit_byte(0x0F); 1205 emit_byte(0x2A); 1206 emit_byte(0xC0 | encode); 1207 } 1208 1209 void Assembler::cvtsi2ssl(XMMRegister dst, Register src) { 1210 NOT_LP64(assert(VM_Version::supports_sse(), "")); 1211 emit_byte(0xF3); 1212 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 1213 emit_byte(0x0F); 1214 emit_byte(0x2A); 1215 emit_byte(0xC0 | encode); 1216 } 1217 1218 void Assembler::cvtss2sd(XMMRegister dst, XMMRegister src) { 1219 
NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1220 emit_byte(0xF3); 1221 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 1222 emit_byte(0x0F); 1223 emit_byte(0x5A); 1224 emit_byte(0xC0 | encode); 1225 } 1226 1227 void Assembler::cvttsd2sil(Register dst, XMMRegister src) { 1228 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1229 emit_byte(0xF2); 1230 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 1231 emit_byte(0x0F); 1232 emit_byte(0x2C); 1233 emit_byte(0xC0 | encode); 1234 } 1235 1236 void Assembler::cvttss2sil(Register dst, XMMRegister src) { 1237 NOT_LP64(assert(VM_Version::supports_sse(), "")); 1238 emit_byte(0xF3); 1239 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 1240 emit_byte(0x0F); 1241 emit_byte(0x2C); 1242 emit_byte(0xC0 | encode); 1243 } 1244 1245 void Assembler::decl(Address dst) { 1246 // Don't use it directly. Use MacroAssembler::decrement() instead. 1247 InstructionMark im(this); 1248 prefix(dst); 1249 emit_byte(0xFF); 1250 emit_operand(rcx, dst); 1251 } 1252 1253 void Assembler::divsd(XMMRegister dst, Address src) { 1254 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1255 InstructionMark im(this); 1256 emit_byte(0xF2); 1257 prefix(src, dst); 1258 emit_byte(0x0F); 1259 emit_byte(0x5E); 1260 emit_operand(dst, src); 1261 } 1262 1263 void Assembler::divsd(XMMRegister dst, XMMRegister src) { 1264 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1265 emit_byte(0xF2); 1266 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 1267 emit_byte(0x0F); 1268 emit_byte(0x5E); 1269 emit_byte(0xC0 | encode); 1270 } 1271 1272 void Assembler::divss(XMMRegister dst, Address src) { 1273 NOT_LP64(assert(VM_Version::supports_sse(), "")); 1274 InstructionMark im(this); 1275 emit_byte(0xF3); 1276 prefix(src, dst); 1277 emit_byte(0x0F); 1278 emit_byte(0x5E); 1279 emit_operand(dst, src); 1280 } 1281 1282 void Assembler::divss(XMMRegister dst, XMMRegister src) { 1283 
NOT_LP64(assert(VM_Version::supports_sse(), "")); 1284 emit_byte(0xF3); 1285 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 1286 emit_byte(0x0F); 1287 emit_byte(0x5E); 1288 emit_byte(0xC0 | encode); 1289 } 1290 1291 void Assembler::emms() { 1292 NOT_LP64(assert(VM_Version::supports_mmx(), "")); 1293 emit_byte(0x0F); 1294 emit_byte(0x77); 1295 } 1296 1297 void Assembler::hlt() { 1298 emit_byte(0xF4); 1299 } 1300 1301 void Assembler::idivl(Register src) { 1302 int encode = prefix_and_encode(src->encoding()); 1303 emit_byte(0xF7); 1304 emit_byte(0xF8 | encode); 1305 } 1306 1307 void Assembler::divl(Register src) { // Unsigned 1308 int encode = prefix_and_encode(src->encoding()); 1309 emit_byte(0xF7); 1310 emit_byte(0xF0 | encode); 1311 } 1312 1313 void Assembler::imull(Register dst, Register src) { 1314 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 1315 emit_byte(0x0F); 1316 emit_byte(0xAF); 1317 emit_byte(0xC0 | encode); 1318 } 1319 1320 1321 void Assembler::imull(Register dst, Register src, int value) { 1322 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 1323 if (is8bit(value)) { 1324 emit_byte(0x6B); 1325 emit_byte(0xC0 | encode); 1326 emit_byte(value & 0xFF); 1327 } else { 1328 emit_byte(0x69); 1329 emit_byte(0xC0 | encode); 1330 emit_long(value); 1331 } 1332 } 1333 1334 void Assembler::incl(Address dst) { 1335 // Don't use it directly. Use MacroAssembler::increment() instead. 
1336 InstructionMark im(this); 1337 prefix(dst); 1338 emit_byte(0xFF); 1339 emit_operand(rax, dst); 1340 } 1341 1342 void Assembler::jcc(Condition cc, Label& L, relocInfo::relocType rtype) { 1343 InstructionMark im(this); 1344 relocate(rtype); 1345 assert((0 <= cc) && (cc < 16), "illegal cc"); 1346 if (L.is_bound()) { 1347 address dst = target(L); 1348 assert(dst != NULL, "jcc most probably wrong"); 1349 1350 const int short_size = 2; 1351 const int long_size = 6; 1352 intptr_t offs = (intptr_t)dst - (intptr_t)_code_pos; 1353 if (rtype == relocInfo::none && is8bit(offs - short_size)) { 1354 // 0111 tttn #8-bit disp 1355 emit_byte(0x70 | cc); 1356 emit_byte((offs - short_size) & 0xFF); 1357 } else { 1358 // 0000 1111 1000 tttn #32-bit disp 1359 assert(is_simm32(offs - long_size), 1360 "must be 32bit offset (call4)"); 1361 emit_byte(0x0F); 1362 emit_byte(0x80 | cc); 1363 emit_long(offs - long_size); 1364 } 1365 } else { 1366 // Note: could eliminate cond. jumps to this jump if condition 1367 // is the same however, seems to be rather unlikely case. 
1368 // Note: use jccb() if label to be bound is very close to get 1369 // an 8-bit displacement 1370 L.add_patch_at(code(), locator()); 1371 emit_byte(0x0F); 1372 emit_byte(0x80 | cc); 1373 emit_long(0); 1374 } 1375 } 1376 1377 void Assembler::jccb(Condition cc, Label& L) { 1378 if (L.is_bound()) { 1379 const int short_size = 2; 1380 address entry = target(L); 1381 assert(is8bit((intptr_t)entry - ((intptr_t)_code_pos + short_size)), 1382 "Dispacement too large for a short jmp"); 1383 intptr_t offs = (intptr_t)entry - (intptr_t)_code_pos; 1384 // 0111 tttn #8-bit disp 1385 emit_byte(0x70 | cc); 1386 emit_byte((offs - short_size) & 0xFF); 1387 } else { 1388 InstructionMark im(this); 1389 L.add_patch_at(code(), locator()); 1390 emit_byte(0x70 | cc); 1391 emit_byte(0); 1392 } 1393 } 1394 1395 void Assembler::jmp(Address adr) { 1396 InstructionMark im(this); 1397 prefix(adr); 1398 emit_byte(0xFF); 1399 emit_operand(rsp, adr); 1400 } 1401 1402 void Assembler::jmp(Label& L, relocInfo::relocType rtype) { 1403 if (L.is_bound()) { 1404 address entry = target(L); 1405 assert(entry != NULL, "jmp most probably wrong"); 1406 InstructionMark im(this); 1407 const int short_size = 2; 1408 const int long_size = 5; 1409 intptr_t offs = entry - _code_pos; 1410 if (rtype == relocInfo::none && is8bit(offs - short_size)) { 1411 emit_byte(0xEB); 1412 emit_byte((offs - short_size) & 0xFF); 1413 } else { 1414 emit_byte(0xE9); 1415 emit_long(offs - long_size); 1416 } 1417 } else { 1418 // By default, forward jumps are always 32-bit displacements, since 1419 // we can't yet know where the label will be bound. If you're sure that 1420 // the forward jump will not run beyond 256 bytes, use jmpb to 1421 // force an 8-bit displacement. 
1422 InstructionMark im(this); 1423 relocate(rtype); 1424 L.add_patch_at(code(), locator()); 1425 emit_byte(0xE9); 1426 emit_long(0); 1427 } 1428 } 1429 1430 void Assembler::jmp(Register entry) { 1431 int encode = prefix_and_encode(entry->encoding()); 1432 emit_byte(0xFF); 1433 emit_byte(0xE0 | encode); 1434 } 1435 1436 void Assembler::jmp_literal(address dest, RelocationHolder const& rspec) { 1437 InstructionMark im(this); 1438 emit_byte(0xE9); 1439 assert(dest != NULL, "must have a target"); 1440 intptr_t disp = dest - (_code_pos + sizeof(int32_t)); 1441 assert(is_simm32(disp), "must be 32bit offset (jmp)"); 1442 emit_data(disp, rspec.reloc(), call32_operand); 1443 } 1444 1445 void Assembler::jmpb(Label& L) { 1446 if (L.is_bound()) { 1447 const int short_size = 2; 1448 address entry = target(L); 1449 assert(is8bit((entry - _code_pos) + short_size), 1450 "Dispacement too large for a short jmp"); 1451 assert(entry != NULL, "jmp most probably wrong"); 1452 intptr_t offs = entry - _code_pos; 1453 emit_byte(0xEB); 1454 emit_byte((offs - short_size) & 0xFF); 1455 } else { 1456 InstructionMark im(this); 1457 L.add_patch_at(code(), locator()); 1458 emit_byte(0xEB); 1459 emit_byte(0); 1460 } 1461 } 1462 1463 void Assembler::ldmxcsr( Address src) { 1464 NOT_LP64(assert(VM_Version::supports_sse(), "")); 1465 InstructionMark im(this); 1466 prefix(src); 1467 emit_byte(0x0F); 1468 emit_byte(0xAE); 1469 emit_operand(as_Register(2), src); 1470 } 1471 1472 void Assembler::leal(Register dst, Address src) { 1473 InstructionMark im(this); 1474 #ifdef _LP64 1475 emit_byte(0x67); // addr32 1476 prefix(src, dst); 1477 #endif // LP64 1478 emit_byte(0x8D); 1479 emit_operand(dst, src); 1480 } 1481 1482 void Assembler::lock() { 1483 if (Atomics & 1) { 1484 // Emit either nothing, a NOP, or a NOP: prefix 1485 emit_byte(0x90) ; 1486 } else { 1487 emit_byte(0xF0); 1488 } 1489 } 1490 1491 void Assembler::lzcntl(Register dst, Register src) { 1492 assert(VM_Version::supports_lzcnt(), "encoding 
is treated as BSR"); 1493 emit_byte(0xF3); 1494 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 1495 emit_byte(0x0F); 1496 emit_byte(0xBD); 1497 emit_byte(0xC0 | encode); 1498 } 1499 1500 // Emit mfence instruction 1501 void Assembler::mfence() { 1502 NOT_LP64(assert(VM_Version::supports_sse2(), "unsupported");) 1503 emit_byte( 0x0F ); 1504 emit_byte( 0xAE ); 1505 emit_byte( 0xF0 ); 1506 } 1507 1508 void Assembler::mov(Register dst, Register src) { 1509 LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src)); 1510 } 1511 1512 void Assembler::movapd(XMMRegister dst, XMMRegister src) { 1513 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1514 int dstenc = dst->encoding(); 1515 int srcenc = src->encoding(); 1516 emit_byte(0x66); 1517 if (dstenc < 8) { 1518 if (srcenc >= 8) { 1519 prefix(REX_B); 1520 srcenc -= 8; 1521 } 1522 } else { 1523 if (srcenc < 8) { 1524 prefix(REX_R); 1525 } else { 1526 prefix(REX_RB); 1527 srcenc -= 8; 1528 } 1529 dstenc -= 8; 1530 } 1531 emit_byte(0x0F); 1532 emit_byte(0x28); 1533 emit_byte(0xC0 | dstenc << 3 | srcenc); 1534 } 1535 1536 void Assembler::movaps(XMMRegister dst, XMMRegister src) { 1537 NOT_LP64(assert(VM_Version::supports_sse(), "")); 1538 int dstenc = dst->encoding(); 1539 int srcenc = src->encoding(); 1540 if (dstenc < 8) { 1541 if (srcenc >= 8) { 1542 prefix(REX_B); 1543 srcenc -= 8; 1544 } 1545 } else { 1546 if (srcenc < 8) { 1547 prefix(REX_R); 1548 } else { 1549 prefix(REX_RB); 1550 srcenc -= 8; 1551 } 1552 dstenc -= 8; 1553 } 1554 emit_byte(0x0F); 1555 emit_byte(0x28); 1556 emit_byte(0xC0 | dstenc << 3 | srcenc); 1557 } 1558 1559 void Assembler::movb(Register dst, Address src) { 1560 NOT_LP64(assert(dst->has_byte_register(), "must have byte register")); 1561 InstructionMark im(this); 1562 prefix(src, dst, true); 1563 emit_byte(0x8A); 1564 emit_operand(dst, src); 1565 } 1566 1567 1568 void Assembler::movb(Address dst, int imm8) { 1569 InstructionMark im(this); 1570 prefix(dst); 1571 emit_byte(0xC6); 1572 
emit_operand(rax, dst, 1); 1573 emit_byte(imm8); 1574 } 1575 1576 1577 void Assembler::movb(Address dst, Register src) { 1578 assert(src->has_byte_register(), "must have byte register"); 1579 InstructionMark im(this); 1580 prefix(dst, src, true); 1581 emit_byte(0x88); 1582 emit_operand(src, dst); 1583 } 1584 1585 void Assembler::movdl(XMMRegister dst, Register src) { 1586 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1587 emit_byte(0x66); 1588 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 1589 emit_byte(0x0F); 1590 emit_byte(0x6E); 1591 emit_byte(0xC0 | encode); 1592 } 1593 1594 void Assembler::movdl(Register dst, XMMRegister src) { 1595 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1596 emit_byte(0x66); 1597 // swap src/dst to get correct prefix 1598 int encode = prefix_and_encode(src->encoding(), dst->encoding()); 1599 emit_byte(0x0F); 1600 emit_byte(0x7E); 1601 emit_byte(0xC0 | encode); 1602 } 1603 1604 void Assembler::movdl(XMMRegister dst, Address src) { 1605 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1606 InstructionMark im(this); 1607 emit_byte(0x66); 1608 prefix(src, dst); 1609 emit_byte(0x0F); 1610 emit_byte(0x6E); 1611 emit_operand(dst, src); 1612 } 1613 1614 1615 void Assembler::movdqa(XMMRegister dst, Address src) { 1616 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1617 InstructionMark im(this); 1618 emit_byte(0x66); 1619 prefix(src, dst); 1620 emit_byte(0x0F); 1621 emit_byte(0x6F); 1622 emit_operand(dst, src); 1623 } 1624 1625 void Assembler::movdqa(XMMRegister dst, XMMRegister src) { 1626 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1627 emit_byte(0x66); 1628 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 1629 emit_byte(0x0F); 1630 emit_byte(0x6F); 1631 emit_byte(0xC0 | encode); 1632 } 1633 1634 void Assembler::movdqa(Address dst, XMMRegister src) { 1635 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1636 InstructionMark im(this); 1637 emit_byte(0x66); 1638 prefix(dst, src); 
1639 emit_byte(0x0F); 1640 emit_byte(0x7F); 1641 emit_operand(src, dst); 1642 } 1643 1644 void Assembler::movdqu(XMMRegister dst, Address src) { 1645 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1646 InstructionMark im(this); 1647 emit_byte(0xF3); 1648 prefix(src, dst); 1649 emit_byte(0x0F); 1650 emit_byte(0x6F); 1651 emit_operand(dst, src); 1652 } 1653 1654 void Assembler::movdqu(XMMRegister dst, XMMRegister src) { 1655 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1656 emit_byte(0xF3); 1657 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 1658 emit_byte(0x0F); 1659 emit_byte(0x6F); 1660 emit_byte(0xC0 | encode); 1661 } 1662 1663 void Assembler::movdqu(Address dst, XMMRegister src) { 1664 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1665 InstructionMark im(this); 1666 emit_byte(0xF3); 1667 prefix(dst, src); 1668 emit_byte(0x0F); 1669 emit_byte(0x7F); 1670 emit_operand(src, dst); 1671 } 1672 1673 // Uses zero extension on 64bit 1674 1675 void Assembler::movl(Register dst, int32_t imm32) { 1676 int encode = prefix_and_encode(dst->encoding()); 1677 emit_byte(0xB8 | encode); 1678 emit_long(imm32); 1679 } 1680 1681 void Assembler::movl(Register dst, Register src) { 1682 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 1683 emit_byte(0x8B); 1684 emit_byte(0xC0 | encode); 1685 } 1686 1687 void Assembler::movl(Register dst, Address src) { 1688 InstructionMark im(this); 1689 prefix(src, dst); 1690 emit_byte(0x8B); 1691 emit_operand(dst, src); 1692 } 1693 1694 void Assembler::movl(Address dst, int32_t imm32) { 1695 InstructionMark im(this); 1696 prefix(dst); 1697 emit_byte(0xC7); 1698 emit_operand(rax, dst, 4); 1699 emit_long(imm32); 1700 } 1701 1702 void Assembler::movl(Address dst, Register src) { 1703 InstructionMark im(this); 1704 prefix(dst, src); 1705 emit_byte(0x89); 1706 emit_operand(src, dst); 1707 } 1708 1709 // New cpus require to use movsd and movss to avoid partial register stall 1710 // when loading from 
memory. But for old Opteron use movlpd instead of movsd. 1711 // The selection is done in MacroAssembler::movdbl() and movflt(). 1712 void Assembler::movlpd(XMMRegister dst, Address src) { 1713 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1714 InstructionMark im(this); 1715 emit_byte(0x66); 1716 prefix(src, dst); 1717 emit_byte(0x0F); 1718 emit_byte(0x12); 1719 emit_operand(dst, src); 1720 } 1721 1722 void Assembler::movq( MMXRegister dst, Address src ) { 1723 assert( VM_Version::supports_mmx(), "" ); 1724 emit_byte(0x0F); 1725 emit_byte(0x6F); 1726 emit_operand(dst, src); 1727 } 1728 1729 void Assembler::movq( Address dst, MMXRegister src ) { 1730 assert( VM_Version::supports_mmx(), "" ); 1731 emit_byte(0x0F); 1732 emit_byte(0x7F); 1733 // workaround gcc (3.2.1-7a) bug 1734 // In that version of gcc with only an emit_operand(MMX, Address) 1735 // gcc will tail jump and try and reverse the parameters completely 1736 // obliterating dst in the process. By having a version available 1737 // that doesn't need to swap the args at the tail jump the bug is 1738 // avoided. 
1739 emit_operand(dst, src); 1740 } 1741 1742 void Assembler::movq(XMMRegister dst, Address src) { 1743 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1744 InstructionMark im(this); 1745 emit_byte(0xF3); 1746 prefix(src, dst); 1747 emit_byte(0x0F); 1748 emit_byte(0x7E); 1749 emit_operand(dst, src); 1750 } 1751 1752 void Assembler::movq(Address dst, XMMRegister src) { 1753 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1754 InstructionMark im(this); 1755 emit_byte(0x66); 1756 prefix(dst, src); 1757 emit_byte(0x0F); 1758 emit_byte(0xD6); 1759 emit_operand(src, dst); 1760 } 1761 1762 void Assembler::movsbl(Register dst, Address src) { // movsxb 1763 InstructionMark im(this); 1764 prefix(src, dst); 1765 emit_byte(0x0F); 1766 emit_byte(0xBE); 1767 emit_operand(dst, src); 1768 } 1769 1770 void Assembler::movsbl(Register dst, Register src) { // movsxb 1771 NOT_LP64(assert(src->has_byte_register(), "must have byte register")); 1772 int encode = prefix_and_encode(dst->encoding(), src->encoding(), true); 1773 emit_byte(0x0F); 1774 emit_byte(0xBE); 1775 emit_byte(0xC0 | encode); 1776 } 1777 1778 void Assembler::movsd(XMMRegister dst, XMMRegister src) { 1779 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1780 emit_byte(0xF2); 1781 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 1782 emit_byte(0x0F); 1783 emit_byte(0x10); 1784 emit_byte(0xC0 | encode); 1785 } 1786 1787 void Assembler::movsd(XMMRegister dst, Address src) { 1788 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1789 InstructionMark im(this); 1790 emit_byte(0xF2); 1791 prefix(src, dst); 1792 emit_byte(0x0F); 1793 emit_byte(0x10); 1794 emit_operand(dst, src); 1795 } 1796 1797 void Assembler::movsd(Address dst, XMMRegister src) { 1798 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1799 InstructionMark im(this); 1800 emit_byte(0xF2); 1801 prefix(dst, src); 1802 emit_byte(0x0F); 1803 emit_byte(0x11); 1804 emit_operand(src, dst); 1805 } 1806 1807 void Assembler::movss(XMMRegister dst, 
XMMRegister src) { 1808 NOT_LP64(assert(VM_Version::supports_sse(), "")); 1809 emit_byte(0xF3); 1810 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 1811 emit_byte(0x0F); 1812 emit_byte(0x10); 1813 emit_byte(0xC0 | encode); 1814 } 1815 1816 void Assembler::movss(XMMRegister dst, Address src) { 1817 NOT_LP64(assert(VM_Version::supports_sse(), "")); 1818 InstructionMark im(this); 1819 emit_byte(0xF3); 1820 prefix(src, dst); 1821 emit_byte(0x0F); 1822 emit_byte(0x10); 1823 emit_operand(dst, src); 1824 } 1825 1826 void Assembler::movss(Address dst, XMMRegister src) { 1827 NOT_LP64(assert(VM_Version::supports_sse(), "")); 1828 InstructionMark im(this); 1829 emit_byte(0xF3); 1830 prefix(dst, src); 1831 emit_byte(0x0F); 1832 emit_byte(0x11); 1833 emit_operand(src, dst); 1834 } 1835 1836 void Assembler::movswl(Register dst, Address src) { // movsxw 1837 InstructionMark im(this); 1838 prefix(src, dst); 1839 emit_byte(0x0F); 1840 emit_byte(0xBF); 1841 emit_operand(dst, src); 1842 } 1843 1844 void Assembler::movswl(Register dst, Register src) { // movsxw 1845 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 1846 emit_byte(0x0F); 1847 emit_byte(0xBF); 1848 emit_byte(0xC0 | encode); 1849 } 1850 1851 void Assembler::movw(Address dst, int imm16) { 1852 InstructionMark im(this); 1853 1854 emit_byte(0x66); // switch to 16-bit mode 1855 prefix(dst); 1856 emit_byte(0xC7); 1857 emit_operand(rax, dst, 2); 1858 emit_word(imm16); 1859 } 1860 1861 void Assembler::movw(Register dst, Address src) { 1862 InstructionMark im(this); 1863 emit_byte(0x66); 1864 prefix(src, dst); 1865 emit_byte(0x8B); 1866 emit_operand(dst, src); 1867 } 1868 1869 void Assembler::movw(Address dst, Register src) { 1870 InstructionMark im(this); 1871 emit_byte(0x66); 1872 prefix(dst, src); 1873 emit_byte(0x89); 1874 emit_operand(src, dst); 1875 } 1876 1877 void Assembler::movzbl(Register dst, Address src) { // movzxb 1878 InstructionMark im(this); 1879 prefix(src, dst); 1880 
emit_byte(0x0F); 1881 emit_byte(0xB6); 1882 emit_operand(dst, src); 1883 } 1884 1885 void Assembler::movzbl(Register dst, Register src) { // movzxb 1886 NOT_LP64(assert(src->has_byte_register(), "must have byte register")); 1887 int encode = prefix_and_encode(dst->encoding(), src->encoding(), true); 1888 emit_byte(0x0F); 1889 emit_byte(0xB6); 1890 emit_byte(0xC0 | encode); 1891 } 1892 1893 void Assembler::movzwl(Register dst, Address src) { // movzxw 1894 InstructionMark im(this); 1895 prefix(src, dst); 1896 emit_byte(0x0F); 1897 emit_byte(0xB7); 1898 emit_operand(dst, src); 1899 } 1900 1901 void Assembler::movzwl(Register dst, Register src) { // movzxw 1902 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 1903 emit_byte(0x0F); 1904 emit_byte(0xB7); 1905 emit_byte(0xC0 | encode); 1906 } 1907 1908 void Assembler::mull(Address src) { 1909 InstructionMark im(this); 1910 prefix(src); 1911 emit_byte(0xF7); 1912 emit_operand(rsp, src); 1913 } 1914 1915 void Assembler::mull(Register src) { 1916 int encode = prefix_and_encode(src->encoding()); 1917 emit_byte(0xF7); 1918 emit_byte(0xE0 | encode); 1919 } 1920 1921 void Assembler::mulsd(XMMRegister dst, Address src) { 1922 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1923 InstructionMark im(this); 1924 emit_byte(0xF2); 1925 prefix(src, dst); 1926 emit_byte(0x0F); 1927 emit_byte(0x59); 1928 emit_operand(dst, src); 1929 } 1930 1931 void Assembler::mulsd(XMMRegister dst, XMMRegister src) { 1932 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1933 emit_byte(0xF2); 1934 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 1935 emit_byte(0x0F); 1936 emit_byte(0x59); 1937 emit_byte(0xC0 | encode); 1938 } 1939 1940 void Assembler::mulss(XMMRegister dst, Address src) { 1941 NOT_LP64(assert(VM_Version::supports_sse(), "")); 1942 InstructionMark im(this); 1943 emit_byte(0xF3); 1944 prefix(src, dst); 1945 emit_byte(0x0F); 1946 emit_byte(0x59); 1947 emit_operand(dst, src); 1948 } 1949 1950 void 
Assembler::mulss(XMMRegister dst, XMMRegister src) { 1951 NOT_LP64(assert(VM_Version::supports_sse(), "")); 1952 emit_byte(0xF3); 1953 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 1954 emit_byte(0x0F); 1955 emit_byte(0x59); 1956 emit_byte(0xC0 | encode); 1957 } 1958 1959 void Assembler::negl(Register dst) { 1960 int encode = prefix_and_encode(dst->encoding()); 1961 emit_byte(0xF7); 1962 emit_byte(0xD8 | encode); 1963 } 1964 1965 void Assembler::nop(int i) { 1966 #ifdef ASSERT 1967 assert(i > 0, " "); 1968 // The fancy nops aren't currently recognized by debuggers making it a 1969 // pain to disassemble code while debugging. If asserts are on clearly 1970 // speed is not an issue so simply use the single byte traditional nop 1971 // to do alignment. 1972 1973 for (; i > 0 ; i--) emit_byte(0x90); 1974 return; 1975 1976 #endif // ASSERT 1977 1978 if (UseAddressNop && VM_Version::is_intel()) { 1979 // 1980 // Using multi-bytes nops "0x0F 0x1F [address]" for Intel 1981 // 1: 0x90 1982 // 2: 0x66 0x90 1983 // 3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding) 1984 // 4: 0x0F 0x1F 0x40 0x00 1985 // 5: 0x0F 0x1F 0x44 0x00 0x00 1986 // 6: 0x66 0x0F 0x1F 0x44 0x00 0x00 1987 // 7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 1988 // 8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 1989 // 9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 1990 // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 1991 // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 1992 1993 // The rest coding is Intel specific - don't use consecutive address nops 1994 1995 // 12: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90 1996 // 13: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90 1997 // 14: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90 1998 // 15: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90 1999 2000 while(i >= 15) { 2001 // For Intel don't generate consecutive addess nops 
(mix with regular nops) 2002 i -= 15; 2003 emit_byte(0x66); // size prefix 2004 emit_byte(0x66); // size prefix 2005 emit_byte(0x66); // size prefix 2006 addr_nop_8(); 2007 emit_byte(0x66); // size prefix 2008 emit_byte(0x66); // size prefix 2009 emit_byte(0x66); // size prefix 2010 emit_byte(0x90); // nop 2011 } 2012 switch (i) { 2013 case 14: 2014 emit_byte(0x66); // size prefix 2015 case 13: 2016 emit_byte(0x66); // size prefix 2017 case 12: 2018 addr_nop_8(); 2019 emit_byte(0x66); // size prefix 2020 emit_byte(0x66); // size prefix 2021 emit_byte(0x66); // size prefix 2022 emit_byte(0x90); // nop 2023 break; 2024 case 11: 2025 emit_byte(0x66); // size prefix 2026 case 10: 2027 emit_byte(0x66); // size prefix 2028 case 9: 2029 emit_byte(0x66); // size prefix 2030 case 8: 2031 addr_nop_8(); 2032 break; 2033 case 7: 2034 addr_nop_7(); 2035 break; 2036 case 6: 2037 emit_byte(0x66); // size prefix 2038 case 5: 2039 addr_nop_5(); 2040 break; 2041 case 4: 2042 addr_nop_4(); 2043 break; 2044 case 3: 2045 // Don't use "0x0F 0x1F 0x00" - need patching safe padding 2046 emit_byte(0x66); // size prefix 2047 case 2: 2048 emit_byte(0x66); // size prefix 2049 case 1: 2050 emit_byte(0x90); // nop 2051 break; 2052 default: 2053 assert(i == 0, " "); 2054 } 2055 return; 2056 } 2057 if (UseAddressNop && VM_Version::is_amd()) { 2058 // 2059 // Using multi-bytes nops "0x0F 0x1F [address]" for AMD. 
2060 // 1: 0x90 2061 // 2: 0x66 0x90 2062 // 3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding) 2063 // 4: 0x0F 0x1F 0x40 0x00 2064 // 5: 0x0F 0x1F 0x44 0x00 0x00 2065 // 6: 0x66 0x0F 0x1F 0x44 0x00 0x00 2066 // 7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 2067 // 8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 2068 // 9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 2069 // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 2070 // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 2071 2072 // The rest coding is AMD specific - use consecutive address nops 2073 2074 // 12: 0x66 0x0F 0x1F 0x44 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00 2075 // 13: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00 2076 // 14: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 2077 // 15: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 2078 // 16: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 2079 // Size prefixes (0x66) are added for larger sizes 2080 2081 while(i >= 22) { 2082 i -= 11; 2083 emit_byte(0x66); // size prefix 2084 emit_byte(0x66); // size prefix 2085 emit_byte(0x66); // size prefix 2086 addr_nop_8(); 2087 } 2088 // Generate first nop for size between 21-12 2089 switch (i) { 2090 case 21: 2091 i -= 1; 2092 emit_byte(0x66); // size prefix 2093 case 20: 2094 case 19: 2095 i -= 1; 2096 emit_byte(0x66); // size prefix 2097 case 18: 2098 case 17: 2099 i -= 1; 2100 emit_byte(0x66); // size prefix 2101 case 16: 2102 case 15: 2103 i -= 8; 2104 addr_nop_8(); 2105 break; 2106 case 14: 2107 case 13: 2108 i -= 7; 2109 addr_nop_7(); 2110 break; 2111 case 12: 2112 i -= 6; 2113 emit_byte(0x66); // size prefix 2114 addr_nop_5(); 2115 break; 2116 default: 2117 assert(i < 12, " "); 2118 } 2119 2120 // Generate second nop for size between 11-1 2121 switch (i) { 2122 case 11: 2123 emit_byte(0x66); // size prefix 2124 case 10: 2125 emit_byte(0x66); // size prefix 2126 case 9: 
2127 emit_byte(0x66); // size prefix 2128 case 8: 2129 addr_nop_8(); 2130 break; 2131 case 7: 2132 addr_nop_7(); 2133 break; 2134 case 6: 2135 emit_byte(0x66); // size prefix 2136 case 5: 2137 addr_nop_5(); 2138 break; 2139 case 4: 2140 addr_nop_4(); 2141 break; 2142 case 3: 2143 // Don't use "0x0F 0x1F 0x00" - need patching safe padding 2144 emit_byte(0x66); // size prefix 2145 case 2: 2146 emit_byte(0x66); // size prefix 2147 case 1: 2148 emit_byte(0x90); // nop 2149 break; 2150 default: 2151 assert(i == 0, " "); 2152 } 2153 return; 2154 } 2155 2156 // Using nops with size prefixes "0x66 0x90". 2157 // From AMD Optimization Guide: 2158 // 1: 0x90 2159 // 2: 0x66 0x90 2160 // 3: 0x66 0x66 0x90 2161 // 4: 0x66 0x66 0x66 0x90 2162 // 5: 0x66 0x66 0x90 0x66 0x90 2163 // 6: 0x66 0x66 0x90 0x66 0x66 0x90 2164 // 7: 0x66 0x66 0x66 0x90 0x66 0x66 0x90 2165 // 8: 0x66 0x66 0x66 0x90 0x66 0x66 0x66 0x90 2166 // 9: 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90 2167 // 10: 0x66 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90 2168 // 2169 while(i > 12) { 2170 i -= 4; 2171 emit_byte(0x66); // size prefix 2172 emit_byte(0x66); 2173 emit_byte(0x66); 2174 emit_byte(0x90); // nop 2175 } 2176 // 1 - 12 nops 2177 if(i > 8) { 2178 if(i > 9) { 2179 i -= 1; 2180 emit_byte(0x66); 2181 } 2182 i -= 3; 2183 emit_byte(0x66); 2184 emit_byte(0x66); 2185 emit_byte(0x90); 2186 } 2187 // 1 - 8 nops 2188 if(i > 4) { 2189 if(i > 6) { 2190 i -= 1; 2191 emit_byte(0x66); 2192 } 2193 i -= 3; 2194 emit_byte(0x66); 2195 emit_byte(0x66); 2196 emit_byte(0x90); 2197 } 2198 switch (i) { 2199 case 4: 2200 emit_byte(0x66); 2201 case 3: 2202 emit_byte(0x66); 2203 case 2: 2204 emit_byte(0x66); 2205 case 1: 2206 emit_byte(0x90); 2207 break; 2208 default: 2209 assert(i == 0, " "); 2210 } 2211 } 2212 2213 void Assembler::notl(Register dst) { 2214 int encode = prefix_and_encode(dst->encoding()); 2215 emit_byte(0xF7); 2216 emit_byte(0xD0 | encode ); 2217 } 2218 2219 void Assembler::orl(Address dst, int32_t imm32) { 2220 
InstructionMark im(this); 2221 prefix(dst); 2222 emit_arith_operand(0x81, rcx, dst, imm32); 2223 } 2224 2225 void Assembler::orl(Register dst, int32_t imm32) { 2226 prefix(dst); 2227 emit_arith(0x81, 0xC8, dst, imm32); 2228 } 2229 2230 void Assembler::orl(Register dst, Address src) { 2231 InstructionMark im(this); 2232 prefix(src, dst); 2233 emit_byte(0x0B); 2234 emit_operand(dst, src); 2235 } 2236 2237 void Assembler::orl(Register dst, Register src) { 2238 (void) prefix_and_encode(dst->encoding(), src->encoding()); 2239 emit_arith(0x0B, 0xC0, dst, src); 2240 } 2241 2242 void Assembler::pcmpestri(XMMRegister dst, Address src, int imm8) { 2243 assert(VM_Version::supports_sse4_2(), ""); 2244 2245 InstructionMark im(this); 2246 emit_byte(0x66); 2247 prefix(src, dst); 2248 emit_byte(0x0F); 2249 emit_byte(0x3A); 2250 emit_byte(0x61); 2251 emit_operand(dst, src); 2252 emit_byte(imm8); 2253 } 2254 2255 void Assembler::pcmpestri(XMMRegister dst, XMMRegister src, int imm8) { 2256 assert(VM_Version::supports_sse4_2(), ""); 2257 2258 emit_byte(0x66); 2259 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 2260 emit_byte(0x0F); 2261 emit_byte(0x3A); 2262 emit_byte(0x61); 2263 emit_byte(0xC0 | encode); 2264 emit_byte(imm8); 2265 } 2266 2267 // generic 2268 void Assembler::pop(Register dst) { 2269 int encode = prefix_and_encode(dst->encoding()); 2270 emit_byte(0x58 | encode); 2271 } 2272 2273 void Assembler::popcntl(Register dst, Address src) { 2274 assert(VM_Version::supports_popcnt(), "must support"); 2275 InstructionMark im(this); 2276 emit_byte(0xF3); 2277 prefix(src, dst); 2278 emit_byte(0x0F); 2279 emit_byte(0xB8); 2280 emit_operand(dst, src); 2281 } 2282 2283 void Assembler::popcntl(Register dst, Register src) { 2284 assert(VM_Version::supports_popcnt(), "must support"); 2285 emit_byte(0xF3); 2286 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 2287 emit_byte(0x0F); 2288 emit_byte(0xB8); 2289 emit_byte(0xC0 | encode); 2290 } 2291 2292 
void Assembler::popf() { 2293 emit_byte(0x9D); 2294 } 2295 2296 #ifndef _LP64 // no 32bit push/pop on amd64 2297 void Assembler::popl(Address dst) { 2298 // NOTE: this will adjust stack by 8byte on 64bits 2299 InstructionMark im(this); 2300 prefix(dst); 2301 emit_byte(0x8F); 2302 emit_operand(rax, dst); 2303 } 2304 #endif 2305 2306 void Assembler::prefetch_prefix(Address src) { 2307 prefix(src); 2308 emit_byte(0x0F); 2309 } 2310 2311 void Assembler::prefetchnta(Address src) { 2312 NOT_LP64(assert(VM_Version::supports_sse2(), "must support")); 2313 InstructionMark im(this); 2314 prefetch_prefix(src); 2315 emit_byte(0x18); 2316 emit_operand(rax, src); // 0, src 2317 } 2318 2319 void Assembler::prefetchr(Address src) { 2320 NOT_LP64(assert(VM_Version::supports_3dnow_prefetch(), "must support")); 2321 InstructionMark im(this); 2322 prefetch_prefix(src); 2323 emit_byte(0x0D); 2324 emit_operand(rax, src); // 0, src 2325 } 2326 2327 void Assembler::prefetcht0(Address src) { 2328 NOT_LP64(assert(VM_Version::supports_sse(), "must support")); 2329 InstructionMark im(this); 2330 prefetch_prefix(src); 2331 emit_byte(0x18); 2332 emit_operand(rcx, src); // 1, src 2333 } 2334 2335 void Assembler::prefetcht1(Address src) { 2336 NOT_LP64(assert(VM_Version::supports_sse(), "must support")); 2337 InstructionMark im(this); 2338 prefetch_prefix(src); 2339 emit_byte(0x18); 2340 emit_operand(rdx, src); // 2, src 2341 } 2342 2343 void Assembler::prefetcht2(Address src) { 2344 NOT_LP64(assert(VM_Version::supports_sse(), "must support")); 2345 InstructionMark im(this); 2346 prefetch_prefix(src); 2347 emit_byte(0x18); 2348 emit_operand(rbx, src); // 3, src 2349 } 2350 2351 void Assembler::prefetchw(Address src) { 2352 NOT_LP64(assert(VM_Version::supports_3dnow_prefetch(), "must support")); 2353 InstructionMark im(this); 2354 prefetch_prefix(src); 2355 emit_byte(0x0D); 2356 emit_operand(rcx, src); // 1, src 2357 } 2358 2359 void Assembler::prefix(Prefix p) { 2360 a_byte(p); 2361 } 2362 2363 
void Assembler::por(XMMRegister dst, XMMRegister src) { 2364 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2365 2366 emit_byte(0x66); 2367 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 2368 emit_byte(0x0F); 2369 2370 emit_byte(0xEB); 2371 emit_byte(0xC0 | encode); 2372 } 2373 2374 void Assembler::pshufd(XMMRegister dst, XMMRegister src, int mode) { 2375 assert(isByte(mode), "invalid value"); 2376 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2377 2378 emit_byte(0x66); 2379 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 2380 emit_byte(0x0F); 2381 emit_byte(0x70); 2382 emit_byte(0xC0 | encode); 2383 emit_byte(mode & 0xFF); 2384 2385 } 2386 2387 void Assembler::pshufd(XMMRegister dst, Address src, int mode) { 2388 assert(isByte(mode), "invalid value"); 2389 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2390 2391 InstructionMark im(this); 2392 emit_byte(0x66); 2393 prefix(src, dst); 2394 emit_byte(0x0F); 2395 emit_byte(0x70); 2396 emit_operand(dst, src); 2397 emit_byte(mode & 0xFF); 2398 } 2399 2400 void Assembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) { 2401 assert(isByte(mode), "invalid value"); 2402 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2403 2404 emit_byte(0xF2); 2405 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 2406 emit_byte(0x0F); 2407 emit_byte(0x70); 2408 emit_byte(0xC0 | encode); 2409 emit_byte(mode & 0xFF); 2410 } 2411 2412 void Assembler::pshuflw(XMMRegister dst, Address src, int mode) { 2413 assert(isByte(mode), "invalid value"); 2414 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2415 2416 InstructionMark im(this); 2417 emit_byte(0xF2); 2418 prefix(src, dst); // QQ new 2419 emit_byte(0x0F); 2420 emit_byte(0x70); 2421 emit_operand(dst, src); 2422 emit_byte(mode & 0xFF); 2423 } 2424 2425 void Assembler::psrlq(XMMRegister dst, int shift) { 2426 // Shift 64 bit value logically right by specified number of bits. 2427 // HMM Table D-1 says sse2 or mmx. 
2428 // Do not confuse it with psrldq SSE2 instruction which 2429 // shifts 128 bit value in xmm register by number of bytes. 2430 NOT_LP64(assert(VM_Version::supports_sse(), "")); 2431 2432 int encode = prefixq_and_encode(xmm2->encoding(), dst->encoding()); 2433 emit_byte(0x66); 2434 emit_byte(0x0F); 2435 emit_byte(0x73); 2436 emit_byte(0xC0 | encode); 2437 emit_byte(shift); 2438 } 2439 2440 void Assembler::psrldq(XMMRegister dst, int shift) { 2441 // Shift 128 bit value in xmm register by number of bytes. 2442 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2443 2444 int encode = prefixq_and_encode(xmm3->encoding(), dst->encoding()); 2445 emit_byte(0x66); 2446 emit_byte(0x0F); 2447 emit_byte(0x73); 2448 emit_byte(0xC0 | encode); 2449 emit_byte(shift); 2450 } 2451 2452 void Assembler::ptest(XMMRegister dst, Address src) { 2453 assert(VM_Version::supports_sse4_1(), ""); 2454 2455 InstructionMark im(this); 2456 emit_byte(0x66); 2457 prefix(src, dst); 2458 emit_byte(0x0F); 2459 emit_byte(0x38); 2460 emit_byte(0x17); 2461 emit_operand(dst, src); 2462 } 2463 2464 void Assembler::ptest(XMMRegister dst, XMMRegister src) { 2465 assert(VM_Version::supports_sse4_1(), ""); 2466 2467 emit_byte(0x66); 2468 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 2469 emit_byte(0x0F); 2470 emit_byte(0x38); 2471 emit_byte(0x17); 2472 emit_byte(0xC0 | encode); 2473 } 2474 2475 void Assembler::punpcklbw(XMMRegister dst, XMMRegister src) { 2476 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2477 emit_byte(0x66); 2478 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 2479 emit_byte(0x0F); 2480 emit_byte(0x60); 2481 emit_byte(0xC0 | encode); 2482 } 2483 2484 void Assembler::push(int32_t imm32) { 2485 // in 64bits we push 64bits onto the stack but only 2486 // take a 32bit immediate 2487 emit_byte(0x68); 2488 emit_long(imm32); 2489 } 2490 2491 void Assembler::push(Register src) { 2492 int encode = prefix_and_encode(src->encoding()); 2493 2494 
emit_byte(0x50 | encode); 2495 } 2496 2497 void Assembler::pushf() { 2498 emit_byte(0x9C); 2499 } 2500 2501 #ifndef _LP64 // no 32bit push/pop on amd64 2502 void Assembler::pushl(Address src) { 2503 // Note this will push 64bit on 64bit 2504 InstructionMark im(this); 2505 prefix(src); 2506 emit_byte(0xFF); 2507 emit_operand(rsi, src); 2508 } 2509 #endif 2510 2511 void Assembler::pxor(XMMRegister dst, Address src) { 2512 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2513 InstructionMark im(this); 2514 emit_byte(0x66); 2515 prefix(src, dst); 2516 emit_byte(0x0F); 2517 emit_byte(0xEF); 2518 emit_operand(dst, src); 2519 } 2520 2521 void Assembler::pxor(XMMRegister dst, XMMRegister src) { 2522 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2523 InstructionMark im(this); 2524 emit_byte(0x66); 2525 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 2526 emit_byte(0x0F); 2527 emit_byte(0xEF); 2528 emit_byte(0xC0 | encode); 2529 } 2530 2531 void Assembler::rcll(Register dst, int imm8) { 2532 assert(isShiftCount(imm8), "illegal shift count"); 2533 int encode = prefix_and_encode(dst->encoding()); 2534 if (imm8 == 1) { 2535 emit_byte(0xD1); 2536 emit_byte(0xD0 | encode); 2537 } else { 2538 emit_byte(0xC1); 2539 emit_byte(0xD0 | encode); 2540 emit_byte(imm8); 2541 } 2542 } 2543 2544 // copies data from [esi] to [edi] using rcx pointer sized words 2545 // generic 2546 void Assembler::rep_mov() { 2547 emit_byte(0xF3); 2548 // MOVSQ 2549 LP64_ONLY(prefix(REX_W)); 2550 emit_byte(0xA5); 2551 } 2552 2553 // sets rcx pointer sized words with rax, value at [edi] 2554 // generic 2555 void Assembler::rep_set() { // rep_set 2556 emit_byte(0xF3); 2557 // STOSQ 2558 LP64_ONLY(prefix(REX_W)); 2559 emit_byte(0xAB); 2560 } 2561 2562 // scans rcx pointer sized words at [edi] for occurance of rax, 2563 // generic 2564 void Assembler::repne_scan() { // repne_scan 2565 emit_byte(0xF2); 2566 // SCASQ 2567 LP64_ONLY(prefix(REX_W)); 2568 emit_byte(0xAF); 2569 } 2570 2571 
#ifdef _LP64 2572 // scans rcx 4 byte words at [edi] for occurance of rax, 2573 // generic 2574 void Assembler::repne_scanl() { // repne_scan 2575 emit_byte(0xF2); 2576 // SCASL 2577 emit_byte(0xAF); 2578 } 2579 #endif 2580 2581 void Assembler::ret(int imm16) { 2582 if (imm16 == 0) { 2583 emit_byte(0xC3); 2584 } else { 2585 emit_byte(0xC2); 2586 emit_word(imm16); 2587 } 2588 } 2589 2590 void Assembler::sahf() { 2591 #ifdef _LP64 2592 // Not supported in 64bit mode 2593 ShouldNotReachHere(); 2594 #endif 2595 emit_byte(0x9E); 2596 } 2597 2598 void Assembler::sarl(Register dst, int imm8) { 2599 int encode = prefix_and_encode(dst->encoding()); 2600 assert(isShiftCount(imm8), "illegal shift count"); 2601 if (imm8 == 1) { 2602 emit_byte(0xD1); 2603 emit_byte(0xF8 | encode); 2604 } else { 2605 emit_byte(0xC1); 2606 emit_byte(0xF8 | encode); 2607 emit_byte(imm8); 2608 } 2609 } 2610 2611 void Assembler::sarl(Register dst) { 2612 int encode = prefix_and_encode(dst->encoding()); 2613 emit_byte(0xD3); 2614 emit_byte(0xF8 | encode); 2615 } 2616 2617 void Assembler::sbbl(Address dst, int32_t imm32) { 2618 InstructionMark im(this); 2619 prefix(dst); 2620 emit_arith_operand(0x81, rbx, dst, imm32); 2621 } 2622 2623 void Assembler::sbbl(Register dst, int32_t imm32) { 2624 prefix(dst); 2625 emit_arith(0x81, 0xD8, dst, imm32); 2626 } 2627 2628 2629 void Assembler::sbbl(Register dst, Address src) { 2630 InstructionMark im(this); 2631 prefix(src, dst); 2632 emit_byte(0x1B); 2633 emit_operand(dst, src); 2634 } 2635 2636 void Assembler::sbbl(Register dst, Register src) { 2637 (void) prefix_and_encode(dst->encoding(), src->encoding()); 2638 emit_arith(0x1B, 0xC0, dst, src); 2639 } 2640 2641 void Assembler::setb(Condition cc, Register dst) { 2642 assert(0 <= cc && cc < 16, "illegal cc"); 2643 int encode = prefix_and_encode(dst->encoding(), true); 2644 emit_byte(0x0F); 2645 emit_byte(0x90 | cc); 2646 emit_byte(0xC0 | encode); 2647 } 2648 2649 void Assembler::shll(Register dst, int imm8) { 
2650 assert(isShiftCount(imm8), "illegal shift count"); 2651 int encode = prefix_and_encode(dst->encoding()); 2652 if (imm8 == 1 ) { 2653 emit_byte(0xD1); 2654 emit_byte(0xE0 | encode); 2655 } else { 2656 emit_byte(0xC1); 2657 emit_byte(0xE0 | encode); 2658 emit_byte(imm8); 2659 } 2660 } 2661 2662 void Assembler::shll(Register dst) { 2663 int encode = prefix_and_encode(dst->encoding()); 2664 emit_byte(0xD3); 2665 emit_byte(0xE0 | encode); 2666 } 2667 2668 void Assembler::shrl(Register dst, int imm8) { 2669 assert(isShiftCount(imm8), "illegal shift count"); 2670 int encode = prefix_and_encode(dst->encoding()); 2671 emit_byte(0xC1); 2672 emit_byte(0xE8 | encode); 2673 emit_byte(imm8); 2674 } 2675 2676 void Assembler::shrl(Register dst) { 2677 int encode = prefix_and_encode(dst->encoding()); 2678 emit_byte(0xD3); 2679 emit_byte(0xE8 | encode); 2680 } 2681 2682 // copies a single word from [esi] to [edi] 2683 void Assembler::smovl() { 2684 emit_byte(0xA5); 2685 } 2686 2687 void Assembler::sqrtsd(XMMRegister dst, XMMRegister src) { 2688 // HMM Table D-1 says sse2 2689 // NOT_LP64(assert(VM_Version::supports_sse(), "")); 2690 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2691 emit_byte(0xF2); 2692 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 2693 emit_byte(0x0F); 2694 emit_byte(0x51); 2695 emit_byte(0xC0 | encode); 2696 } 2697 2698 void Assembler::sqrtsd(XMMRegister dst, Address src) { 2699 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2700 InstructionMark im(this); 2701 emit_byte(0xF2); 2702 prefix(src, dst); 2703 emit_byte(0x0F); 2704 emit_byte(0x51); 2705 emit_operand(dst, src); 2706 } 2707 2708 void Assembler::sqrtss(XMMRegister dst, XMMRegister src) { 2709 // HMM Table D-1 says sse2 2710 // NOT_LP64(assert(VM_Version::supports_sse(), "")); 2711 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2712 emit_byte(0xF3); 2713 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 2714 emit_byte(0x0F); 2715 emit_byte(0x51); 2716 
emit_byte(0xC0 | encode); 2717 } 2718 2719 void Assembler::sqrtss(XMMRegister dst, Address src) { 2720 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2721 InstructionMark im(this); 2722 emit_byte(0xF3); 2723 prefix(src, dst); 2724 emit_byte(0x0F); 2725 emit_byte(0x51); 2726 emit_operand(dst, src); 2727 } 2728 2729 void Assembler::stmxcsr( Address dst) { 2730 NOT_LP64(assert(VM_Version::supports_sse(), "")); 2731 InstructionMark im(this); 2732 prefix(dst); 2733 emit_byte(0x0F); 2734 emit_byte(0xAE); 2735 emit_operand(as_Register(3), dst); 2736 } 2737 2738 void Assembler::subl(Address dst, int32_t imm32) { 2739 InstructionMark im(this); 2740 prefix(dst); 2741 emit_arith_operand(0x81, rbp, dst, imm32); 2742 } 2743 2744 void Assembler::subl(Address dst, Register src) { 2745 InstructionMark im(this); 2746 prefix(dst, src); 2747 emit_byte(0x29); 2748 emit_operand(src, dst); 2749 } 2750 2751 void Assembler::subl(Register dst, int32_t imm32) { 2752 prefix(dst); 2753 emit_arith(0x81, 0xE8, dst, imm32); 2754 } 2755 2756 void Assembler::subl(Register dst, Address src) { 2757 InstructionMark im(this); 2758 prefix(src, dst); 2759 emit_byte(0x2B); 2760 emit_operand(dst, src); 2761 } 2762 2763 void Assembler::subl(Register dst, Register src) { 2764 (void) prefix_and_encode(dst->encoding(), src->encoding()); 2765 emit_arith(0x2B, 0xC0, dst, src); 2766 } 2767 2768 void Assembler::subsd(XMMRegister dst, XMMRegister src) { 2769 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2770 emit_byte(0xF2); 2771 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 2772 emit_byte(0x0F); 2773 emit_byte(0x5C); 2774 emit_byte(0xC0 | encode); 2775 } 2776 2777 void Assembler::subsd(XMMRegister dst, Address src) { 2778 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2779 InstructionMark im(this); 2780 emit_byte(0xF2); 2781 prefix(src, dst); 2782 emit_byte(0x0F); 2783 emit_byte(0x5C); 2784 emit_operand(dst, src); 2785 } 2786 2787 void Assembler::subss(XMMRegister dst, XMMRegister 
src) { 2788 NOT_LP64(assert(VM_Version::supports_sse(), "")); 2789 emit_byte(0xF3); 2790 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 2791 emit_byte(0x0F); 2792 emit_byte(0x5C); 2793 emit_byte(0xC0 | encode); 2794 } 2795 2796 void Assembler::subss(XMMRegister dst, Address src) { 2797 NOT_LP64(assert(VM_Version::supports_sse(), "")); 2798 InstructionMark im(this); 2799 emit_byte(0xF3); 2800 prefix(src, dst); 2801 emit_byte(0x0F); 2802 emit_byte(0x5C); 2803 emit_operand(dst, src); 2804 } 2805 2806 void Assembler::testb(Register dst, int imm8) { 2807 NOT_LP64(assert(dst->has_byte_register(), "must have byte register")); 2808 (void) prefix_and_encode(dst->encoding(), true); 2809 emit_arith_b(0xF6, 0xC0, dst, imm8); 2810 } 2811 2812 void Assembler::testl(Register dst, int32_t imm32) { 2813 // not using emit_arith because test 2814 // doesn't support sign-extension of 2815 // 8bit operands 2816 int encode = dst->encoding(); 2817 if (encode == 0) { 2818 emit_byte(0xA9); 2819 } else { 2820 encode = prefix_and_encode(encode); 2821 emit_byte(0xF7); 2822 emit_byte(0xC0 | encode); 2823 } 2824 emit_long(imm32); 2825 } 2826 2827 void Assembler::testl(Register dst, Register src) { 2828 (void) prefix_and_encode(dst->encoding(), src->encoding()); 2829 emit_arith(0x85, 0xC0, dst, src); 2830 } 2831 2832 void Assembler::testl(Register dst, Address src) { 2833 InstructionMark im(this); 2834 prefix(src, dst); 2835 emit_byte(0x85); 2836 emit_operand(dst, src); 2837 } 2838 2839 void Assembler::ucomisd(XMMRegister dst, Address src) { 2840 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2841 emit_byte(0x66); 2842 ucomiss(dst, src); 2843 } 2844 2845 void Assembler::ucomisd(XMMRegister dst, XMMRegister src) { 2846 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2847 emit_byte(0x66); 2848 ucomiss(dst, src); 2849 } 2850 2851 void Assembler::ucomiss(XMMRegister dst, Address src) { 2852 NOT_LP64(assert(VM_Version::supports_sse(), "")); 2853 2854 InstructionMark 
im(this); 2855 prefix(src, dst); 2856 emit_byte(0x0F); 2857 emit_byte(0x2E); 2858 emit_operand(dst, src); 2859 } 2860 2861 void Assembler::ucomiss(XMMRegister dst, XMMRegister src) { 2862 NOT_LP64(assert(VM_Version::supports_sse(), "")); 2863 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 2864 emit_byte(0x0F); 2865 emit_byte(0x2E); 2866 emit_byte(0xC0 | encode); 2867 } 2868 2869 2870 void Assembler::xaddl(Address dst, Register src) { 2871 InstructionMark im(this); 2872 prefix(dst, src); 2873 emit_byte(0x0F); 2874 emit_byte(0xC1); 2875 emit_operand(src, dst); 2876 } 2877 2878 void Assembler::xchgl(Register dst, Address src) { // xchg 2879 InstructionMark im(this); 2880 prefix(src, dst); 2881 emit_byte(0x87); 2882 emit_operand(dst, src); 2883 } 2884 2885 void Assembler::xchgl(Register dst, Register src) { 2886 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 2887 emit_byte(0x87); 2888 emit_byte(0xc0 | encode); 2889 } 2890 2891 void Assembler::xorl(Register dst, int32_t imm32) { 2892 prefix(dst); 2893 emit_arith(0x81, 0xF0, dst, imm32); 2894 } 2895 2896 void Assembler::xorl(Register dst, Address src) { 2897 InstructionMark im(this); 2898 prefix(src, dst); 2899 emit_byte(0x33); 2900 emit_operand(dst, src); 2901 } 2902 2903 void Assembler::xorl(Register dst, Register src) { 2904 (void) prefix_and_encode(dst->encoding(), src->encoding()); 2905 emit_arith(0x33, 0xC0, dst, src); 2906 } 2907 2908 void Assembler::xorpd(XMMRegister dst, XMMRegister src) { 2909 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2910 emit_byte(0x66); 2911 xorps(dst, src); 2912 } 2913 2914 void Assembler::xorpd(XMMRegister dst, Address src) { 2915 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2916 InstructionMark im(this); 2917 emit_byte(0x66); 2918 prefix(src, dst); 2919 emit_byte(0x0F); 2920 emit_byte(0x57); 2921 emit_operand(dst, src); 2922 } 2923 2924 2925 void Assembler::xorps(XMMRegister dst, XMMRegister src) { 2926 
NOT_LP64(assert(VM_Version::supports_sse(), "")); 2927 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 2928 emit_byte(0x0F); 2929 emit_byte(0x57); 2930 emit_byte(0xC0 | encode); 2931 } 2932 2933 void Assembler::xorps(XMMRegister dst, Address src) { 2934 NOT_LP64(assert(VM_Version::supports_sse(), "")); 2935 InstructionMark im(this); 2936 prefix(src, dst); 2937 emit_byte(0x0F); 2938 emit_byte(0x57); 2939 emit_operand(dst, src); 2940 } 2941 2942 #ifndef _LP64 2943 // 32bit only pieces of the assembler 2944 2945 void Assembler::cmp_literal32(Register src1, int32_t imm32, RelocationHolder const& rspec) { 2946 // NO PREFIX AS NEVER 64BIT 2947 InstructionMark im(this); 2948 emit_byte(0x81); 2949 emit_byte(0xF8 | src1->encoding()); 2950 emit_data(imm32, rspec, 0); 2951 } 2952 2953 void Assembler::cmp_literal32(Address src1, int32_t imm32, RelocationHolder const& rspec) { 2954 // NO PREFIX AS NEVER 64BIT (not even 32bit versions of 64bit regs 2955 InstructionMark im(this); 2956 emit_byte(0x81); 2957 emit_operand(rdi, src1); 2958 emit_data(imm32, rspec, 0); 2959 } 2960 2961 // The 64-bit (32bit platform) cmpxchg compares the value at adr with the contents of rdx:rax, 2962 // and stores rcx:rbx into adr if so; otherwise, the value at adr is loaded 2963 // into rdx:rax. The ZF is set if the compared values were equal, and cleared otherwise. 2964 void Assembler::cmpxchg8(Address adr) { 2965 InstructionMark im(this); 2966 emit_byte(0x0F); 2967 emit_byte(0xc7); 2968 emit_operand(rcx, adr); 2969 } 2970 2971 void Assembler::decl(Register dst) { 2972 // Don't use it directly. Use MacroAssembler::decrementl() instead. 
2973 emit_byte(0x48 | dst->encoding()); 2974 } 2975 2976 #endif // _LP64 2977 2978 // 64bit typically doesn't use the x87 but needs to for the trig funcs 2979 2980 void Assembler::fabs() { 2981 emit_byte(0xD9); 2982 emit_byte(0xE1); 2983 } 2984 2985 void Assembler::fadd(int i) { 2986 emit_farith(0xD8, 0xC0, i); 2987 } 2988 2989 void Assembler::fadd_d(Address src) { 2990 InstructionMark im(this); 2991 emit_byte(0xDC); 2992 emit_operand32(rax, src); 2993 } 2994 2995 void Assembler::fadd_s(Address src) { 2996 InstructionMark im(this); 2997 emit_byte(0xD8); 2998 emit_operand32(rax, src); 2999 } 3000 3001 void Assembler::fadda(int i) { 3002 emit_farith(0xDC, 0xC0, i); 3003 } 3004 3005 void Assembler::faddp(int i) { 3006 emit_farith(0xDE, 0xC0, i); 3007 } 3008 3009 void Assembler::fchs() { 3010 emit_byte(0xD9); 3011 emit_byte(0xE0); 3012 } 3013 3014 void Assembler::fcom(int i) { 3015 emit_farith(0xD8, 0xD0, i); 3016 } 3017 3018 void Assembler::fcomp(int i) { 3019 emit_farith(0xD8, 0xD8, i); 3020 } 3021 3022 void Assembler::fcomp_d(Address src) { 3023 InstructionMark im(this); 3024 emit_byte(0xDC); 3025 emit_operand32(rbx, src); 3026 } 3027 3028 void Assembler::fcomp_s(Address src) { 3029 InstructionMark im(this); 3030 emit_byte(0xD8); 3031 emit_operand32(rbx, src); 3032 } 3033 3034 void Assembler::fcompp() { 3035 emit_byte(0xDE); 3036 emit_byte(0xD9); 3037 } 3038 3039 void Assembler::fcos() { 3040 emit_byte(0xD9); 3041 emit_byte(0xFF); 3042 } 3043 3044 void Assembler::fdecstp() { 3045 emit_byte(0xD9); 3046 emit_byte(0xF6); 3047 } 3048 3049 void Assembler::fdiv(int i) { 3050 emit_farith(0xD8, 0xF0, i); 3051 } 3052 3053 void Assembler::fdiv_d(Address src) { 3054 InstructionMark im(this); 3055 emit_byte(0xDC); 3056 emit_operand32(rsi, src); 3057 } 3058 3059 void Assembler::fdiv_s(Address src) { 3060 InstructionMark im(this); 3061 emit_byte(0xD8); 3062 emit_operand32(rsi, src); 3063 } 3064 3065 void Assembler::fdiva(int i) { 3066 emit_farith(0xDC, 0xF8, i); 3067 } 3068 3069 
// Note: The Intel manual (Pentium Processor User's Manual, Vol.3, 1994) 3070 // is erroneous for some of the floating-point instructions below. 3071 3072 void Assembler::fdivp(int i) { 3073 emit_farith(0xDE, 0xF8, i); // ST(0) <- ST(0) / ST(1) and pop (Intel manual wrong) 3074 } 3075 3076 void Assembler::fdivr(int i) { 3077 emit_farith(0xD8, 0xF8, i); 3078 } 3079 3080 void Assembler::fdivr_d(Address src) { 3081 InstructionMark im(this); 3082 emit_byte(0xDC); 3083 emit_operand32(rdi, src); 3084 } 3085 3086 void Assembler::fdivr_s(Address src) { 3087 InstructionMark im(this); 3088 emit_byte(0xD8); 3089 emit_operand32(rdi, src); 3090 } 3091 3092 void Assembler::fdivra(int i) { 3093 emit_farith(0xDC, 0xF0, i); 3094 } 3095 3096 void Assembler::fdivrp(int i) { 3097 emit_farith(0xDE, 0xF0, i); // ST(0) <- ST(1) / ST(0) and pop (Intel manual wrong) 3098 } 3099 3100 void Assembler::ffree(int i) { 3101 emit_farith(0xDD, 0xC0, i); 3102 } 3103 3104 void Assembler::fild_d(Address adr) { 3105 InstructionMark im(this); 3106 emit_byte(0xDF); 3107 emit_operand32(rbp, adr); 3108 } 3109 3110 void Assembler::fild_s(Address adr) { 3111 InstructionMark im(this); 3112 emit_byte(0xDB); 3113 emit_operand32(rax, adr); 3114 } 3115 3116 void Assembler::fincstp() { 3117 emit_byte(0xD9); 3118 emit_byte(0xF7); 3119 } 3120 3121 void Assembler::finit() { 3122 emit_byte(0x9B); 3123 emit_byte(0xDB); 3124 emit_byte(0xE3); 3125 } 3126 3127 void Assembler::fist_s(Address adr) { 3128 InstructionMark im(this); 3129 emit_byte(0xDB); 3130 emit_operand32(rdx, adr); 3131 } 3132 3133 void Assembler::fistp_d(Address adr) { 3134 InstructionMark im(this); 3135 emit_byte(0xDF); 3136 emit_operand32(rdi, adr); 3137 } 3138 3139 void Assembler::fistp_s(Address adr) { 3140 InstructionMark im(this); 3141 emit_byte(0xDB); 3142 emit_operand32(rbx, adr); 3143 } 3144 3145 void Assembler::fld1() { 3146 emit_byte(0xD9); 3147 emit_byte(0xE8); 3148 } 3149 3150 void Assembler::fld_d(Address adr) { 3151 InstructionMark 
im(this); 3152 emit_byte(0xDD); 3153 emit_operand32(rax, adr); 3154 } 3155 3156 void Assembler::fld_s(Address adr) { 3157 InstructionMark im(this); 3158 emit_byte(0xD9); 3159 emit_operand32(rax, adr); 3160 } 3161 3162 3163 void Assembler::fld_s(int index) { 3164 emit_farith(0xD9, 0xC0, index); 3165 } 3166 3167 void Assembler::fld_x(Address adr) { 3168 InstructionMark im(this); 3169 emit_byte(0xDB); 3170 emit_operand32(rbp, adr); 3171 } 3172 3173 void Assembler::fldcw(Address src) { 3174 InstructionMark im(this); 3175 emit_byte(0xd9); 3176 emit_operand32(rbp, src); 3177 } 3178 3179 void Assembler::fldenv(Address src) { 3180 InstructionMark im(this); 3181 emit_byte(0xD9); 3182 emit_operand32(rsp, src); 3183 } 3184 3185 void Assembler::fldlg2() { 3186 emit_byte(0xD9); 3187 emit_byte(0xEC); 3188 } 3189 3190 void Assembler::fldln2() { 3191 emit_byte(0xD9); 3192 emit_byte(0xED); 3193 } 3194 3195 void Assembler::fldz() { 3196 emit_byte(0xD9); 3197 emit_byte(0xEE); 3198 } 3199 3200 void Assembler::flog() { 3201 fldln2(); 3202 fxch(); 3203 fyl2x(); 3204 } 3205 3206 void Assembler::flog10() { 3207 fldlg2(); 3208 fxch(); 3209 fyl2x(); 3210 } 3211 3212 void Assembler::fmul(int i) { 3213 emit_farith(0xD8, 0xC8, i); 3214 } 3215 3216 void Assembler::fmul_d(Address src) { 3217 InstructionMark im(this); 3218 emit_byte(0xDC); 3219 emit_operand32(rcx, src); 3220 } 3221 3222 void Assembler::fmul_s(Address src) { 3223 InstructionMark im(this); 3224 emit_byte(0xD8); 3225 emit_operand32(rcx, src); 3226 } 3227 3228 void Assembler::fmula(int i) { 3229 emit_farith(0xDC, 0xC8, i); 3230 } 3231 3232 void Assembler::fmulp(int i) { 3233 emit_farith(0xDE, 0xC8, i); 3234 } 3235 3236 void Assembler::fnsave(Address dst) { 3237 InstructionMark im(this); 3238 emit_byte(0xDD); 3239 emit_operand32(rsi, dst); 3240 } 3241 3242 void Assembler::fnstcw(Address src) { 3243 InstructionMark im(this); 3244 emit_byte(0x9B); 3245 emit_byte(0xD9); 3246 emit_operand32(rdi, src); 3247 } 3248 3249 void 
Assembler::fnstsw_ax() { 3250 emit_byte(0xdF); 3251 emit_byte(0xE0); 3252 } 3253 3254 void Assembler::fprem() { 3255 emit_byte(0xD9); 3256 emit_byte(0xF8); 3257 } 3258 3259 void Assembler::fprem1() { 3260 emit_byte(0xD9); 3261 emit_byte(0xF5); 3262 } 3263 3264 void Assembler::frstor(Address src) { 3265 InstructionMark im(this); 3266 emit_byte(0xDD); 3267 emit_operand32(rsp, src); 3268 } 3269 3270 void Assembler::fsin() { 3271 emit_byte(0xD9); 3272 emit_byte(0xFE); 3273 } 3274 3275 void Assembler::fsqrt() { 3276 emit_byte(0xD9); 3277 emit_byte(0xFA); 3278 } 3279 3280 void Assembler::fst_d(Address adr) { 3281 InstructionMark im(this); 3282 emit_byte(0xDD); 3283 emit_operand32(rdx, adr); 3284 } 3285 3286 void Assembler::fst_s(Address adr) { 3287 InstructionMark im(this); 3288 emit_byte(0xD9); 3289 emit_operand32(rdx, adr); 3290 } 3291 3292 void Assembler::fstp_d(Address adr) { 3293 InstructionMark im(this); 3294 emit_byte(0xDD); 3295 emit_operand32(rbx, adr); 3296 } 3297 3298 void Assembler::fstp_d(int index) { 3299 emit_farith(0xDD, 0xD8, index); 3300 } 3301 3302 void Assembler::fstp_s(Address adr) { 3303 InstructionMark im(this); 3304 emit_byte(0xD9); 3305 emit_operand32(rbx, adr); 3306 } 3307 3308 void Assembler::fstp_x(Address adr) { 3309 InstructionMark im(this); 3310 emit_byte(0xDB); 3311 emit_operand32(rdi, adr); 3312 } 3313 3314 void Assembler::fsub(int i) { 3315 emit_farith(0xD8, 0xE0, i); 3316 } 3317 3318 void Assembler::fsub_d(Address src) { 3319 InstructionMark im(this); 3320 emit_byte(0xDC); 3321 emit_operand32(rsp, src); 3322 } 3323 3324 void Assembler::fsub_s(Address src) { 3325 InstructionMark im(this); 3326 emit_byte(0xD8); 3327 emit_operand32(rsp, src); 3328 } 3329 3330 void Assembler::fsuba(int i) { 3331 emit_farith(0xDC, 0xE8, i); 3332 } 3333 3334 void Assembler::fsubp(int i) { 3335 emit_farith(0xDE, 0xE8, i); // ST(0) <- ST(0) - ST(1) and pop (Intel manual wrong) 3336 } 3337 3338 void Assembler::fsubr(int i) { 3339 emit_farith(0xD8, 0xE8, i); 3340 
} 3341 3342 void Assembler::fsubr_d(Address src) { 3343 InstructionMark im(this); 3344 emit_byte(0xDC); 3345 emit_operand32(rbp, src); 3346 } 3347 3348 void Assembler::fsubr_s(Address src) { 3349 InstructionMark im(this); 3350 emit_byte(0xD8); 3351 emit_operand32(rbp, src); 3352 } 3353 3354 void Assembler::fsubra(int i) { 3355 emit_farith(0xDC, 0xE0, i); 3356 } 3357 3358 void Assembler::fsubrp(int i) { 3359 emit_farith(0xDE, 0xE0, i); // ST(0) <- ST(1) - ST(0) and pop (Intel manual wrong) 3360 } 3361 3362 void Assembler::ftan() { 3363 emit_byte(0xD9); 3364 emit_byte(0xF2); 3365 emit_byte(0xDD); 3366 emit_byte(0xD8); 3367 } 3368 3369 void Assembler::ftst() { 3370 emit_byte(0xD9); 3371 emit_byte(0xE4); 3372 } 3373 3374 void Assembler::fucomi(int i) { 3375 // make sure the instruction is supported (introduced for P6, together with cmov) 3376 guarantee(VM_Version::supports_cmov(), "illegal instruction"); 3377 emit_farith(0xDB, 0xE8, i); 3378 } 3379 3380 void Assembler::fucomip(int i) { 3381 // make sure the instruction is supported (introduced for P6, together with cmov) 3382 guarantee(VM_Version::supports_cmov(), "illegal instruction"); 3383 emit_farith(0xDF, 0xE8, i); 3384 } 3385 3386 void Assembler::fwait() { 3387 emit_byte(0x9B); 3388 } 3389 3390 void Assembler::fxch(int i) { 3391 emit_farith(0xD9, 0xC8, i); 3392 } 3393 3394 void Assembler::fyl2x() { 3395 emit_byte(0xD9); 3396 emit_byte(0xF1); 3397 } 3398 3399 3400 #ifndef _LP64 3401 3402 void Assembler::incl(Register dst) { 3403 // Don't use it directly. Use MacroAssembler::incrementl() instead. 
3404 emit_byte(0x40 | dst->encoding()); 3405 } 3406 3407 void Assembler::lea(Register dst, Address src) { 3408 leal(dst, src); 3409 } 3410 3411 void Assembler::mov_literal32(Address dst, int32_t imm32, RelocationHolder const& rspec) { 3412 InstructionMark im(this); 3413 emit_byte(0xC7); 3414 emit_operand(rax, dst); 3415 emit_data((int)imm32, rspec, 0); 3416 } 3417 3418 void Assembler::mov_literal32(Register dst, int32_t imm32, RelocationHolder const& rspec) { 3419 InstructionMark im(this); 3420 int encode = prefix_and_encode(dst->encoding()); 3421 emit_byte(0xB8 | encode); 3422 emit_data((int)imm32, rspec, 0); 3423 } 3424 3425 void Assembler::popa() { // 32bit 3426 emit_byte(0x61); 3427 } 3428 3429 void Assembler::push_literal32(int32_t imm32, RelocationHolder const& rspec) { 3430 InstructionMark im(this); 3431 emit_byte(0x68); 3432 emit_data(imm32, rspec, 0); 3433 } 3434 3435 void Assembler::pusha() { // 32bit 3436 emit_byte(0x60); 3437 } 3438 3439 void Assembler::set_byte_if_not_zero(Register dst) { 3440 emit_byte(0x0F); 3441 emit_byte(0x95); 3442 emit_byte(0xE0 | dst->encoding()); 3443 } 3444 3445 void Assembler::shldl(Register dst, Register src) { 3446 emit_byte(0x0F); 3447 emit_byte(0xA5); 3448 emit_byte(0xC0 | src->encoding() << 3 | dst->encoding()); 3449 } 3450 3451 void Assembler::shrdl(Register dst, Register src) { 3452 emit_byte(0x0F); 3453 emit_byte(0xAD); 3454 emit_byte(0xC0 | src->encoding() << 3 | dst->encoding()); 3455 } 3456 3457 #else // LP64 3458 3459 void Assembler::set_byte_if_not_zero(Register dst) { 3460 int enc = prefix_and_encode(dst->encoding(), true); 3461 emit_byte(0x0F); 3462 emit_byte(0x95); 3463 emit_byte(0xE0 | enc); 3464 } 3465 3466 // 64bit only pieces of the assembler 3467 // This should only be used by 64bit instructions that can use rip-relative 3468 // it cannot be used by instructions that want an immediate value. 
3469 3470 bool Assembler::reachable(AddressLiteral adr) { 3471 int64_t disp; 3472 // None will force a 64bit literal to the code stream. Likely a placeholder 3473 // for something that will be patched later and we need to certain it will 3474 // always be reachable. 3475 if (adr.reloc() == relocInfo::none) { 3476 return false; 3477 } 3478 if (adr.reloc() == relocInfo::internal_word_type) { 3479 // This should be rip relative and easily reachable. 3480 return true; 3481 } 3482 if (adr.reloc() == relocInfo::virtual_call_type || 3483 adr.reloc() == relocInfo::opt_virtual_call_type || 3484 adr.reloc() == relocInfo::static_call_type || 3485 adr.reloc() == relocInfo::static_stub_type ) { 3486 // This should be rip relative within the code cache and easily 3487 // reachable until we get huge code caches. (At which point 3488 // ic code is going to have issues). 3489 return true; 3490 } 3491 if (adr.reloc() != relocInfo::external_word_type && 3492 adr.reloc() != relocInfo::poll_return_type && // these are really external_word but need special 3493 adr.reloc() != relocInfo::poll_type && // relocs to identify them 3494 adr.reloc() != relocInfo::runtime_call_type ) { 3495 return false; 3496 } 3497 3498 // Stress the correction code 3499 if (ForceUnreachable) { 3500 // Must be runtimecall reloc, see if it is in the codecache 3501 // Flipping stuff in the codecache to be unreachable causes issues 3502 // with things like inline caches where the additional instructions 3503 // are not handled. 3504 if (CodeCache::find_blob(adr._target) == NULL) { 3505 return false; 3506 } 3507 } 3508 // For external_word_type/runtime_call_type if it is reachable from where we 3509 // are now (possibly a temp buffer) and where we might end up 3510 // anywhere in the codeCache then we are always reachable. 3511 // This would have to change if we ever save/restore shared code 3512 // to be more pessimistic. 
3513 disp = (int64_t)adr._target - ((int64_t)CodeCache::low_bound() + sizeof(int)); 3514 if (!is_simm32(disp)) return false; 3515 disp = (int64_t)adr._target - ((int64_t)CodeCache::high_bound() + sizeof(int)); 3516 if (!is_simm32(disp)) return false; 3517 3518 disp = (int64_t)adr._target - ((int64_t)_code_pos + sizeof(int)); 3519 3520 // Because rip relative is a disp + address_of_next_instruction and we 3521 // don't know the value of address_of_next_instruction we apply a fudge factor 3522 // to make sure we will be ok no matter the size of the instruction we get placed into. 3523 // We don't have to fudge the checks above here because they are already worst case. 3524 3525 // 12 == override/rex byte, opcode byte, rm byte, sib byte, a 4-byte disp , 4-byte literal 3526 // + 4 because better safe than sorry. 3527 const int fudge = 12 + 4; 3528 if (disp < 0) { 3529 disp -= fudge; 3530 } else { 3531 disp += fudge; 3532 } 3533 return is_simm32(disp); 3534 } 3535 3536 // Check if the polling page is not reachable from the code cache using rip-relative 3537 // addressing. 3538 bool Assembler::is_polling_page_far() { 3539 intptr_t addr = (intptr_t)os::get_polling_page(); 3540 return !is_simm32(addr - (intptr_t)CodeCache::low_bound()) || 3541 !is_simm32(addr - (intptr_t)CodeCache::high_bound()); 3542 } 3543 3544 void Assembler::emit_data64(jlong data, 3545 relocInfo::relocType rtype, 3546 int format) { 3547 if (rtype == relocInfo::none) { 3548 emit_long64(data); 3549 } else { 3550 emit_data64(data, Relocation::spec_simple(rtype), format); 3551 } 3552 } 3553 3554 void Assembler::emit_data64(jlong data, 3555 RelocationHolder const& rspec, 3556 int format) { 3557 assert(imm_operand == 0, "default format must be immediate in this file"); 3558 assert(imm_operand == format, "must be immediate"); 3559 assert(inst_mark() != NULL, "must be inside InstructionMark"); 3560 // Do not use AbstractAssembler::relocate, which is not intended for 3561 // embedded words. 
Instead, relocate to the enclosing instruction. 3562 code_section()->relocate(inst_mark(), rspec, format); 3563 #ifdef ASSERT 3564 check_relocation(rspec, format); 3565 #endif 3566 emit_long64(data); 3567 } 3568 3569 int Assembler::prefix_and_encode(int reg_enc, bool byteinst) { 3570 if (reg_enc >= 8) { 3571 prefix(REX_B); 3572 reg_enc -= 8; 3573 } else if (byteinst && reg_enc >= 4) { 3574 prefix(REX); 3575 } 3576 return reg_enc; 3577 } 3578 3579 int Assembler::prefixq_and_encode(int reg_enc) { 3580 if (reg_enc < 8) { 3581 prefix(REX_W); 3582 } else { 3583 prefix(REX_WB); 3584 reg_enc -= 8; 3585 } 3586 return reg_enc; 3587 } 3588 3589 int Assembler::prefix_and_encode(int dst_enc, int src_enc, bool byteinst) { 3590 if (dst_enc < 8) { 3591 if (src_enc >= 8) { 3592 prefix(REX_B); 3593 src_enc -= 8; 3594 } else if (byteinst && src_enc >= 4) { 3595 prefix(REX); 3596 } 3597 } else { 3598 if (src_enc < 8) { 3599 prefix(REX_R); 3600 } else { 3601 prefix(REX_RB); 3602 src_enc -= 8; 3603 } 3604 dst_enc -= 8; 3605 } 3606 return dst_enc << 3 | src_enc; 3607 } 3608 3609 int Assembler::prefixq_and_encode(int dst_enc, int src_enc) { 3610 if (dst_enc < 8) { 3611 if (src_enc < 8) { 3612 prefix(REX_W); 3613 } else { 3614 prefix(REX_WB); 3615 src_enc -= 8; 3616 } 3617 } else { 3618 if (src_enc < 8) { 3619 prefix(REX_WR); 3620 } else { 3621 prefix(REX_WRB); 3622 src_enc -= 8; 3623 } 3624 dst_enc -= 8; 3625 } 3626 return dst_enc << 3 | src_enc; 3627 } 3628 3629 void Assembler::prefix(Register reg) { 3630 if (reg->encoding() >= 8) { 3631 prefix(REX_B); 3632 } 3633 } 3634 3635 void Assembler::prefix(Address adr) { 3636 if (adr.base_needs_rex()) { 3637 if (adr.index_needs_rex()) { 3638 prefix(REX_XB); 3639 } else { 3640 prefix(REX_B); 3641 } 3642 } else { 3643 if (adr.index_needs_rex()) { 3644 prefix(REX_X); 3645 } 3646 } 3647 } 3648 3649 void Assembler::prefixq(Address adr) { 3650 if (adr.base_needs_rex()) { 3651 if (adr.index_needs_rex()) { 3652 prefix(REX_WXB); 3653 } else { 3654 
prefix(REX_WB); 3655 } 3656 } else { 3657 if (adr.index_needs_rex()) { 3658 prefix(REX_WX); 3659 } else { 3660 prefix(REX_W); 3661 } 3662 } 3663 } 3664 3665 3666 void Assembler::prefix(Address adr, Register reg, bool byteinst) { 3667 if (reg->encoding() < 8) { 3668 if (adr.base_needs_rex()) { 3669 if (adr.index_needs_rex()) { 3670 prefix(REX_XB); 3671 } else { 3672 prefix(REX_B); 3673 } 3674 } else { 3675 if (adr.index_needs_rex()) { 3676 prefix(REX_X); 3677 } else if (reg->encoding() >= 4 ) { 3678 prefix(REX); 3679 } 3680 } 3681 } else { 3682 if (adr.base_needs_rex()) { 3683 if (adr.index_needs_rex()) { 3684 prefix(REX_RXB); 3685 } else { 3686 prefix(REX_RB); 3687 } 3688 } else { 3689 if (adr.index_needs_rex()) { 3690 prefix(REX_RX); 3691 } else { 3692 prefix(REX_R); 3693 } 3694 } 3695 } 3696 } 3697 3698 void Assembler::prefixq(Address adr, Register src) { 3699 if (src->encoding() < 8) { 3700 if (adr.base_needs_rex()) { 3701 if (adr.index_needs_rex()) { 3702 prefix(REX_WXB); 3703 } else { 3704 prefix(REX_WB); 3705 } 3706 } else { 3707 if (adr.index_needs_rex()) { 3708 prefix(REX_WX); 3709 } else { 3710 prefix(REX_W); 3711 } 3712 } 3713 } else { 3714 if (adr.base_needs_rex()) { 3715 if (adr.index_needs_rex()) { 3716 prefix(REX_WRXB); 3717 } else { 3718 prefix(REX_WRB); 3719 } 3720 } else { 3721 if (adr.index_needs_rex()) { 3722 prefix(REX_WRX); 3723 } else { 3724 prefix(REX_WR); 3725 } 3726 } 3727 } 3728 } 3729 3730 void Assembler::prefix(Address adr, XMMRegister reg) { 3731 if (reg->encoding() < 8) { 3732 if (adr.base_needs_rex()) { 3733 if (adr.index_needs_rex()) { 3734 prefix(REX_XB); 3735 } else { 3736 prefix(REX_B); 3737 } 3738 } else { 3739 if (adr.index_needs_rex()) { 3740 prefix(REX_X); 3741 } 3742 } 3743 } else { 3744 if (adr.base_needs_rex()) { 3745 if (adr.index_needs_rex()) { 3746 prefix(REX_RXB); 3747 } else { 3748 prefix(REX_RB); 3749 } 3750 } else { 3751 if (adr.index_needs_rex()) { 3752 prefix(REX_RX); 3753 } else { 3754 prefix(REX_R); 3755 } 3756 } 
3757 } 3758 } 3759 3760 void Assembler::adcq(Register dst, int32_t imm32) { 3761 (void) prefixq_and_encode(dst->encoding()); 3762 emit_arith(0x81, 0xD0, dst, imm32); 3763 } 3764 3765 void Assembler::adcq(Register dst, Address src) { 3766 InstructionMark im(this); 3767 prefixq(src, dst); 3768 emit_byte(0x13); 3769 emit_operand(dst, src); 3770 } 3771 3772 void Assembler::adcq(Register dst, Register src) { 3773 (int) prefixq_and_encode(dst->encoding(), src->encoding()); 3774 emit_arith(0x13, 0xC0, dst, src); 3775 } 3776 3777 void Assembler::addq(Address dst, int32_t imm32) { 3778 InstructionMark im(this); 3779 prefixq(dst); 3780 emit_arith_operand(0x81, rax, dst,imm32); 3781 } 3782 3783 void Assembler::addq(Address dst, Register src) { 3784 InstructionMark im(this); 3785 prefixq(dst, src); 3786 emit_byte(0x01); 3787 emit_operand(src, dst); 3788 } 3789 3790 void Assembler::addq(Register dst, int32_t imm32) { 3791 (void) prefixq_and_encode(dst->encoding()); 3792 emit_arith(0x81, 0xC0, dst, imm32); 3793 } 3794 3795 void Assembler::addq(Register dst, Address src) { 3796 InstructionMark im(this); 3797 prefixq(src, dst); 3798 emit_byte(0x03); 3799 emit_operand(dst, src); 3800 } 3801 3802 void Assembler::addq(Register dst, Register src) { 3803 (void) prefixq_and_encode(dst->encoding(), src->encoding()); 3804 emit_arith(0x03, 0xC0, dst, src); 3805 } 3806 3807 void Assembler::andq(Register dst, int32_t imm32) { 3808 (void) prefixq_and_encode(dst->encoding()); 3809 emit_arith(0x81, 0xE0, dst, imm32); 3810 } 3811 3812 void Assembler::andq(Register dst, Address src) { 3813 InstructionMark im(this); 3814 prefixq(src, dst); 3815 emit_byte(0x23); 3816 emit_operand(dst, src); 3817 } 3818 3819 void Assembler::andq(Register dst, Register src) { 3820 (int) prefixq_and_encode(dst->encoding(), src->encoding()); 3821 emit_arith(0x23, 0xC0, dst, src); 3822 } 3823 3824 void Assembler::bsfq(Register dst, Register src) { 3825 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 
3826 emit_byte(0x0F); 3827 emit_byte(0xBC); 3828 emit_byte(0xC0 | encode); 3829 } 3830 3831 void Assembler::bsrq(Register dst, Register src) { 3832 assert(!VM_Version::supports_lzcnt(), "encoding is treated as LZCNT"); 3833 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 3834 emit_byte(0x0F); 3835 emit_byte(0xBD); 3836 emit_byte(0xC0 | encode); 3837 } 3838 3839 void Assembler::bswapq(Register reg) { 3840 int encode = prefixq_and_encode(reg->encoding()); 3841 emit_byte(0x0F); 3842 emit_byte(0xC8 | encode); 3843 } 3844 3845 void Assembler::cdqq() { 3846 prefix(REX_W); 3847 emit_byte(0x99); 3848 } 3849 3850 void Assembler::clflush(Address adr) { 3851 prefix(adr); 3852 emit_byte(0x0F); 3853 emit_byte(0xAE); 3854 emit_operand(rdi, adr); 3855 } 3856 3857 void Assembler::cmovq(Condition cc, Register dst, Register src) { 3858 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 3859 emit_byte(0x0F); 3860 emit_byte(0x40 | cc); 3861 emit_byte(0xC0 | encode); 3862 } 3863 3864 void Assembler::cmovq(Condition cc, Register dst, Address src) { 3865 InstructionMark im(this); 3866 prefixq(src, dst); 3867 emit_byte(0x0F); 3868 emit_byte(0x40 | cc); 3869 emit_operand(dst, src); 3870 } 3871 3872 void Assembler::cmpq(Address dst, int32_t imm32) { 3873 InstructionMark im(this); 3874 prefixq(dst); 3875 emit_byte(0x81); 3876 emit_operand(rdi, dst, 4); 3877 emit_long(imm32); 3878 } 3879 3880 void Assembler::cmpq(Register dst, int32_t imm32) { 3881 (void) prefixq_and_encode(dst->encoding()); 3882 emit_arith(0x81, 0xF8, dst, imm32); 3883 } 3884 3885 void Assembler::cmpq(Address dst, Register src) { 3886 InstructionMark im(this); 3887 prefixq(dst, src); 3888 emit_byte(0x3B); 3889 emit_operand(src, dst); 3890 } 3891 3892 void Assembler::cmpq(Register dst, Register src) { 3893 (void) prefixq_and_encode(dst->encoding(), src->encoding()); 3894 emit_arith(0x3B, 0xC0, dst, src); 3895 } 3896 3897 void Assembler::cmpq(Register dst, Address src) { 3898 InstructionMark 
im(this); 3899 prefixq(src, dst); 3900 emit_byte(0x3B); 3901 emit_operand(dst, src); 3902 } 3903 3904 void Assembler::cmpxchgq(Register reg, Address adr) { 3905 InstructionMark im(this); 3906 prefixq(adr, reg); 3907 emit_byte(0x0F); 3908 emit_byte(0xB1); 3909 emit_operand(reg, adr); 3910 } 3911 3912 void Assembler::cvtsi2sdq(XMMRegister dst, Register src) { 3913 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3914 emit_byte(0xF2); 3915 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 3916 emit_byte(0x0F); 3917 emit_byte(0x2A); 3918 emit_byte(0xC0 | encode); 3919 } 3920 3921 void Assembler::cvtsi2ssq(XMMRegister dst, Register src) { 3922 NOT_LP64(assert(VM_Version::supports_sse(), "")); 3923 emit_byte(0xF3); 3924 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 3925 emit_byte(0x0F); 3926 emit_byte(0x2A); 3927 emit_byte(0xC0 | encode); 3928 } 3929 3930 void Assembler::cvttsd2siq(Register dst, XMMRegister src) { 3931 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3932 emit_byte(0xF2); 3933 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 3934 emit_byte(0x0F); 3935 emit_byte(0x2C); 3936 emit_byte(0xC0 | encode); 3937 } 3938 3939 void Assembler::cvttss2siq(Register dst, XMMRegister src) { 3940 NOT_LP64(assert(VM_Version::supports_sse(), "")); 3941 emit_byte(0xF3); 3942 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 3943 emit_byte(0x0F); 3944 emit_byte(0x2C); 3945 emit_byte(0xC0 | encode); 3946 } 3947 3948 void Assembler::decl(Register dst) { 3949 // Don't use it directly. Use MacroAssembler::decrementl() instead. 3950 // Use two-byte form (one-byte form is a REX prefix in 64-bit mode) 3951 int encode = prefix_and_encode(dst->encoding()); 3952 emit_byte(0xFF); 3953 emit_byte(0xC8 | encode); 3954 } 3955 3956 void Assembler::decq(Register dst) { 3957 // Don't use it directly. Use MacroAssembler::decrementq() instead. 
3958 // Use two-byte form (one-byte from is a REX prefix in 64-bit mode) 3959 int encode = prefixq_and_encode(dst->encoding()); 3960 emit_byte(0xFF); 3961 emit_byte(0xC8 | encode); 3962 } 3963 3964 void Assembler::decq(Address dst) { 3965 // Don't use it directly. Use MacroAssembler::decrementq() instead. 3966 InstructionMark im(this); 3967 prefixq(dst); 3968 emit_byte(0xFF); 3969 emit_operand(rcx, dst); 3970 } 3971 3972 void Assembler::fxrstor(Address src) { 3973 prefixq(src); 3974 emit_byte(0x0F); 3975 emit_byte(0xAE); 3976 emit_operand(as_Register(1), src); 3977 } 3978 3979 void Assembler::fxsave(Address dst) { 3980 prefixq(dst); 3981 emit_byte(0x0F); 3982 emit_byte(0xAE); 3983 emit_operand(as_Register(0), dst); 3984 } 3985 3986 void Assembler::idivq(Register src) { 3987 int encode = prefixq_and_encode(src->encoding()); 3988 emit_byte(0xF7); 3989 emit_byte(0xF8 | encode); 3990 } 3991 3992 void Assembler::imulq(Register dst, Register src) { 3993 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 3994 emit_byte(0x0F); 3995 emit_byte(0xAF); 3996 emit_byte(0xC0 | encode); 3997 } 3998 3999 void Assembler::imulq(Register dst, Register src, int value) { 4000 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 4001 if (is8bit(value)) { 4002 emit_byte(0x6B); 4003 emit_byte(0xC0 | encode); 4004 emit_byte(value & 0xFF); 4005 } else { 4006 emit_byte(0x69); 4007 emit_byte(0xC0 | encode); 4008 emit_long(value); 4009 } 4010 } 4011 4012 void Assembler::incl(Register dst) { 4013 // Don't use it directly. Use MacroAssembler::incrementl() instead. 4014 // Use two-byte form (one-byte from is a REX prefix in 64-bit mode) 4015 int encode = prefix_and_encode(dst->encoding()); 4016 emit_byte(0xFF); 4017 emit_byte(0xC0 | encode); 4018 } 4019 4020 void Assembler::incq(Register dst) { 4021 // Don't use it directly. Use MacroAssembler::incrementq() instead. 
4022 // Use two-byte form (one-byte from is a REX prefix in 64-bit mode) 4023 int encode = prefixq_and_encode(dst->encoding()); 4024 emit_byte(0xFF); 4025 emit_byte(0xC0 | encode); 4026 } 4027 4028 void Assembler::incq(Address dst) { 4029 // Don't use it directly. Use MacroAssembler::incrementq() instead. 4030 InstructionMark im(this); 4031 prefixq(dst); 4032 emit_byte(0xFF); 4033 emit_operand(rax, dst); 4034 } 4035 4036 void Assembler::lea(Register dst, Address src) { 4037 leaq(dst, src); 4038 } 4039 4040 void Assembler::leaq(Register dst, Address src) { 4041 InstructionMark im(this); 4042 prefixq(src, dst); 4043 emit_byte(0x8D); 4044 emit_operand(dst, src); 4045 } 4046 4047 void Assembler::mov64(Register dst, int64_t imm64) { 4048 InstructionMark im(this); 4049 int encode = prefixq_and_encode(dst->encoding()); 4050 emit_byte(0xB8 | encode); 4051 emit_long64(imm64); 4052 } 4053 4054 void Assembler::mov_literal64(Register dst, intptr_t imm64, RelocationHolder const& rspec) { 4055 InstructionMark im(this); 4056 int encode = prefixq_and_encode(dst->encoding()); 4057 emit_byte(0xB8 | encode); 4058 emit_data64(imm64, rspec); 4059 } 4060 4061 void Assembler::mov_narrow_oop(Register dst, int32_t imm32, RelocationHolder const& rspec) { 4062 InstructionMark im(this); 4063 int encode = prefix_and_encode(dst->encoding()); 4064 emit_byte(0xB8 | encode); 4065 emit_data((int)imm32, rspec, narrow_oop_operand); 4066 } 4067 4068 void Assembler::mov_narrow_oop(Address dst, int32_t imm32, RelocationHolder const& rspec) { 4069 InstructionMark im(this); 4070 prefix(dst); 4071 emit_byte(0xC7); 4072 emit_operand(rax, dst, 4); 4073 emit_data((int)imm32, rspec, narrow_oop_operand); 4074 } 4075 4076 void Assembler::cmp_narrow_oop(Register src1, int32_t imm32, RelocationHolder const& rspec) { 4077 InstructionMark im(this); 4078 int encode = prefix_and_encode(src1->encoding()); 4079 emit_byte(0x81); 4080 emit_byte(0xF8 | encode); 4081 emit_data((int)imm32, rspec, narrow_oop_operand); 4082 } 
4083 4084 void Assembler::cmp_narrow_oop(Address src1, int32_t imm32, RelocationHolder const& rspec) { 4085 InstructionMark im(this); 4086 prefix(src1); 4087 emit_byte(0x81); 4088 emit_operand(rax, src1, 4); 4089 emit_data((int)imm32, rspec, narrow_oop_operand); 4090 } 4091 4092 void Assembler::lzcntq(Register dst, Register src) { 4093 assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR"); 4094 emit_byte(0xF3); 4095 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 4096 emit_byte(0x0F); 4097 emit_byte(0xBD); 4098 emit_byte(0xC0 | encode); 4099 } 4100 4101 void Assembler::movdq(XMMRegister dst, Register src) { 4102 // table D-1 says MMX/SSE2 4103 NOT_LP64(assert(VM_Version::supports_sse2() || VM_Version::supports_mmx(), "")); 4104 emit_byte(0x66); 4105 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 4106 emit_byte(0x0F); 4107 emit_byte(0x6E); 4108 emit_byte(0xC0 | encode); 4109 } 4110 4111 void Assembler::movdq(Register dst, XMMRegister src) { 4112 // table D-1 says MMX/SSE2 4113 NOT_LP64(assert(VM_Version::supports_sse2() || VM_Version::supports_mmx(), "")); 4114 emit_byte(0x66); 4115 // swap src/dst to get correct prefix 4116 int encode = prefixq_and_encode(src->encoding(), dst->encoding()); 4117 emit_byte(0x0F); 4118 emit_byte(0x7E); 4119 emit_byte(0xC0 | encode); 4120 } 4121 4122 void Assembler::movq(Register dst, Register src) { 4123 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 4124 emit_byte(0x8B); 4125 emit_byte(0xC0 | encode); 4126 } 4127 4128 void Assembler::movq(Register dst, Address src) { 4129 InstructionMark im(this); 4130 prefixq(src, dst); 4131 emit_byte(0x8B); 4132 emit_operand(dst, src); 4133 } 4134 4135 void Assembler::movq(Address dst, Register src) { 4136 InstructionMark im(this); 4137 prefixq(dst, src); 4138 emit_byte(0x89); 4139 emit_operand(src, dst); 4140 } 4141 4142 void Assembler::movsbq(Register dst, Address src) { 4143 InstructionMark im(this); 4144 prefixq(src, dst); 
4145 emit_byte(0x0F); 4146 emit_byte(0xBE); 4147 emit_operand(dst, src); 4148 } 4149 4150 void Assembler::movsbq(Register dst, Register src) { 4151 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 4152 emit_byte(0x0F); 4153 emit_byte(0xBE); 4154 emit_byte(0xC0 | encode); 4155 } 4156 4157 void Assembler::movslq(Register dst, int32_t imm32) { 4158 // dbx shows movslq(rcx, 3) as movq $0x0000000049000000,(%rbx) 4159 // and movslq(r8, 3); as movl $0x0000000048000000,(%rbx) 4160 // as a result we shouldn't use until tested at runtime... 4161 ShouldNotReachHere(); 4162 InstructionMark im(this); 4163 int encode = prefixq_and_encode(dst->encoding()); 4164 emit_byte(0xC7 | encode); 4165 emit_long(imm32); 4166 } 4167 4168 void Assembler::movslq(Address dst, int32_t imm32) { 4169 assert(is_simm32(imm32), "lost bits"); 4170 InstructionMark im(this); 4171 prefixq(dst); 4172 emit_byte(0xC7); 4173 emit_operand(rax, dst, 4); 4174 emit_long(imm32); 4175 } 4176 4177 void Assembler::movslq(Register dst, Address src) { 4178 InstructionMark im(this); 4179 prefixq(src, dst); 4180 emit_byte(0x63); 4181 emit_operand(dst, src); 4182 } 4183 4184 void Assembler::movslq(Register dst, Register src) { 4185 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 4186 emit_byte(0x63); 4187 emit_byte(0xC0 | encode); 4188 } 4189 4190 void Assembler::movswq(Register dst, Address src) { 4191 InstructionMark im(this); 4192 prefixq(src, dst); 4193 emit_byte(0x0F); 4194 emit_byte(0xBF); 4195 emit_operand(dst, src); 4196 } 4197 4198 void Assembler::movswq(Register dst, Register src) { 4199 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 4200 emit_byte(0x0F); 4201 emit_byte(0xBF); 4202 emit_byte(0xC0 | encode); 4203 } 4204 4205 void Assembler::movzbq(Register dst, Address src) { 4206 InstructionMark im(this); 4207 prefixq(src, dst); 4208 emit_byte(0x0F); 4209 emit_byte(0xB6); 4210 emit_operand(dst, src); 4211 } 4212 4213 void Assembler::movzbq(Register dst, 
Register src) { 4214 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 4215 emit_byte(0x0F); 4216 emit_byte(0xB6); 4217 emit_byte(0xC0 | encode); 4218 } 4219 4220 void Assembler::movzwq(Register dst, Address src) { 4221 InstructionMark im(this); 4222 prefixq(src, dst); 4223 emit_byte(0x0F); 4224 emit_byte(0xB7); 4225 emit_operand(dst, src); 4226 } 4227 4228 void Assembler::movzwq(Register dst, Register src) { 4229 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 4230 emit_byte(0x0F); 4231 emit_byte(0xB7); 4232 emit_byte(0xC0 | encode); 4233 } 4234 4235 void Assembler::negq(Register dst) { 4236 int encode = prefixq_and_encode(dst->encoding()); 4237 emit_byte(0xF7); 4238 emit_byte(0xD8 | encode); 4239 } 4240 4241 void Assembler::notq(Register dst) { 4242 int encode = prefixq_and_encode(dst->encoding()); 4243 emit_byte(0xF7); 4244 emit_byte(0xD0 | encode); 4245 } 4246 4247 void Assembler::orq(Address dst, int32_t imm32) { 4248 InstructionMark im(this); 4249 prefixq(dst); 4250 emit_byte(0x81); 4251 emit_operand(rcx, dst, 4); 4252 emit_long(imm32); 4253 } 4254 4255 void Assembler::orq(Register dst, int32_t imm32) { 4256 (void) prefixq_and_encode(dst->encoding()); 4257 emit_arith(0x81, 0xC8, dst, imm32); 4258 } 4259 4260 void Assembler::orq(Register dst, Address src) { 4261 InstructionMark im(this); 4262 prefixq(src, dst); 4263 emit_byte(0x0B); 4264 emit_operand(dst, src); 4265 } 4266 4267 void Assembler::orq(Register dst, Register src) { 4268 (void) prefixq_and_encode(dst->encoding(), src->encoding()); 4269 emit_arith(0x0B, 0xC0, dst, src); 4270 } 4271 4272 void Assembler::popa() { // 64bit 4273 movq(r15, Address(rsp, 0)); 4274 movq(r14, Address(rsp, wordSize)); 4275 movq(r13, Address(rsp, 2 * wordSize)); 4276 movq(r12, Address(rsp, 3 * wordSize)); 4277 movq(r11, Address(rsp, 4 * wordSize)); 4278 movq(r10, Address(rsp, 5 * wordSize)); 4279 movq(r9, Address(rsp, 6 * wordSize)); 4280 movq(r8, Address(rsp, 7 * wordSize)); 4281 movq(rdi, 
Address(rsp, 8 * wordSize)); 4282 movq(rsi, Address(rsp, 9 * wordSize)); 4283 movq(rbp, Address(rsp, 10 * wordSize)); 4284 // skip rsp 4285 movq(rbx, Address(rsp, 12 * wordSize)); 4286 movq(rdx, Address(rsp, 13 * wordSize)); 4287 movq(rcx, Address(rsp, 14 * wordSize)); 4288 movq(rax, Address(rsp, 15 * wordSize)); 4289 4290 addq(rsp, 16 * wordSize); 4291 } 4292 4293 void Assembler::popcntq(Register dst, Address src) { 4294 assert(VM_Version::supports_popcnt(), "must support"); 4295 InstructionMark im(this); 4296 emit_byte(0xF3); 4297 prefixq(src, dst); 4298 emit_byte(0x0F); 4299 emit_byte(0xB8); 4300 emit_operand(dst, src); 4301 } 4302 4303 void Assembler::popcntq(Register dst, Register src) { 4304 assert(VM_Version::supports_popcnt(), "must support"); 4305 emit_byte(0xF3); 4306 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 4307 emit_byte(0x0F); 4308 emit_byte(0xB8); 4309 emit_byte(0xC0 | encode); 4310 } 4311 4312 void Assembler::popq(Address dst) { 4313 InstructionMark im(this); 4314 prefixq(dst); 4315 emit_byte(0x8F); 4316 emit_operand(rax, dst); 4317 } 4318 4319 void Assembler::pusha() { // 64bit 4320 // we have to store original rsp. ABI says that 128 bytes 4321 // below rsp are local scratch. 
4322 movq(Address(rsp, -5 * wordSize), rsp); 4323 4324 subq(rsp, 16 * wordSize); 4325 4326 movq(Address(rsp, 15 * wordSize), rax); 4327 movq(Address(rsp, 14 * wordSize), rcx); 4328 movq(Address(rsp, 13 * wordSize), rdx); 4329 movq(Address(rsp, 12 * wordSize), rbx); 4330 // skip rsp 4331 movq(Address(rsp, 10 * wordSize), rbp); 4332 movq(Address(rsp, 9 * wordSize), rsi); 4333 movq(Address(rsp, 8 * wordSize), rdi); 4334 movq(Address(rsp, 7 * wordSize), r8); 4335 movq(Address(rsp, 6 * wordSize), r9); 4336 movq(Address(rsp, 5 * wordSize), r10); 4337 movq(Address(rsp, 4 * wordSize), r11); 4338 movq(Address(rsp, 3 * wordSize), r12); 4339 movq(Address(rsp, 2 * wordSize), r13); 4340 movq(Address(rsp, wordSize), r14); 4341 movq(Address(rsp, 0), r15); 4342 } 4343 4344 void Assembler::pushq(Address src) { 4345 InstructionMark im(this); 4346 prefixq(src); 4347 emit_byte(0xFF); 4348 emit_operand(rsi, src); 4349 } 4350 4351 void Assembler::rclq(Register dst, int imm8) { 4352 assert(isShiftCount(imm8 >> 1), "illegal shift count"); 4353 int encode = prefixq_and_encode(dst->encoding()); 4354 if (imm8 == 1) { 4355 emit_byte(0xD1); 4356 emit_byte(0xD0 | encode); 4357 } else { 4358 emit_byte(0xC1); 4359 emit_byte(0xD0 | encode); 4360 emit_byte(imm8); 4361 } 4362 } 4363 void Assembler::sarq(Register dst, int imm8) { 4364 assert(isShiftCount(imm8 >> 1), "illegal shift count"); 4365 int encode = prefixq_and_encode(dst->encoding()); 4366 if (imm8 == 1) { 4367 emit_byte(0xD1); 4368 emit_byte(0xF8 | encode); 4369 } else { 4370 emit_byte(0xC1); 4371 emit_byte(0xF8 | encode); 4372 emit_byte(imm8); 4373 } 4374 } 4375 4376 void Assembler::sarq(Register dst) { 4377 int encode = prefixq_and_encode(dst->encoding()); 4378 emit_byte(0xD3); 4379 emit_byte(0xF8 | encode); 4380 } 4381 4382 void Assembler::sbbq(Address dst, int32_t imm32) { 4383 InstructionMark im(this); 4384 prefixq(dst); 4385 emit_arith_operand(0x81, rbx, dst, imm32); 4386 } 4387 4388 void Assembler::sbbq(Register dst, int32_t imm32) { 
4389 (void) prefixq_and_encode(dst->encoding()); 4390 emit_arith(0x81, 0xD8, dst, imm32); 4391 } 4392 4393 void Assembler::sbbq(Register dst, Address src) { 4394 InstructionMark im(this); 4395 prefixq(src, dst); 4396 emit_byte(0x1B); 4397 emit_operand(dst, src); 4398 } 4399 4400 void Assembler::sbbq(Register dst, Register src) { 4401 (void) prefixq_and_encode(dst->encoding(), src->encoding()); 4402 emit_arith(0x1B, 0xC0, dst, src); 4403 } 4404 4405 void Assembler::shlq(Register dst, int imm8) { 4406 assert(isShiftCount(imm8 >> 1), "illegal shift count"); 4407 int encode = prefixq_and_encode(dst->encoding()); 4408 if (imm8 == 1) { 4409 emit_byte(0xD1); 4410 emit_byte(0xE0 | encode); 4411 } else { 4412 emit_byte(0xC1); 4413 emit_byte(0xE0 | encode); 4414 emit_byte(imm8); 4415 } 4416 } 4417 4418 void Assembler::shlq(Register dst) { 4419 int encode = prefixq_and_encode(dst->encoding()); 4420 emit_byte(0xD3); 4421 emit_byte(0xE0 | encode); 4422 } 4423 4424 void Assembler::shrq(Register dst, int imm8) { 4425 assert(isShiftCount(imm8 >> 1), "illegal shift count"); 4426 int encode = prefixq_and_encode(dst->encoding()); 4427 emit_byte(0xC1); 4428 emit_byte(0xE8 | encode); 4429 emit_byte(imm8); 4430 } 4431 4432 void Assembler::shrq(Register dst) { 4433 int encode = prefixq_and_encode(dst->encoding()); 4434 emit_byte(0xD3); 4435 emit_byte(0xE8 | encode); 4436 } 4437 4438 void Assembler::subq(Address dst, int32_t imm32) { 4439 InstructionMark im(this); 4440 prefixq(dst); 4441 emit_arith_operand(0x81, rbp, dst, imm32); 4442 } 4443 4444 void Assembler::subq(Address dst, Register src) { 4445 InstructionMark im(this); 4446 prefixq(dst, src); 4447 emit_byte(0x29); 4448 emit_operand(src, dst); 4449 } 4450 4451 void Assembler::subq(Register dst, int32_t imm32) { 4452 (void) prefixq_and_encode(dst->encoding()); 4453 emit_arith(0x81, 0xE8, dst, imm32); 4454 } 4455 4456 void Assembler::subq(Register dst, Address src) { 4457 InstructionMark im(this); 4458 prefixq(src, dst); 4459 
emit_byte(0x2B);
  emit_operand(dst, src);
}

// 64-bit SUB, register <- register form. The encoding returned by
// prefixq_and_encode() is deliberately discarded.
void Assembler::subq(Register dst, Register src) {
  (void) prefixq_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x2B, 0xC0, dst, src);
}

// 64-bit TEST of a register against a 32-bit immediate.
// A register whose encoding is 0 gets the short form (REX.W prefix + 0xA9);
// every other register uses 0xF7 plus a register byte. The immediate is
// always emitted as a full 32-bit value.
void Assembler::testq(Register dst, int32_t imm32) {
  // not using emit_arith because test
  // doesn't support sign-extension of
  // 8bit operands
  int encode = dst->encoding();
  if (encode == 0) {
    prefix(REX_W);
    emit_byte(0xA9);
  } else {
    encode = prefixq_and_encode(encode);
    emit_byte(0xF7);
    emit_byte(0xC0 | encode);
  }
  emit_long(imm32);
}

// 64-bit TEST, register against register.
void Assembler::testq(Register dst, Register src) {
  (void) prefixq_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x85, 0xC0, dst, src);
}

// 64-bit XADD with a memory destination (two-byte opcode 0x0F 0xC1).
// No lock prefix is emitted here.
void Assembler::xaddq(Address dst, Register src) {
  InstructionMark im(this);
  prefixq(dst, src);
  emit_byte(0x0F);
  emit_byte(0xC1);
  emit_operand(src, dst);
}

// 64-bit XCHG between a register and memory (opcode byte 0x87).
void Assembler::xchgq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x87);
  emit_operand(dst, src);
}

// 64-bit XCHG between two registers (opcode byte 0x87 + register byte).
void Assembler::xchgq(Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x87);
  emit_byte(0xc0 | encode);
}

// 64-bit XOR, register <- register form.
void Assembler::xorq(Register dst, Register src) {
  (void) prefixq_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x33, 0xC0, dst, src);
}

// 64-bit XOR, register <- memory form (opcode byte 0x33).
void Assembler::xorq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x33);
  emit_operand(dst, src);
}

#endif // !LP64

// Table of opposite conditions: the entry at index i is the negation of
// the condition whose numeric code is i (the trailing comment on each row
// names the condition that row is indexed by).
static Assembler::Condition reverse[] = {
    Assembler::noOverflow     /* overflow      = 0x0 */ ,
    Assembler::overflow       /* noOverflow    = 0x1 */ ,
    Assembler::aboveEqual     /* carrySet      = 0x2, below         = 0x2 */ ,
    Assembler::below          /* aboveEqual    = 0x3, carryClear    = 0x3 */ ,
    Assembler::notZero        /* zero          = 0x4, equal         = 0x4 */ ,
    Assembler::zero           /* notZero       = 0x5, notEqual      = 0x5 */ ,
    Assembler::above          /* belowEqual    = 0x6 */ ,
    Assembler::belowEqual     /* above         = 0x7 */ ,
    Assembler::positive       /* negative      = 0x8 */ ,
    Assembler::negative       /* positive      = 0x9 */ ,
    Assembler::noParity       /* parity        = 0xa */ ,
    Assembler::parity         /* noParity      = 0xb */ ,
    Assembler::greaterEqual   /* less          = 0xc */ ,
    Assembler::less           /* greaterEqual  = 0xd */ ,
    Assembler::greater        /* lessEqual     = 0xe */ ,
    Assembler::lessEqual      /* greater       = 0xf, */

};


// Implementation of MacroAssembler

// First all the versions that have distinct versions depending on 32/64 bit
// Unless the difference is trivial (1 line or so).

#ifndef _LP64

// 32bit versions

// 32-bit: an AddressLiteral maps directly onto an absolute Address built
// from its target and relocation spec.
Address MacroAssembler::as_Address(AddressLiteral adr) {
  return Address(adr.target(), adr.rspec());
}

// 32-bit: an ArrayAddress is converted by Address::make_array().
Address MacroAssembler::as_Address(ArrayAddress adr) {
  return Address::make_array(adr);
}

// Emits the biased-locking fast path (32-bit version).
//
//   lock_reg  - its slot at offset 0 is used to stash the mark word
//   obj_reg   - object being locked
//   swap_reg  - must be rax (needed by cmpxchg); holds the mark word
//   tmp_reg   - scratch; if noreg, lock_reg is borrowed and preserved with
//               push/pop around each use
//   swap_reg_contains_mark - true if the caller already loaded the mark
//   done      - branched to when the lock has been acquired via the bias
//   slow_case - if non-NULL, branched to when a bias-acquiring CAS fails
//   counters  - statistics counters; defaults to BiasedLocking::counters()
//               when PrintBiasedLockingStatistics is on and NULL is passed
//
// Returns the code offset recorded just before the first emitted
// instruction that loads through obj_reg (usable as the implicit null
// check position). Control falls out of the emitted code at cas_label when
// the caller must continue with the normal CAS-based locking scheme.
int MacroAssembler::biased_locking_enter(Register lock_reg,
                                         Register obj_reg,
                                         Register swap_reg,
                                         Register tmp_reg,
                                         bool swap_reg_contains_mark,
                                         Label& done,
                                         Label* slow_case,
                                         BiasedLockingCounters* counters) {
  assert(UseBiasedLocking, "why call this otherwise?");
  assert(swap_reg == rax, "swap_reg must be rax, for cmpxchg");
  assert_different_registers(lock_reg, obj_reg, swap_reg);

  if (PrintBiasedLockingStatistics && counters == NULL)
    counters = BiasedLocking::counters();

  // No temp supplied: borrow lock_reg and save/restore it around every use.
  bool need_tmp_reg = false;
  if (tmp_reg == noreg) {
    need_tmp_reg = true;
    tmp_reg = lock_reg;
  } else {
    assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg);
  }
  assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
  Address mark_addr      (obj_reg, oopDesc::mark_offset_in_bytes());
  Address klass_addr     (obj_reg, oopDesc::klass_offset_in_bytes());
  Address saved_mark_addr(lock_reg, 0);

  // Biased locking
  // See whether the lock is currently biased toward our thread and
  // whether the epoch is still valid
  // Note that the runtime guarantees sufficient alignment of JavaThread
  // pointers to allow age to be placed into low bits
  // First check to see whether biasing is even enabled for this object
  Label cas_label;
  int null_check_offset = -1;
  if (!swap_reg_contains_mark) {
    null_check_offset = offset();
    movl(swap_reg, mark_addr);
  }
  if (need_tmp_reg) {
    push(tmp_reg);
  }
  movl(tmp_reg, swap_reg);
  andl(tmp_reg, markOopDesc::biased_lock_mask_in_place);
  cmpl(tmp_reg, markOopDesc::biased_lock_pattern);
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  jcc(Assembler::notEqual, cas_label);
  // The bias pattern is present in the object's header. Need to check
  // whether the bias owner and the epoch are both still current.
  // Note that because there is no current thread register on x86 we
  // need to store off the mark word we read out of the object to
  // avoid reloading it and needing to recheck invariants below. This
  // store is unfortunate but it makes the overall code shorter and
  // simpler.
  movl(saved_mark_addr, swap_reg);
  if (need_tmp_reg) {
    push(tmp_reg);
  }
  get_thread(tmp_reg);
  xorl(swap_reg, tmp_reg);
  if (swap_reg_contains_mark) {
    // The klass load below is the first access through obj_reg.
    null_check_offset = offset();
  }
  movl(tmp_reg, klass_addr);
  xorl(swap_reg, Address(tmp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
  andl(swap_reg, ~((int) markOopDesc::age_mask_in_place));
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  if (counters != NULL) {
    cond_inc32(Assembler::zero,
               ExternalAddress((address)counters->biased_lock_entry_count_addr()));
  }
  jcc(Assembler::equal, done);

  Label try_revoke_bias;
  Label try_rebias;

  // At this point we know that the header has the bias pattern and
  // that we are not the bias owner in the current epoch. We need to
  // figure out more details about the state of the header in order to
  // know what operations can be legally performed on the object's
  // header.

  // If the low three bits in the xor result aren't clear, that means
  // the prototype header is no longer biased and we have to revoke
  // the bias on this object.
  testl(swap_reg, markOopDesc::biased_lock_mask_in_place);
  jcc(Assembler::notZero, try_revoke_bias);

  // Biasing is still enabled for this data type. See whether the
  // epoch of the current bias is still valid, meaning that the epoch
  // bits of the mark word are equal to the epoch bits of the
  // prototype header. (Note that the prototype header's epoch bits
  // only change at a safepoint.) If not, attempt to rebias the object
  // toward the current thread. Note that we must be absolutely sure
  // that the current epoch is invalid in order to do this because
  // otherwise the manipulations it performs on the mark word are
  // illegal.
  testl(swap_reg, markOopDesc::epoch_mask_in_place);
  jcc(Assembler::notZero, try_rebias);

  // The epoch of the current bias is still valid but we know nothing
  // about the owner; it might be set or it might be clear. Try to
  // acquire the bias of the object using an atomic operation. If this
  // fails we will go in to the runtime to revoke the object's bias.
  // Note that we first construct the presumed unbiased header so we
  // don't accidentally blow away another thread's valid bias.
  movl(swap_reg, saved_mark_addr);
  andl(swap_reg,
       markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);
  if (need_tmp_reg) {
    push(tmp_reg);
  }
  get_thread(tmp_reg);
  orl(tmp_reg, swap_reg);
  if (os::is_MP()) {
    lock();
  }
  cmpxchgptr(tmp_reg, Address(obj_reg, 0));
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  // If the biasing toward our thread failed, this means that
  // another thread succeeded in biasing it toward itself and we
  // need to revoke that bias. The revocation will occur in the
  // interpreter runtime in the slow case.
  if (counters != NULL) {
    cond_inc32(Assembler::zero,
               ExternalAddress((address)counters->anonymously_biased_lock_entry_count_addr()));
  }
  if (slow_case != NULL) {
    jcc(Assembler::notZero, *slow_case);
  }
  jmp(done);

  bind(try_rebias);
  // At this point we know the epoch has expired, meaning that the
  // current "bias owner", if any, is actually invalid. Under these
  // circumstances _only_, we are allowed to use the current header's
  // value as the comparison value when doing the cas to acquire the
  // bias in the current epoch. In other words, we allow transfer of
  // the bias from one thread to another directly in this situation.
  //
  // FIXME: due to a lack of registers we currently blow away the age
  // bits in this situation. Should attempt to preserve them.
  if (need_tmp_reg) {
    push(tmp_reg);
  }
  get_thread(tmp_reg);
  movl(swap_reg, klass_addr);
  orl(tmp_reg, Address(swap_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
  movl(swap_reg, saved_mark_addr);
  if (os::is_MP()) {
    lock();
  }
  cmpxchgptr(tmp_reg, Address(obj_reg, 0));
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  // If the biasing toward our thread failed, then another thread
  // succeeded in biasing it toward itself and we need to revoke that
  // bias. The revocation will occur in the runtime in the slow case.
  if (counters != NULL) {
    cond_inc32(Assembler::zero,
               ExternalAddress((address)counters->rebiased_lock_entry_count_addr()));
  }
  if (slow_case != NULL) {
    jcc(Assembler::notZero, *slow_case);
  }
  jmp(done);

  bind(try_revoke_bias);
  // The prototype mark in the klass doesn't have the bias bit set any
  // more, indicating that objects of this data type are not supposed
  // to be biased any more. We are going to try to reset the mark of
  // this object to the prototype value and fall through to the
  // CAS-based locking scheme. Note that if our CAS fails, it means
  // that another thread raced us for the privilege of revoking the
  // bias of this particular object, so it's okay to continue in the
  // normal locking code.
  //
  // FIXME: due to a lack of registers we currently blow away the age
  // bits in this situation. Should attempt to preserve them.
  movl(swap_reg, saved_mark_addr);
  if (need_tmp_reg) {
    push(tmp_reg);
  }
  movl(tmp_reg, klass_addr);
  movl(tmp_reg, Address(tmp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
  if (os::is_MP()) {
    lock();
  }
  cmpxchgptr(tmp_reg, Address(obj_reg, 0));
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  // Fall through to the normal CAS-based lock, because no matter what
  // the result of the above CAS, some thread must have succeeded in
  // removing the bias bit from the object's header.
  if (counters != NULL) {
    cond_inc32(Assembler::zero,
               ExternalAddress((address)counters->revoked_lock_entry_count_addr()));
  }

  bind(cas_label);

  return null_check_offset;
}

// 32-bit: calls entry_point and then adds number_of_arguments words back
// to rsp, i.e. the caller is expected to have pushed the arguments.
void MacroAssembler::call_VM_leaf_base(address entry_point,
                                       int number_of_arguments) {
  call(RuntimeAddress(entry_point));
  increment(rsp, number_of_arguments * wordSize);
}

// 32-bit: compares a memory operand with an oop embedded as a relocated
// 32-bit immediate.
void MacroAssembler::cmpoop(Address src1, jobject obj) {
  cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate());
}

// 32-bit: compares a register with an oop embedded as a relocated 32-bit
// immediate.
void MacroAssembler::cmpoop(Register src1, jobject obj) {
  cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate());
}

void MacroAssembler::extend_sign(Register hi, Register lo) {
  // According to Intel Doc. AP-526, "Integer Divide", p.18.
if (VM_Version::is_P6() && hi == rdx && lo == rax) {
    cdql();
  } else {
    movl(hi, lo);
    sarl(hi, 31);
  }
}

// 32-bit: emits four segment-override prefix bytes followed by 0x90.
void MacroAssembler::fat_nop() {
  // A 5 byte nop that is safe for patching (see patch_verified_entry)
  emit_byte(0x26); // es:
  emit_byte(0x2e); // cs:
  emit_byte(0x64); // fs:
  emit_byte(0x65); // gs:
  emit_byte(0x90);
}

// Branches to L if FPU status flag C2 is set. rax is saved in tmp around
// the status-word transfer and restored before the branch.
void MacroAssembler::jC2(Register tmp, Label& L) {
  // set parity bit if FPU flag C2 is set (via rax)
  save_rax(tmp);
  fwait(); fnstsw_ax();
  sahf();
  restore_rax(tmp);
  // branch
  jcc(Assembler::parity, L);
}

// Branches to L if FPU status flag C2 is clear; otherwise identical to jC2.
void MacroAssembler::jnC2(Register tmp, Label& L) {
  // set parity bit if FPU flag C2 is set (via rax)
  save_rax(tmp);
  fwait(); fnstsw_ax();
  sahf();
  restore_rax(tmp);
  // branch
  jcc(Assembler::noParity, L);
}

// 32bit can do a case table jump in one instruction but we no longer allow the base
// to be installed in the Address class
void MacroAssembler::jump(ArrayAddress entry) {
  jmp(as_Address(entry));
}

// Three-way compare of two longs held in register pairs (32-bit).
// The result is left in x_hi: -1, 0 or 1. The high words are compared
// signed (less/greater), the low words unsigned (below).
// Note: y_lo will be destroyed
void MacroAssembler::lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo) {
  // Long compare for Java (semantics as described in JVM spec.)
  Label high, low, done;

  cmpl(x_hi, y_hi);
  jcc(Assembler::less, low);
  jcc(Assembler::greater, high);
  // x_hi is the return register
  xorl(x_hi, x_hi);
  cmpl(x_lo, y_lo);
  jcc(Assembler::below, low);
  jcc(Assembler::equal, done);

  bind(high);
  xorl(x_hi, x_hi);
  increment(x_hi);
  jmp(done);

  bind(low);
  xorl(x_hi, x_hi);
  decrementl(x_hi);

  bind(done);
}

// 32-bit: materializes the literal's target address in dst as a relocated
// 32-bit immediate move.
void MacroAssembler::lea(Register dst, AddressLiteral src) {
    mov_literal32(dst, (int32_t)src.target(), src.rspec());
}

// 32-bit: stores the literal's target address into memory at dst as a
// relocated 32-bit immediate.
void MacroAssembler::lea(Address dst, AddressLiteral adr) {
  // leal(dst, as_Address(adr));
  // see note in movl as to why we must use a move
  mov_literal32(dst, (int32_t) adr.target(), adr.rspec());
}

// 32-bit: tears down the frame with an explicit mov/pop pair.
void MacroAssembler::leave() {
  mov(rsp, rbp);
  pop(rbp);
}

// 64x64 -> 64 bit multiply of two stack-resident longs (32-bit).
// Uses rax, rbx, rcx and rdx as working registers; none are preserved.
void MacroAssembler::lmul(int x_rsp_offset, int y_rsp_offset) {
  // Multiplication of two Java long values stored on the stack
  // as illustrated below. Result is in rdx:rax.
  //
  // rsp ---> [  ??  ] \               \
  //            ....    | y_rsp_offset  |
  //          [ y_lo ] /  (in bytes)    | x_rsp_offset
  //          [ y_hi ]                  | (in bytes)
  //            ....                    |
  //          [ x_lo ]                 /
  //          [ x_hi ]
  //            ....
  //
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
  Address x_hi(rsp, x_rsp_offset + wordSize); Address x_lo(rsp, x_rsp_offset);
  Address y_hi(rsp, y_rsp_offset + wordSize); Address y_lo(rsp, y_rsp_offset);
  Label quick;
  // load x_hi, y_hi and check if quick
  // multiplication is possible
  movl(rbx, x_hi);
  movl(rcx, y_hi);
  movl(rax, rbx);
  orl(rbx, rcx);                                 // rbx, = 0 <=> x_hi = 0 and y_hi = 0
  jcc(Assembler::zero, quick);                   // if rbx, = 0 do quick multiply
  // do full multiplication
  // 1st step
  mull(y_lo);                                    // x_hi * y_lo
  movl(rbx, rax);                                // save lo(x_hi * y_lo) in rbx,
  // 2nd step
  movl(rax, x_lo);
  mull(rcx);                                     // x_lo * y_hi
  addl(rbx, rax);                                // add lo(x_lo * y_hi) to rbx,
  // 3rd step
  bind(quick);                                   // note: rbx, = 0 if quick multiply!
  movl(rax, x_lo);
  mull(y_lo);                                    // x_lo * y_lo
  addl(rdx, rbx);                                // correct hi(x_lo * y_lo)
}

// Negates the long held in hi:lo (32-bit): negate the low word, fold the
// resulting carry into the high word, then negate the high word.
void MacroAssembler::lneg(Register hi, Register lo) {
  negl(lo);
  adcl(hi, 0);
  negl(hi);
}

// Shifts the long in hi:lo left by the count in rcx (32-bit).
// rcx is masked to 6 bits in place, so it is modified.
void MacroAssembler::lshl(Register hi, Register lo) {
  // Java shift left long support (semantics as described in JVM spec., p.305)
  // (basic idea for shift counts s >= n: x << s == (x << n) << (s - n))
  // shift value is in rcx !
  assert(hi != rcx, "must not use rcx");
  assert(lo != rcx, "must not use rcx");
  const Register s = rcx;                        // shift count
  const int      n = BitsPerWord;
  Label L;
  andl(s, 0x3f);                                 // s := s & 0x3f (s < 0x40)
  cmpl(s, n);                                    // if (s < n)
  jcc(Assembler::less, L);                       // else (s >= n)
  movl(hi, lo);                                  // x := x << n
  xorl(lo, lo);
  // Note: subl(s, n) is not needed since the Intel shift instructions work rcx mod n!
4934 bind(L); // s (mod n) < n 4935 shldl(hi, lo); // x := x << s 4936 shll(lo); 4937 } 4938 4939 4940 void MacroAssembler::lshr(Register hi, Register lo, bool sign_extension) { 4941 // Java shift right long support (semantics as described in JVM spec., p.306 & p.310) 4942 // (basic idea for shift counts s >= n: x >> s == (x >> n) >> (s - n)) 4943 assert(hi != rcx, "must not use rcx"); 4944 assert(lo != rcx, "must not use rcx"); 4945 const Register s = rcx; // shift count 4946 const int n = BitsPerWord; 4947 Label L; 4948 andl(s, 0x3f); // s := s & 0x3f (s < 0x40) 4949 cmpl(s, n); // if (s < n) 4950 jcc(Assembler::less, L); // else (s >= n) 4951 movl(lo, hi); // x := x >> n 4952 if (sign_extension) sarl(hi, 31); 4953 else xorl(hi, hi); 4954 // Note: subl(s, n) is not needed since the Intel shift instructions work rcx mod n! 4955 bind(L); // s (mod n) < n 4956 shrdl(lo, hi); // x := x >> s 4957 if (sign_extension) sarl(hi); 4958 else shrl(hi); 4959 } 4960 4961 void MacroAssembler::movoop(Register dst, jobject obj) { 4962 mov_literal32(dst, (int32_t)obj, oop_Relocation::spec_for_immediate()); 4963 } 4964 4965 void MacroAssembler::movoop(Address dst, jobject obj) { 4966 mov_literal32(dst, (int32_t)obj, oop_Relocation::spec_for_immediate()); 4967 } 4968 4969 void MacroAssembler::movptr(Register dst, AddressLiteral src) { 4970 if (src.is_lval()) { 4971 mov_literal32(dst, (intptr_t)src.target(), src.rspec()); 4972 } else { 4973 movl(dst, as_Address(src)); 4974 } 4975 } 4976 4977 void MacroAssembler::movptr(ArrayAddress dst, Register src) { 4978 movl(as_Address(dst), src); 4979 } 4980 4981 void MacroAssembler::movptr(Register dst, ArrayAddress src) { 4982 movl(dst, as_Address(src)); 4983 } 4984 4985 // src should NEVER be a real pointer. 
Use AddressLiteral for true pointers 4986 void MacroAssembler::movptr(Address dst, intptr_t src) { 4987 movl(dst, src); 4988 } 4989 4990 4991 void MacroAssembler::pop_callee_saved_registers() { 4992 pop(rcx); 4993 pop(rdx); 4994 pop(rdi); 4995 pop(rsi); 4996 } 4997 4998 void MacroAssembler::pop_fTOS() { 4999 fld_d(Address(rsp, 0)); 5000 addl(rsp, 2 * wordSize); 5001 } 5002 5003 void MacroAssembler::push_callee_saved_registers() { 5004 push(rsi); 5005 push(rdi); 5006 push(rdx); 5007 push(rcx); 5008 } 5009 5010 void MacroAssembler::push_fTOS() { 5011 subl(rsp, 2 * wordSize); 5012 fstp_d(Address(rsp, 0)); 5013 } 5014 5015 5016 void MacroAssembler::pushoop(jobject obj) { 5017 push_literal32((int32_t)obj, oop_Relocation::spec_for_immediate()); 5018 } 5019 5020 5021 void MacroAssembler::pushptr(AddressLiteral src) { 5022 if (src.is_lval()) { 5023 push_literal32((int32_t)src.target(), src.rspec()); 5024 } else { 5025 pushl(as_Address(src)); 5026 } 5027 } 5028 5029 void MacroAssembler::set_word_if_not_zero(Register dst) { 5030 xorl(dst, dst); 5031 set_byte_if_not_zero(dst); 5032 } 5033 5034 static void pass_arg0(MacroAssembler* masm, Register arg) { 5035 masm->push(arg); 5036 } 5037 5038 static void pass_arg1(MacroAssembler* masm, Register arg) { 5039 masm->push(arg); 5040 } 5041 5042 static void pass_arg2(MacroAssembler* masm, Register arg) { 5043 masm->push(arg); 5044 } 5045 5046 static void pass_arg3(MacroAssembler* masm, Register arg) { 5047 masm->push(arg); 5048 } 5049 5050 #ifndef PRODUCT 5051 extern "C" void findpc(intptr_t x); 5052 #endif 5053 5054 void MacroAssembler::debug32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip, char* msg) { 5055 // In order to get locks to work, we need to fake a in_VM state 5056 JavaThread* thread = JavaThread::current(); 5057 JavaThreadState saved_state = thread->thread_state(); 5058 thread->set_thread_state(_thread_in_vm); 5059 if (ShowMessageBoxOnError) { 5060 JavaThread* thread = 
JavaThread::current(); 5061 JavaThreadState saved_state = thread->thread_state(); 5062 thread->set_thread_state(_thread_in_vm); 5063 if (CountBytecodes || TraceBytecodes || StopInterpreterAt) { 5064 ttyLocker ttyl; 5065 BytecodeCounter::print(); 5066 } 5067 // To see where a verify_oop failed, get $ebx+40/X for this frame. 5068 // This is the value of eip which points to where verify_oop will return. 5069 if (os::message_box(msg, "Execution stopped, print registers?")) { 5070 ttyLocker ttyl; 5071 tty->print_cr("eip = 0x%08x", eip); 5072 #ifndef PRODUCT 5073 if ((WizardMode || Verbose) && PrintMiscellaneous) { 5074 tty->cr(); 5075 findpc(eip); 5076 tty->cr(); 5077 } 5078 #endif 5079 tty->print_cr("rax = 0x%08x", rax); 5080 tty->print_cr("rbx = 0x%08x", rbx); 5081 tty->print_cr("rcx = 0x%08x", rcx); 5082 tty->print_cr("rdx = 0x%08x", rdx); 5083 tty->print_cr("rdi = 0x%08x", rdi); 5084 tty->print_cr("rsi = 0x%08x", rsi); 5085 tty->print_cr("rbp = 0x%08x", rbp); 5086 tty->print_cr("rsp = 0x%08x", rsp); 5087 BREAKPOINT; 5088 assert(false, "start up GDB"); 5089 } 5090 } else { 5091 ttyLocker ttyl; 5092 ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg); 5093 assert(false, err_msg("DEBUG MESSAGE: %s", msg)); 5094 } 5095 ThreadStateTransition::transition(thread, _thread_in_vm, saved_state); 5096 } 5097 5098 void MacroAssembler::stop(const char* msg) { 5099 ExternalAddress message((address)msg); 5100 // push address of message 5101 pushptr(message.addr()); 5102 { Label L; call(L, relocInfo::none); bind(L); } // push eip 5103 pusha(); // push registers 5104 call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug32))); 5105 hlt(); 5106 } 5107 5108 void MacroAssembler::warn(const char* msg) { 5109 push_CPU_state(); 5110 5111 ExternalAddress message((address) msg); 5112 // push address of message 5113 pushptr(message.addr()); 5114 5115 call(RuntimeAddress(CAST_FROM_FN_PTR(address, warning))); 5116 addl(rsp, wordSize); // discard argument 
5117 pop_CPU_state(); 5118 } 5119 5120 #else // _LP64 5121 5122 // 64 bit versions 5123 5124 Address MacroAssembler::as_Address(AddressLiteral adr) { 5125 // amd64 always does this as a pc-rel 5126 // we can be absolute or disp based on the instruction type 5127 // jmp/call are displacements others are absolute 5128 assert(!adr.is_lval(), "must be rval"); 5129 assert(reachable(adr), "must be"); 5130 return Address((int32_t)(intptr_t)(adr.target() - pc()), adr.target(), adr.reloc()); 5131 5132 } 5133 5134 Address MacroAssembler::as_Address(ArrayAddress adr) { 5135 AddressLiteral base = adr.base(); 5136 lea(rscratch1, base); 5137 Address index = adr.index(); 5138 assert(index._disp == 0, "must not have disp"); // maybe it can? 5139 Address array(rscratch1, index._index, index._scale, index._disp); 5140 return array; 5141 } 5142 5143 int MacroAssembler::biased_locking_enter(Register lock_reg, 5144 Register obj_reg, 5145 Register swap_reg, 5146 Register tmp_reg, 5147 bool swap_reg_contains_mark, 5148 Label& done, 5149 Label* slow_case, 5150 BiasedLockingCounters* counters) { 5151 assert(UseBiasedLocking, "why call this otherwise?"); 5152 assert(swap_reg == rax, "swap_reg must be rax for cmpxchgq"); 5153 assert(tmp_reg != noreg, "tmp_reg must be supplied"); 5154 assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg); 5155 assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout"); 5156 Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes()); 5157 Address saved_mark_addr(lock_reg, 0); 5158 5159 if (PrintBiasedLockingStatistics && counters == NULL) 5160 counters = BiasedLocking::counters(); 5161 5162 // Biased locking 5163 // See whether the lock is currently biased toward our thread and 5164 // whether the epoch is still valid 5165 // Note that the runtime guarantees sufficient alignment of JavaThread 5166 // pointers to allow age to be placed into low bits 5167 // First 
check to see whether biasing is even enabled for this object 5168 Label cas_label; 5169 int null_check_offset = -1; 5170 if (!swap_reg_contains_mark) { 5171 null_check_offset = offset(); 5172 movq(swap_reg, mark_addr); 5173 } 5174 movq(tmp_reg, swap_reg); 5175 andq(tmp_reg, markOopDesc::biased_lock_mask_in_place); 5176 cmpq(tmp_reg, markOopDesc::biased_lock_pattern); 5177 jcc(Assembler::notEqual, cas_label); 5178 // The bias pattern is present in the object's header. Need to check 5179 // whether the bias owner and the epoch are both still current. 5180 load_prototype_header(tmp_reg, obj_reg); 5181 orq(tmp_reg, r15_thread); 5182 xorq(tmp_reg, swap_reg); 5183 andq(tmp_reg, ~((int) markOopDesc::age_mask_in_place)); 5184 if (counters != NULL) { 5185 cond_inc32(Assembler::zero, 5186 ExternalAddress((address) counters->anonymously_biased_lock_entry_count_addr())); 5187 } 5188 jcc(Assembler::equal, done); 5189 5190 Label try_revoke_bias; 5191 Label try_rebias; 5192 5193 // At this point we know that the header has the bias pattern and 5194 // that we are not the bias owner in the current epoch. We need to 5195 // figure out more details about the state of the header in order to 5196 // know what operations can be legally performed on the object's 5197 // header. 5198 5199 // If the low three bits in the xor result aren't clear, that means 5200 // the prototype header is no longer biased and we have to revoke 5201 // the bias on this object. 5202 testq(tmp_reg, markOopDesc::biased_lock_mask_in_place); 5203 jcc(Assembler::notZero, try_revoke_bias); 5204 5205 // Biasing is still enabled for this data type. See whether the 5206 // epoch of the current bias is still valid, meaning that the epoch 5207 // bits of the mark word are equal to the epoch bits of the 5208 // prototype header. (Note that the prototype header's epoch bits 5209 // only change at a safepoint.) If not, attempt to rebias the object 5210 // toward the current thread. 
Note that we must be absolutely sure 5211 // that the current epoch is invalid in order to do this because 5212 // otherwise the manipulations it performs on the mark word are 5213 // illegal. 5214 testq(tmp_reg, markOopDesc::epoch_mask_in_place); 5215 jcc(Assembler::notZero, try_rebias); 5216 5217 // The epoch of the current bias is still valid but we know nothing 5218 // about the owner; it might be set or it might be clear. Try to 5219 // acquire the bias of the object using an atomic operation. If this 5220 // fails we will go in to the runtime to revoke the object's bias. 5221 // Note that we first construct the presumed unbiased header so we 5222 // don't accidentally blow away another thread's valid bias. 5223 andq(swap_reg, 5224 markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place); 5225 movq(tmp_reg, swap_reg); 5226 orq(tmp_reg, r15_thread); 5227 if (os::is_MP()) { 5228 lock(); 5229 } 5230 cmpxchgq(tmp_reg, Address(obj_reg, 0)); 5231 // If the biasing toward our thread failed, this means that 5232 // another thread succeeded in biasing it toward itself and we 5233 // need to revoke that bias. The revocation will occur in the 5234 // interpreter runtime in the slow case. 5235 if (counters != NULL) { 5236 cond_inc32(Assembler::zero, 5237 ExternalAddress((address) counters->anonymously_biased_lock_entry_count_addr())); 5238 } 5239 if (slow_case != NULL) { 5240 jcc(Assembler::notZero, *slow_case); 5241 } 5242 jmp(done); 5243 5244 bind(try_rebias); 5245 // At this point we know the epoch has expired, meaning that the 5246 // current "bias owner", if any, is actually invalid. Under these 5247 // circumstances _only_, we are allowed to use the current header's 5248 // value as the comparison value when doing the cas to acquire the 5249 // bias in the current epoch. In other words, we allow transfer of 5250 // the bias from one thread to another directly in this situation. 
5251 // 5252 // FIXME: due to a lack of registers we currently blow away the age 5253 // bits in this situation. Should attempt to preserve them. 5254 load_prototype_header(tmp_reg, obj_reg); 5255 orq(tmp_reg, r15_thread); 5256 if (os::is_MP()) { 5257 lock(); 5258 } 5259 cmpxchgq(tmp_reg, Address(obj_reg, 0)); 5260 // If the biasing toward our thread failed, then another thread 5261 // succeeded in biasing it toward itself and we need to revoke that 5262 // bias. The revocation will occur in the runtime in the slow case. 5263 if (counters != NULL) { 5264 cond_inc32(Assembler::zero, 5265 ExternalAddress((address) counters->rebiased_lock_entry_count_addr())); 5266 } 5267 if (slow_case != NULL) { 5268 jcc(Assembler::notZero, *slow_case); 5269 } 5270 jmp(done); 5271 5272 bind(try_revoke_bias); 5273 // The prototype mark in the klass doesn't have the bias bit set any 5274 // more, indicating that objects of this data type are not supposed 5275 // to be biased any more. We are going to try to reset the mark of 5276 // this object to the prototype value and fall through to the 5277 // CAS-based locking scheme. Note that if our CAS fails, it means 5278 // that another thread raced us for the privilege of revoking the 5279 // bias of this particular object, so it's okay to continue in the 5280 // normal locking code. 5281 // 5282 // FIXME: due to a lack of registers we currently blow away the age 5283 // bits in this situation. Should attempt to preserve them. 5284 load_prototype_header(tmp_reg, obj_reg); 5285 if (os::is_MP()) { 5286 lock(); 5287 } 5288 cmpxchgq(tmp_reg, Address(obj_reg, 0)); 5289 // Fall through to the normal CAS-based lock, because no matter what 5290 // the result of the above CAS, some thread must have succeeded in 5291 // removing the bias bit from the object's header. 
5292 if (counters != NULL) { 5293 cond_inc32(Assembler::zero, 5294 ExternalAddress((address) counters->revoked_lock_entry_count_addr())); 5295 } 5296 5297 bind(cas_label); 5298 5299 return null_check_offset; 5300 } 5301 5302 void MacroAssembler::call_VM_leaf_base(address entry_point, int num_args) { 5303 Label L, E; 5304 5305 #ifdef _WIN64 5306 // Windows always allocates space for it's register args 5307 assert(num_args <= 4, "only register arguments supported"); 5308 subq(rsp, frame::arg_reg_save_area_bytes); 5309 #endif 5310 5311 // Align stack if necessary 5312 testl(rsp, 15); 5313 jcc(Assembler::zero, L); 5314 5315 subq(rsp, 8); 5316 { 5317 call(RuntimeAddress(entry_point)); 5318 } 5319 addq(rsp, 8); 5320 jmp(E); 5321 5322 bind(L); 5323 { 5324 call(RuntimeAddress(entry_point)); 5325 } 5326 5327 bind(E); 5328 5329 #ifdef _WIN64 5330 // restore stack pointer 5331 addq(rsp, frame::arg_reg_save_area_bytes); 5332 #endif 5333 5334 } 5335 5336 void MacroAssembler::cmp64(Register src1, AddressLiteral src2) { 5337 assert(!src2.is_lval(), "should use cmpptr"); 5338 5339 if (reachable(src2)) { 5340 cmpq(src1, as_Address(src2)); 5341 } else { 5342 lea(rscratch1, src2); 5343 Assembler::cmpq(src1, Address(rscratch1, 0)); 5344 } 5345 } 5346 5347 int MacroAssembler::corrected_idivq(Register reg) { 5348 // Full implementation of Java ldiv and lrem; checks for special 5349 // case as described in JVM spec., p.243 & p.271. The function 5350 // returns the (pc) offset of the idivl instruction - may be needed 5351 // for implicit exceptions. 
//
  //         normal case                           special case
  //
  // input : rax: dividend                         min_long
  //         reg: divisor   (may not be eax/edx)   -1
  //
  // output: rax: quotient  (= rax idiv reg)       min_long
  //         rdx: remainder (= rax irem reg)       0
  assert(reg != rax && reg != rdx, "reg cannot be rax or rdx register");
  static const int64_t min_long = 0x8000000000000000;
  Label normal_case, special_case;

  // check for special case
  cmp64(rax, ExternalAddress((address) &min_long));
  jcc(Assembler::notEqual, normal_case);
  xorl(rdx, rdx); // prepare rdx for possible special case (where
                  // remainder = 0)
  cmpq(reg, -1);
  jcc(Assembler::equal, special_case);

  // handle normal case
  bind(normal_case);
  cdqq();
  // Record where the divide itself starts; this is the value returned.
  int idivq_offset = offset();
  idivq(reg);

  // normal and special case exit
  bind(special_case);

  return idivq_offset;
}

// Subtracts a compile-time constant from a 64-bit register.
// min_jint is handled first because it cannot be negated; negative values
// are forwarded to incrementq, zero emits nothing, and 1 uses decq when
// UseIncDec is set.
void MacroAssembler::decrementq(Register reg, int value) {
  if (value == min_jint) { subq(reg, value); return; }
  if (value <  0) { incrementq(reg, -value); return; }
  if (value == 0) {                        ; return; }
  if (value == 1 && UseIncDec) { decq(reg) ; return; }
  /* else */      { subq(reg, value)       ; return; }
}

// Memory-operand variant of decrementq; same case analysis.
void MacroAssembler::decrementq(Address dst, int value) {
  if (value == min_jint) { subq(dst, value); return; }
  if (value <  0) { incrementq(dst, -value); return; }
  if (value == 0) {                        ; return; }
  if (value == 1 && UseIncDec) { decq(dst) ; return; }
  /* else */      { subq(dst, value)       ; return; }
}

// 64-bit: emits the byte sequence 66 66 90 66 90.
void MacroAssembler::fat_nop() {
  // A 5 byte nop that is safe for patching (see patch_verified_entry)
  // Recommended sequence from 'Software Optimization Guide for the AMD
  // Hammer Processor'
  emit_byte(0x66);
  emit_byte(0x66);
  emit_byte(0x90);
  emit_byte(0x66);
  emit_byte(0x90);
}

// Adds a compile-time constant to a 64-bit register.
// min_jint is handled first because it cannot be negated; negative values
// are forwarded to decrementq, zero emits nothing, and 1 uses incq when
// UseIncDec is set.
void MacroAssembler::incrementq(Register reg, int value) {
  if (value == min_jint) { addq(reg, value); return; }
  if (value <  0) { decrementq(reg, -value); return; }
  if (value == 0) {                        ; return; }
  if (value == 1 && UseIncDec) { incq(reg) ; return; }
  /* else */      { addq(reg, value)       ; return; }
}

// Memory-operand variant of incrementq; same case analysis.
void MacroAssembler::incrementq(Address dst, int value) {
  if (value == min_jint) { addq(dst, value); return; }
  if (value <  0) { decrementq(dst, -value); return; }
  if (value == 0) {                        ; return; }
  if (value == 1 && UseIncDec) { incq(dst) ; return; }
  /* else */      { addq(dst, value)       ; return; }
}

// 32bit can do a case table jump in one instruction but we no longer allow the base
// to be installed in the Address class
// 64-bit: the table base is loaded into rscratch1 (clobbering it) and
// installed as the base register of the index Address before jumping.
void MacroAssembler::jump(ArrayAddress entry) {
  lea(rscratch1, entry.base());
  Address dispatch = entry.index();
  assert(dispatch._base == noreg, "must be");
  dispatch._base = rscratch1;
  jmp(dispatch);
}

// Not used on 64-bit: reaching this at code-generation time is an error.
void MacroAssembler::lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo) {
  ShouldNotReachHere(); // 64bit doesn't use two regs
  cmpq(x_lo, y_lo);
}

// 64-bit: materializes the literal's target address in dst as a relocated
// 64-bit immediate move.
void MacroAssembler::lea(Register dst, AddressLiteral src) {
    mov_literal64(dst, (intptr_t)src.target(), src.rspec());
}

// 64-bit: stores the literal's target address to memory at dst, going
// through rscratch1 (which is clobbered).
void MacroAssembler::lea(Address dst, AddressLiteral adr) {
  mov_literal64(rscratch1, (intptr_t)adr.target(), adr.rspec());
  movptr(dst, rscratch1);
}

void MacroAssembler::leave() {
  // %%% is this really better? Why not on 32bit too?
emit_byte(0xC9); // LEAVE
}

// Two-register long negate. ShouldNotReachHere() is invoked first, so this
// overload is not expected to be used in this (64-bit) section.
void MacroAssembler::lneg(Register hi, Register lo) {
  ShouldNotReachHere(); // 64bit doesn't use two regs
  negq(lo);
}

// Loads the jobject 'obj' into dst as a 64-bit literal tagged with an
// immediate-oop relocation.
void MacroAssembler::movoop(Register dst, jobject obj) {
  mov_literal64(dst, (intptr_t)obj, oop_Relocation::spec_for_immediate());
}

// Stores the jobject 'obj' to memory at dst. Goes through rscratch1, which is
// clobbered.
void MacroAssembler::movoop(Address dst, jobject obj) {
  mov_literal64(rscratch1, (intptr_t)obj, oop_Relocation::spec_for_immediate());
  movq(dst, rscratch1);
}

// Loads a pointer-sized value described by an AddressLiteral:
//  - lval literal : dst receives the address itself.
//  - otherwise    : dst receives the 64-bit word stored at that address, using
//                   a direct memory operand when reachable(src) holds and an
//                   indirection through rscratch1 (clobbered) when it does not.
void MacroAssembler::movptr(Register dst, AddressLiteral src) {
  if (src.is_lval()) {
    mov_literal64(dst, (intptr_t)src.target(), src.rspec());
  } else {
    if (reachable(src)) {
      movq(dst, as_Address(src));
    } else {
      lea(rscratch1, src);
      movq(dst, Address(rscratch1,0));
    }
  }
}

// Stores src to the array element described by dst.
void MacroAssembler::movptr(ArrayAddress dst, Register src) {
  movq(as_Address(dst), src);
}

// Loads dst from the array element described by src.
void MacroAssembler::movptr(Register dst, ArrayAddress src) {
  movq(dst, as_Address(src));
}

// src should NEVER be a real pointer.
// Use AddressLiteral for true pointers
// Stores the 64-bit constant src to memory at dst via rscratch1 (clobbered).
void MacroAssembler::movptr(Address dst, intptr_t src) {
  mov64(rscratch1, src);
  movq(dst, rscratch1);
}

// These are mostly for initializing NULL
void MacroAssembler::movptr(Address dst, int32_t src) {
  movslq(dst, src);
}

// Loads the 32-bit constant src, widened to intptr_t, into dst.
void MacroAssembler::movptr(Register dst, int32_t src) {
  mov64(dst, (intptr_t)src);
}

// Pushes the jobject 'obj' on the stack. Clobbers rscratch1.
void MacroAssembler::pushoop(jobject obj) {
  movoop(rscratch1, obj);
  push(rscratch1);
}

// Pushes either the literal address itself (lval) or the word stored at that
// address (non-lval). Clobbers rscratch1 in both cases.
void MacroAssembler::pushptr(AddressLiteral src) {
  lea(rscratch1, src);
  if (src.is_lval()) {
    push(rscratch1);
  } else {
    pushq(Address(rscratch1, 0));
  }
}

// Clears the last-Java-frame anchor of the thread addressed by r15_thread.
// last_Java_sp is always cleared; last_Java_fp and last_Java_pc are cleared
// only when the corresponding flag is set.
void MacroAssembler::reset_last_Java_frame(bool clear_fp,
                                           bool clear_pc) {
  // we must set sp to zero to clear frame
  movptr(Address(r15_thread, JavaThread::last_Java_sp_offset()), NULL_WORD);
  // must clear fp, so that compiled frames are not confused; it is
  // possible that we need it only for debugging
  if (clear_fp) {
    movptr(Address(r15_thread, JavaThread::last_Java_fp_offset()), NULL_WORD);
  }

  if (clear_pc) {
    movptr(Address(r15_thread, JavaThread::last_Java_pc_offset()), NULL_WORD);
  }
}

// Records the last Java frame in the thread addressed by r15_thread.
// fp and pc are written first (when supplied); sp is written last.
void MacroAssembler::set_last_Java_frame(Register last_java_sp,
                                         Register last_java_fp,
                                         address  last_java_pc) {
  // determine last_java_sp register
  if (!last_java_sp->is_valid()) {
    last_java_sp = rsp;
  }

  // last_java_fp is optional
  if (last_java_fp->is_valid()) {
    movptr(Address(r15_thread, JavaThread::last_Java_fp_offset()),
           last_java_fp);
  }

  // last_java_pc is optional
  if (last_java_pc != NULL) {
    Address java_pc(r15_thread,
                    JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset());
    // the pc is materialized in rscratch1 (clobbered) before being stored
    lea(rscratch1, InternalAddress(last_java_pc));
    movptr(java_pc, rscratch1);
  }

movptr(Address(r15_thread, JavaThread::last_Java_sp_offset()), last_java_sp); 5558 } 5559 5560 static void pass_arg0(MacroAssembler* masm, Register arg) { 5561 if (c_rarg0 != arg ) { 5562 masm->mov(c_rarg0, arg); 5563 } 5564 } 5565 5566 static void pass_arg1(MacroAssembler* masm, Register arg) { 5567 if (c_rarg1 != arg ) { 5568 masm->mov(c_rarg1, arg); 5569 } 5570 } 5571 5572 static void pass_arg2(MacroAssembler* masm, Register arg) { 5573 if (c_rarg2 != arg ) { 5574 masm->mov(c_rarg2, arg); 5575 } 5576 } 5577 5578 static void pass_arg3(MacroAssembler* masm, Register arg) { 5579 if (c_rarg3 != arg ) { 5580 masm->mov(c_rarg3, arg); 5581 } 5582 } 5583 5584 void MacroAssembler::stop(const char* msg) { 5585 address rip = pc(); 5586 pusha(); // get regs on stack 5587 lea(c_rarg0, ExternalAddress((address) msg)); 5588 lea(c_rarg1, InternalAddress(rip)); 5589 movq(c_rarg2, rsp); // pass pointer to regs array 5590 andq(rsp, -16); // align stack as required by ABI 5591 call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug64))); 5592 hlt(); 5593 } 5594 5595 void MacroAssembler::warn(const char* msg) { 5596 push(rsp); 5597 andq(rsp, -16); // align stack as required by push_CPU_state and call 5598 5599 push_CPU_state(); // keeps alignment at 16 bytes 5600 lea(c_rarg0, ExternalAddress((address) msg)); 5601 call_VM_leaf(CAST_FROM_FN_PTR(address, warning), c_rarg0); 5602 pop_CPU_state(); 5603 pop(rsp); 5604 } 5605 5606 #ifndef PRODUCT 5607 extern "C" void findpc(intptr_t x); 5608 #endif 5609 5610 void MacroAssembler::debug64(char* msg, int64_t pc, int64_t regs[]) { 5611 // In order to get locks to work, we need to fake a in_VM state 5612 if (ShowMessageBoxOnError ) { 5613 JavaThread* thread = JavaThread::current(); 5614 JavaThreadState saved_state = thread->thread_state(); 5615 thread->set_thread_state(_thread_in_vm); 5616 #ifndef PRODUCT 5617 if (CountBytecodes || TraceBytecodes || StopInterpreterAt) { 5618 ttyLocker ttyl; 5619 BytecodeCounter::print(); 5620 } 5621 
#endif
    // To see where a verify_oop failed, get $ebx+40/X for this frame.
    // XXX correct this offset for amd64
    // This is the value of eip which points to where verify_oop will return.
    if (os::message_box(msg, "Execution stopped, print registers?")) {
      ttyLocker ttyl;
      tty->print_cr("rip = 0x%016lx", pc);
#ifndef PRODUCT
      tty->cr();
      findpc(pc);
      tty->cr();
#endif
      // regs[] is indexed from the lowest saved slot upwards:
      // r15 is regs[0] and rax is regs[15].
      tty->print_cr("rax = 0x%016lx", regs[15]);
      tty->print_cr("rbx = 0x%016lx", regs[12]);
      tty->print_cr("rcx = 0x%016lx", regs[14]);
      tty->print_cr("rdx = 0x%016lx", regs[13]);
      tty->print_cr("rdi = 0x%016lx", regs[8]);
      tty->print_cr("rsi = 0x%016lx", regs[9]);
      tty->print_cr("rbp = 0x%016lx", regs[10]);
      tty->print_cr("rsp = 0x%016lx", regs[11]);
      tty->print_cr("r8 = 0x%016lx", regs[7]);
      tty->print_cr("r9 = 0x%016lx", regs[6]);
      tty->print_cr("r10 = 0x%016lx", regs[5]);
      tty->print_cr("r11 = 0x%016lx", regs[4]);
      tty->print_cr("r12 = 0x%016lx", regs[3]);
      tty->print_cr("r13 = 0x%016lx", regs[2]);
      tty->print_cr("r14 = 0x%016lx", regs[1]);
      tty->print_cr("r15 = 0x%016lx", regs[0]);
      BREAKPOINT;
    }
    // return the thread to the state it had on entry
    ThreadStateTransition::transition(thread, _thread_in_vm, saved_state);
  } else {
    // no message box requested: print the message and fail an assert
    ttyLocker ttyl;
    ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n",
                    msg);
    assert(false, err_msg("DEBUG MESSAGE: %s", msg));
  }
}

#endif // _LP64

// Now versions that are common to 32/64 bit

// addptr: pointer-width add; addq when built for LP64, addl otherwise.
void MacroAssembler::addptr(Register dst, int32_t imm32) {
  LP64_ONLY(addq(dst, imm32)) NOT_LP64(addl(dst, imm32));
}

void MacroAssembler::addptr(Register dst, Register src) {
  LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src));
}

void MacroAssembler::addptr(Address dst, Register src) {
  LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src));
}

// Pads with nop bytes until the current code offset is a multiple of 'modulus'.
void MacroAssembler::align(int modulus) {
5677 if (offset() % modulus != 0) { 5678 nop(modulus - (offset() % modulus)); 5679 } 5680 } 5681 5682 void MacroAssembler::andpd(XMMRegister dst, AddressLiteral src) { 5683 if (reachable(src)) { 5684 andpd(dst, as_Address(src)); 5685 } else { 5686 lea(rscratch1, src); 5687 andpd(dst, Address(rscratch1, 0)); 5688 } 5689 } 5690 5691 void MacroAssembler::andptr(Register dst, int32_t imm32) { 5692 LP64_ONLY(andq(dst, imm32)) NOT_LP64(andl(dst, imm32)); 5693 } 5694 5695 void MacroAssembler::atomic_incl(AddressLiteral counter_addr) { 5696 pushf(); 5697 if (os::is_MP()) 5698 lock(); 5699 incrementl(counter_addr); 5700 popf(); 5701 } 5702 5703 // Writes to stack successive pages until offset reached to check for 5704 // stack overflow + shadow pages. This clobbers tmp. 5705 void MacroAssembler::bang_stack_size(Register size, Register tmp) { 5706 movptr(tmp, rsp); 5707 // Bang stack for total size given plus shadow page size. 5708 // Bang one page at a time because large size can bang beyond yellow and 5709 // red zones. 5710 Label loop; 5711 bind(loop); 5712 movl(Address(tmp, (-os::vm_page_size())), size ); 5713 subptr(tmp, os::vm_page_size()); 5714 subl(size, os::vm_page_size()); 5715 jcc(Assembler::greater, loop); 5716 5717 // Bang down shadow pages too. 5718 // The -1 because we already subtracted 1 page. 5719 for (int i = 0; i< StackShadowPages-1; i++) { 5720 // this could be any sized move but this is can be a debugging crumb 5721 // so the bigger the better. 5722 movptr(Address(tmp, (-i*os::vm_page_size())), size ); 5723 } 5724 } 5725 5726 void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) { 5727 assert(UseBiasedLocking, "why call this otherwise?"); 5728 5729 // Check for biased locking unlock case, which is a no-op 5730 // Note: we do not have to check the thread ID for two reasons. 5731 // First, the interpreter checks for IllegalMonitorStateException at 5732 // a higher level. 
// Second, if the bias was revoked while we held the
  // lock, the object could not be rebiased toward another thread, so
  // the bias bit would be clear.
  movptr(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
  andptr(temp_reg, markOopDesc::biased_lock_mask_in_place);
  cmpptr(temp_reg, markOopDesc::biased_lock_pattern);
  jcc(Assembler::equal, done);
}

void MacroAssembler::c2bool(Register x) {
  // implements x == 0 ? 0 : 1
  // note: must only look at least-significant byte of x
  //       since C-style booleans are stored in one byte
  //       only! (was bug)
  andl(x, 0xFF);
  setb(Assembler::notZero, x);
}

// Wouldn't need if AddressLiteral version had new name
void MacroAssembler::call(Label& L, relocInfo::relocType rtype) {
  Assembler::call(L, rtype);
}

// Indirect call through a register; forwards to the Assembler version.
void MacroAssembler::call(Register entry) {
  Assembler::call(entry);
}

// Call to a literal target: direct call when reachable(entry), otherwise the
// target is loaded into rscratch1 (clobbered) and called indirectly.
void MacroAssembler::call(AddressLiteral entry) {
  if (reachable(entry)) {
    Assembler::call_literal(entry.target(), entry.rspec());
  } else {
    lea(rscratch1, entry);
    Assembler::call(rscratch1);
  }
}

// Implementation of call_VM versions
//
// The overloads without a last_java_sp parameter all emit the same shape:
//     call C ; jmp E ; C: <pass args> call_VM_helper ; ret ; E:
// i.e. the VM call is wrapped in a local call/ret pair.

void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             bool check_exceptions) {
  Label C, E;
  call(C, relocInfo::none);
  jmp(E);

  bind(C);
  call_VM_helper(oop_result, entry_point, 0, check_exceptions);
  ret(0);

  bind(E);
}

void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             Register arg_1,
                             bool check_exceptions) {
  Label C, E;
  call(C, relocInfo::none);
  jmp(E);

  bind(C);
  pass_arg1(this, arg_1);
  call_VM_helper(oop_result, entry_point, 1, check_exceptions);
  ret(0);

  bind(E);
}

void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             Register arg_1,
Register arg_2,
                             bool check_exceptions) {
  Label C, E;
  call(C, relocInfo::none);
  jmp(E);

  bind(C);

  // arguments are moved highest-numbered first; the assert guards against an
  // earlier move overwriting a register that still holds a later argument
  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));

  pass_arg2(this, arg_2);
  pass_arg1(this, arg_1);
  call_VM_helper(oop_result, entry_point, 2, check_exceptions);
  ret(0);

  bind(E);
}

// Three-argument variant: same call/ret wrapper, arguments moved from arg_3
// down to arg_1 with the matching "smashed arg" checks.
void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             Register arg_3,
                             bool check_exceptions) {
  Label C, E;
  call(C, relocInfo::none);
  jmp(E);

  bind(C);

  LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"));
  LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"));
  pass_arg3(this, arg_3);

  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);

  pass_arg1(this, arg_1);
  call_VM_helper(oop_result, entry_point, 3, check_exceptions);
  ret(0);

  bind(E);
}

// Variants taking an explicit last_java_sp: no call/ret wrapper, they forward
// to call_VM_base. The thread register is r15_thread on LP64 and noreg
// otherwise.
void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             int number_of_arguments,
                             bool check_exceptions) {
  Register thread = LP64_ONLY(r15_thread) NOT_LP64(noreg);
  call_VM_base(oop_result, thread, last_java_sp, entry_point, number_of_arguments, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             bool check_exceptions) {
  pass_arg1(this, arg_1);
  call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             bool check_exceptions) {

  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);
  pass_arg1(this, arg_1);
  call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions);
}

// Explicit-last_java_sp variant with three arguments; arguments are moved from
// arg_3 down to arg_1 with the matching "smashed arg" checks.
void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             Register arg_3,
                             bool check_exceptions) {
  LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"));
  LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"));
  pass_arg3(this, arg_3);
  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);
  pass_arg1(this, arg_1);
  call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions);
}

// Common back end of the call_VM family. In order it: resolves the thread and
// last_java_sp registers, passes the thread as first C argument, records the
// last Java frame, performs the call via call_VM_leaf_base, resets the frame
// anchor, optionally forwards a pending exception, and optionally fetches the
// oop result from the thread.
void MacroAssembler::call_VM_base(Register oop_result,
                                  Register java_thread,
                                  Register last_java_sp,
                                  address  entry_point,
                                  int      number_of_arguments,
                                  bool     check_exceptions) {
  // determine java_thread register
  if (!java_thread->is_valid()) {
#ifdef _LP64
    java_thread = r15_thread;
#else
    java_thread = rdi;
    get_thread(java_thread);
#endif // LP64
  }
  // determine last_java_sp register
  if (!last_java_sp->is_valid()) {
    last_java_sp = rsp;
  }
  // debugging support
  assert(number_of_arguments >= 0   , "cannot have negative number of arguments");
  LP64_ONLY(assert(java_thread == r15_thread, "unexpected register"));
#ifdef ASSERT
  LP64_ONLY(if (UseCompressedOops) verify_heapbase("call_VM_base");)
#endif // ASSERT

  assert(java_thread != oop_result  , "cannot use the same register for java_thread & oop_result");
  assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp");

  // push java thread (becomes first argument of C function)

  NOT_LP64(push(java_thread); number_of_arguments++);
  LP64_ONLY(mov(c_rarg0, r15_thread));

  // set last Java frame before call
  assert(last_java_sp != rbp, "can't use ebp/rbp");

  // Only interpreter should have to set fp
  set_last_Java_frame(java_thread, last_java_sp, rbp, NULL);

  // do
// the call, remove parameters
  MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments);

  // restore the thread (cannot use the pushed argument since arguments
  // may be overwritten by C code generated by an optimizing compiler);
  // however can use the register value directly if it is callee saved.
  if (LP64_ONLY(true ||) java_thread == rdi || java_thread == rsi) {
    // rdi & rsi (also r15) are callee saved -> nothing to do
#ifdef ASSERT
    // debug builds emit a check that java_thread still matches get_thread();
    // rax is used as scratch for the check and is saved/restored around it
    guarantee(java_thread != rax, "change this code");
    push(rax);
    { Label L;
      get_thread(rax);
      cmpptr(java_thread, rax);
      jcc(Assembler::equal, L);
      stop("MacroAssembler::call_VM_base: rdi not callee saved?");
      bind(L);
    }
    pop(rax);
#endif
  } else {
    get_thread(java_thread);
  }
  // reset last Java frame
  // Only interpreter should have to clear fp
  reset_last_Java_frame(java_thread, true, false);

#ifndef CC_INTERP
   // C++ interp handles this in the interpreter
  check_and_handle_popframe(java_thread);
  check_and_handle_earlyret(java_thread);
#endif /* CC_INTERP */

  if (check_exceptions) {
    // check for pending exceptions (java_thread is set upon return)
    cmpptr(Address(java_thread, Thread::pending_exception_offset()), (int32_t) NULL_WORD);
#ifndef _LP64
    jump_cc(Assembler::notEqual,
            RuntimeAddress(StubRoutines::forward_exception_entry()));
#else
    // This used to conditionally jump to forward_exception however it is
    // possible if we relocate that the branch will not reach.
// So we must jump
    // around so we can always reach

    Label ok;
    jcc(Assembler::equal, ok);
    jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
    bind(ok);
#endif // LP64
  }

  // get oop result if there is one and reset the value in the thread
  if (oop_result->is_valid()) {
    movptr(oop_result, Address(java_thread, JavaThread::vm_result_offset()));
    movptr(Address(java_thread, JavaThread::vm_result_offset()), NULL_WORD);
    verify_oop(oop_result, "broken oop in call_VM_base");
  }
}

// Computes last_Java_sp into rax (clobbering it) and forwards to call_VM_base
// with no explicit thread register.
void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) {

  // Calculate the value for last_Java_sp
  // somewhat subtle. call_VM does an intermediate call
  // which places a return address on the stack just under the
  // stack pointer as the user finished with it. This allows
  // us to retrieve last_Java_pc from last_Java_sp[-1].
  // On 32bit we then have to push additional args on the stack to accomplish
  // the actual requested call. On 64bit call_VM only can use register args
  // so the only extra space is the return address that call_VM created.
  // This hopefully explains the calculations here.
#ifdef _LP64
  // We've pushed one address, correct last_Java_sp
  lea(rax, Address(rsp, wordSize));
#else
  lea(rax, Address(rsp, (1 + number_of_arguments) * wordSize));
#endif // LP64

  call_VM_base(oop_result, noreg, rax, entry_point, number_of_arguments, check_exceptions);

}

// call_VM_leaf family: place the register arguments (highest-numbered first,
// guarded by "smashed arg" asserts on LP64) and forward to call_VM_leaf_base.
void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) {
  call_VM_leaf_base(entry_point, number_of_arguments);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) {
  pass_arg0(this, arg_0);
  call_VM_leaf(entry_point, 1);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {

  LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
  pass_arg1(this, arg_1);
  pass_arg0(this, arg_0);
  call_VM_leaf(entry_point, 2);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) {
  LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg"));
  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);
  LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
  pass_arg1(this, arg_1);
  pass_arg0(this, arg_0);
  call_VM_leaf(entry_point, 3);
}

// super_call_VM_leaf family: same argument placement, but the call is
// explicitly qualified as MacroAssembler::call_VM_leaf_base.
void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0) {
  pass_arg0(this, arg_0);
  MacroAssembler::call_VM_leaf_base(entry_point, 1);
}

void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {

  LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
  pass_arg1(this, arg_1);
  pass_arg0(this, arg_0);
  MacroAssembler::call_VM_leaf_base(entry_point, 2);
}

void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) {
  LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg"));
  LP64_ONLY(assert(arg_1 !=
c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);
  LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
  pass_arg1(this, arg_1);
  pass_arg0(this, arg_0);
  MacroAssembler::call_VM_leaf_base(entry_point, 3);
}

// Four-argument variant; arguments are moved from arg_3 down to arg_0.
void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3) {
  LP64_ONLY(assert(arg_0 != c_rarg3, "smashed arg"));
  LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"));
  LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"));
  pass_arg3(this, arg_3);
  LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg"));
  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);
  LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
  pass_arg1(this, arg_1);
  pass_arg0(this, arg_0);
  MacroAssembler::call_VM_leaf_base(entry_point, 4);
}

// Intentionally empty here: emits nothing.
void MacroAssembler::check_and_handle_earlyret(Register java_thread) {
}

// Intentionally empty here: emits nothing.
void MacroAssembler::check_and_handle_popframe(Register java_thread) {
}

// 32-bit compare of the word at a literal address against an immediate.
// Uses rscratch1 (clobbered) when the address is not reachable.
void MacroAssembler::cmp32(AddressLiteral src1, int32_t imm) {
  if (reachable(src1)) {
    cmpl(as_Address(src1), imm);
  } else {
    lea(rscratch1, src1);
    cmpl(Address(rscratch1, 0), imm);
  }
}

// 32-bit compare of a register against the word at a literal address.
// lval literals are rejected (cmpptr is the intended entry point for those).
// Uses rscratch1 (clobbered) when the address is not reachable.
void MacroAssembler::cmp32(Register src1, AddressLiteral src2) {
  assert(!src2.is_lval(), "use cmpptr");
  if (reachable(src2)) {
    cmpl(src1, as_Address(src2));
  } else {
    lea(rscratch1, src2);
    cmpl(src1, Address(rscratch1, 0));
  }
}

void MacroAssembler::cmp32(Register src1, int32_t imm) {
  Assembler::cmpl(src1, imm);
}

void MacroAssembler::cmp32(Register src1, Address src2) {
  Assembler::cmpl(src1, src2);
}

// Compares opr1 with opr2 using ucomisd and leaves -1, 0 or 1 in dst.
// An unordered result yields -1 when unordered_is_less is true, 1 otherwise.
void MacroAssembler::cmpsd2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less) {
  ucomisd(opr1, opr2);

  Label L;
  if (unordered_is_less) {
    movl(dst,
-1);
    jcc(Assembler::parity, L);
    jcc(Assembler::below , L);
    movl(dst, 0);
    jcc(Assembler::equal , L);
    increment(dst);
  } else { // unordered is greater
    movl(dst, 1);
    jcc(Assembler::parity, L);
    jcc(Assembler::above , L);
    movl(dst, 0);
    jcc(Assembler::equal , L);
    decrementl(dst);
  }
  bind(L);
}

// Single-precision twin of cmpsd2int: compares with ucomiss and leaves -1, 0
// or 1 in dst; an unordered result yields -1 when unordered_is_less is true,
// 1 otherwise.
void MacroAssembler::cmpss2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less) {
  ucomiss(opr1, opr2);

  Label L;
  if (unordered_is_less) {
    movl(dst, -1);
    jcc(Assembler::parity, L);
    jcc(Assembler::below , L);
    movl(dst, 0);
    jcc(Assembler::equal , L);
    increment(dst);
  } else { // unordered is greater
    movl(dst, 1);
    jcc(Assembler::parity, L);
    jcc(Assembler::above , L);
    movl(dst, 0);
    jcc(Assembler::equal , L);
    decrementl(dst);
  }
  bind(L);
}


// Byte compare of the value at a literal address against an immediate.
// Uses rscratch1 (clobbered) when the address is not reachable.
void MacroAssembler::cmp8(AddressLiteral src1, int imm) {
  if (reachable(src1)) {
    cmpb(as_Address(src1), imm);
  } else {
    lea(rscratch1, src1);
    cmpb(Address(rscratch1, 0), imm);
  }
}

// Pointer-width compare of a register against a literal.
//  - lval literal : compares against the address value itself.
//  - otherwise    : compares against the word stored at that address.
// On LP64 the lval and not-reachable paths go through rscratch1 (clobbered).
void MacroAssembler::cmpptr(Register src1, AddressLiteral src2) {
#ifdef _LP64
  if (src2.is_lval()) {
    movptr(rscratch1, src2);
    Assembler::cmpq(src1, rscratch1);
  } else if (reachable(src2)) {
    cmpq(src1, as_Address(src2));
  } else {
    lea(rscratch1, src2);
    Assembler::cmpq(src1, Address(rscratch1, 0));
  }
#else
  if (src2.is_lval()) {
    cmp_literal32(src1, (int32_t) src2.target(), src2.rspec());
  } else {
    cmpl(src1, as_Address(src2));
  }
#endif // _LP64
}

// Pointer-width compare of a memory word against a literal address value.
// Only lval literals are accepted (a non-lval would be a mem-mem compare).
void MacroAssembler::cmpptr(Address src1, AddressLiteral src2) {
  assert(src2.is_lval(), "not a mem-mem compare");
#ifdef _LP64
  // moves src2's literal address
  movptr(rscratch1, src2);
  Assembler::cmpq(src1, rscratch1);
#else
  cmp_literal32(src1,
(int32_t) src2.target(), src2.rspec());
#endif // _LP64
}

// Pointer-width cmpxchg against the word at a literal address, lock-prefixed
// when os::is_MP() is true. When the address is not reachable it is loaded
// into rscratch1 (clobbered) first, so that lock() immediately precedes the
// cmpxchg itself.
void MacroAssembler::locked_cmpxchgptr(Register reg, AddressLiteral adr) {
  if (reachable(adr)) {
    if (os::is_MP())
      lock();
    cmpxchgptr(reg, as_Address(adr));
  } else {
    lea(rscratch1, adr);
    if (os::is_MP())
      lock();
    cmpxchgptr(reg, Address(rscratch1, 0));
  }
}

// Pointer-width cmpxchg; cmpxchgq when built for LP64, cmpxchgl otherwise.
// No lock prefix is added here.
void MacroAssembler::cmpxchgptr(Register reg, Address adr) {
  LP64_ONLY(cmpxchgq(reg, adr)) NOT_LP64(cmpxchgl(reg, adr));
}

// comisd with a literal operand; uses rscratch1 (clobbered) when the address
// is not reachable.
void MacroAssembler::comisd(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    comisd(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    comisd(dst, Address(rscratch1, 0));
  }
}

// comiss with a literal operand; uses rscratch1 (clobbered) when the address
// is not reachable.
void MacroAssembler::comiss(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    comiss(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    comiss(dst, Address(rscratch1, 0));
  }
}


// Increments the 32-bit counter at counter_addr (via atomic_incl) only when
// 'cond' holds: the negated condition branches around the increment.
void MacroAssembler::cond_inc32(Condition cond, AddressLiteral counter_addr) {
  Condition negated_cond = negate_condition(cond);
  Label L;
  jcc(negated_cond, L);
  atomic_incl(counter_addr);
  bind(L);
}

int MacroAssembler::corrected_idivl(Register reg) {
  // Full implementation of Java idiv and irem; checks for
  // special case as described in JVM spec., p.243 & p.271.
  // The function returns the (pc) offset of the idivl
  // instruction - may be needed for implicit exceptions.
//
  //         normal case                           special case
  //
  // input : rax,: dividend                         min_int
  //         reg: divisor   (may not be rax,/rdx)   -1
  //
  // output: rax,: quotient  (= rax, idiv reg)       min_int
  //         rdx: remainder (= rax, irem reg)       0
  assert(reg != rax && reg != rdx, "reg cannot be rax, or rdx register");
  const int min_int = 0x80000000;
  Label normal_case, special_case;

  // check for special case
  cmpl(rax, min_int);
  jcc(Assembler::notEqual, normal_case);
  xorl(rdx, rdx); // prepare rdx for possible special case (where remainder = 0)
  cmpl(reg, -1);
  jcc(Assembler::equal, special_case);

  // handle normal case
  bind(normal_case);
  cdql();
  // remember where the idivl itself starts; this is the value returned
  int idivl_offset = offset();
  idivl(reg);

  // normal and special case exit
  bind(special_case);

  return idivl_offset;
}



// Emits a 32-bit subtraction of the constant 'value' from 'reg'.
//  - min_jint is subtracted directly (negating it would overflow an int).
//  - other negative values are forwarded to incrementl with the sign flipped.
//  - zero emits nothing.
//  - one uses decl when UseIncDec is set; everything else uses subl.
void MacroAssembler::decrementl(Register reg, int value) {
  if (value == min_jint) {subl(reg, value) ; return; }
  if (value <  0) { incrementl(reg, -value); return; }
  if (value == 0) {                        ; return; }
  if (value == 1 && UseIncDec) { decl(reg) ; return; }
  /* else */      { subl(reg, value)       ; return; }
}

// Memory-operand variant of decrementl(Register, int).
void MacroAssembler::decrementl(Address dst, int value) {
  if (value == min_jint) {subl(dst, value) ; return; }
  if (value <  0) { incrementl(dst, -value); return; }
  if (value == 0) {                        ; return; }
  if (value == 1 && UseIncDec) { decl(dst) ; return; }
  /* else */      { subl(dst, value)       ; return; }
}

// Signed 32-bit division of 'reg' by 2^shift_value using an arithmetic shift.
// When reg is negative, (2^shift_value - 1) is added before shifting; the add
// is skipped for non-negative values.
void MacroAssembler::division_with_shift (Register reg, int shift_value) {
  assert (shift_value > 0, "illegal shift value");
  Label _is_positive;
  testl (reg, reg);
  jcc (Assembler::positive, _is_positive);
  int offset = (1 << shift_value) - 1 ;

  if (offset == 1) {
    incrementl(reg);
  } else {
    addl(reg, offset);
  }

  bind (_is_positive);
  sarl(reg, shift_value);
}

//
// !defined(COMPILER2) is because of stupid core builds
#if !defined(_LP64) || defined(COMPILER1) || !defined(COMPILER2)
// Empties the x87 stack: a single emms when MMX is supported, otherwise
// ffree on each of the eight stack slots (7 down to 0).
void MacroAssembler::empty_FPU_stack() {
  if (VM_Version::supports_mmx()) {
    emms();
  } else {
    for (int i = 8; i-- > 0; ) ffree(i);
  }
}
#endif // !LP64 || C1 || !C2


// Defines obj, preserves var_size_in_bytes
//
// Emits an inline allocation attempt against the shared heap top:
//   obj                - receives the allocated address; must be rax
//   var_size_in_bytes  - size register, or noreg to use con_size_in_bytes
//   con_size_in_bytes  - constant size, used only when var_size_in_bytes == noreg
//   t1                 - scratch register holding the proposed new top
//   slow_case          - taken when inline allocation is not possible
void MacroAssembler::eden_allocate(Register obj,
                                   Register var_size_in_bytes,
                                   int con_size_in_bytes,
                                   Register t1,
                                   Label& slow_case) {
  assert(obj == rax, "obj must be in rax, for cmpxchg");
  assert_different_registers(obj, var_size_in_bytes, t1);
  if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) {
    // no inline allocation in this configuration: always take the slow path
    jmp(slow_case);
  } else {
    Register end = t1;
    Label retry;
    bind(retry);
    ExternalAddress heap_top((address) Universe::heap()->top_addr());
    movptr(obj, heap_top);
    if (var_size_in_bytes == noreg) {
      lea(end, Address(obj, con_size_in_bytes));
    } else {
      lea(end, Address(obj, var_size_in_bytes, Address::times_1));
    }
    // if end < obj then we wrapped around => object too long => slow case
    cmpptr(end, obj);
    jcc(Assembler::below, slow_case);
    cmpptr(end, ExternalAddress((address) Universe::heap()->end_addr()));
    jcc(Assembler::above, slow_case);
    // Compare obj with the top addr, and if still equal, store the new top addr in
    // end at the address of the top addr pointer. Sets ZF if was equal, and clears
    // it otherwise. Use lock prefix for atomicity on MPs.
locked_cmpxchgptr(end, heap_top);
    // top changed underneath us: start over with the new top
    jcc(Assembler::notEqual, retry);
  }
}

// Frame setup: push rbp, then rbp := rsp.
void MacroAssembler::enter() {
  push(rbp);
  mov(rbp, rsp);
}

// Convenience form: compare against index 1, popping both operands.
void MacroAssembler::fcmp(Register tmp) {
  fcmp(tmp, 1, true, true);
}

// x87 compare that leaves the result in eflags.
//   tmp       - must be noreg when cmov is supported; otherwise a real register
//               handed to save_rax/restore_rax around the status-word transfer
//   index     - x87 stack index of the second operand
//   pop_left  - pop the first operand after the compare
//   pop_right - additionally pop the second operand (requires pop_left)
void MacroAssembler::fcmp(Register tmp, int index, bool pop_left, bool pop_right) {
  assert(!pop_right || pop_left, "usage error");
  if (VM_Version::supports_cmov()) {
    assert(tmp == noreg, "unneeded temp");
    if (pop_left) {
      fucomip(index);
    } else {
      fucomi(index);
    }
    if (pop_right) {
      fpop();
    }
  } else {
    assert(tmp != noreg, "need temp");
    if (pop_left) {
      if (pop_right) {
        fcompp();
      } else {
        fcomp(index);
      }
    } else {
      fcom(index);
    }
    // convert FPU condition into eflags condition via rax,
    save_rax(tmp);
    fwait(); fnstsw_ax();
    sahf();
    restore_rax(tmp);
  }
  // condition codes set as follows:
  //
  // CF (corresponds to C0) if x < y
  // PF (corresponds to C2) if unordered
  // ZF (corresponds to C3) if x = y
}

// Convenience form: compare against index 1, popping both operands.
void MacroAssembler::fcmp2int(Register dst, bool unordered_is_less) {
  fcmp2int(dst, unordered_is_less, 1, true, true);
}

// x87 compare producing -1, 0 or 1 in dst; an unordered result yields -1 when
// unordered_is_less is true, 1 otherwise. dst doubles as the fcmp temp when
// cmov is not supported.
void MacroAssembler::fcmp2int(Register dst, bool unordered_is_less, int index, bool pop_left, bool pop_right) {
  fcmp(VM_Version::supports_cmov() ?
noreg : dst, index, pop_left, pop_right);
  Label L;
  if (unordered_is_less) {
    movl(dst, -1);
    jcc(Assembler::parity, L);
    jcc(Assembler::below , L);
    movl(dst, 0);
    jcc(Assembler::equal , L);
    increment(dst);
  } else { // unordered is greater
    movl(dst, 1);
    jcc(Assembler::parity, L);
    jcc(Assembler::above , L);
    movl(dst, 0);
    jcc(Assembler::equal , L);
    decrementl(dst);
  }
  bind(L);
}

// x87 loads/control-word load from a literal address. These convert the
// literal with as_Address directly; unlike most AddressLiteral helpers in
// this file there is no reachable() check or rscratch1 fallback.
void MacroAssembler::fld_d(AddressLiteral src) {
  fld_d(as_Address(src));
}

void MacroAssembler::fld_s(AddressLiteral src) {
  fld_s(as_Address(src));
}

void MacroAssembler::fld_x(AddressLiteral src) {
  Assembler::fld_x(as_Address(src));
}

void MacroAssembler::fldcw(AddressLiteral src) {
  Assembler::fldcw(as_Address(src));
}

// Pops the x87 stack: ffree followed by fincstp.
void MacroAssembler::fpop() {
  ffree();
  fincstp();
}

// x87 remainder: repeats fprem until the status word no longer requests
// another iteration (bit 0x400 on LP64, the parity flag after sahf otherwise).
// rax is saved to / restored from 'tmp' via save_rax/restore_rax around the
// loop.
void MacroAssembler::fremr(Register tmp) {
  save_rax(tmp);
  { Label L;
    bind(L);
    fprem();
    fwait(); fnstsw_ax();
#ifdef _LP64
    testl(rax, 0x400);
    jcc(Assembler::notEqual, L);
#else
    sahf();
    jcc(Assembler::parity, L);
#endif // _LP64
  }
  restore_rax(tmp);
  // Result is in ST0.
// Note: fxch & fpop to get rid of ST1
  // (otherwise FPU stack could overflow eventually)
  fxch(1);
  fpop();
}


// Increments the 32-bit word at a literal address; uses rscratch1 (clobbered)
// when the address is not reachable.
void MacroAssembler::incrementl(AddressLiteral dst) {
  if (reachable(dst)) {
    incrementl(as_Address(dst));
  } else {
    lea(rscratch1, dst);
    incrementl(Address(rscratch1, 0));
  }
}

// Increments the 32-bit array element described by dst.
void MacroAssembler::incrementl(ArrayAddress dst) {
  incrementl(as_Address(dst));
}

// Emits a 32-bit addition of the constant 'value' to 'reg'.
//  - min_jint is added directly (negating it would overflow an int).
//  - other negative values are forwarded to decrementl with the sign flipped.
//  - zero emits nothing.
//  - one uses incl when UseIncDec is set; everything else uses addl.
void MacroAssembler::incrementl(Register reg, int value) {
  if (value == min_jint) {addl(reg, value) ; return; }
  if (value <  0) { decrementl(reg, -value); return; }
  if (value == 0) {                        ; return; }
  if (value == 1 && UseIncDec) { incl(reg) ; return; }
  /* else */      { addl(reg, value)       ; return; }
}

// Memory-operand variant of incrementl(Register, int).
void MacroAssembler::incrementl(Address dst, int value) {
  if (value == min_jint) {addl(dst, value) ; return; }
  if (value <  0) { decrementl(dst, -value); return; }
  if (value == 0) {                        ; return; }
  if (value == 1 && UseIncDec) { incl(dst) ; return; }
  /* else */      { addl(dst, value)       ; return; }
}

// Unconditional jump to a literal target: direct jump when reachable(dst),
// otherwise through rscratch1 (clobbered).
void MacroAssembler::jump(AddressLiteral dst) {
  if (reachable(dst)) {
    jmp_literal(dst.target(), dst.rspec());
  } else {
    lea(rscratch1, dst);
    jmp(rscratch1);
  }
}

// Conditional jump to a literal target.
//  - reachable   : the jcc is encoded by hand; the 2-byte short form is used
//                  only when there is no relocation and the displacement fits
//                  in 8 bits, otherwise the 6-byte long form.
//  - unreachable : a short branch on the reversed condition skips over an
//                  indirect jump through rscratch1 (clobbered).
void MacroAssembler::jump_cc(Condition cc, AddressLiteral dst) {
  if (reachable(dst)) {
    InstructionMark im(this);
    relocate(dst.reloc());
    const int short_size = 2;
    const int long_size = 6;
    // displacement measured from the start of this instruction; the encoded
    // value is adjusted by the instruction length below
    int offs = (intptr_t)dst.target() - ((intptr_t)_code_pos);
    if (dst.reloc() == relocInfo::none && is8bit(offs - short_size)) {
      // 0111 tttn #8-bit disp
      emit_byte(0x70 | cc);
      emit_byte((offs - short_size) & 0xFF);
    } else {
      // 0000 1111 1000 tttn #32-bit disp
      emit_byte(0x0F);
      emit_byte(0x80 | cc);
      emit_long(offs - long_size);
    }
  } else {
#ifdef ASSERT
    warning("reversing conditional branch");
#endif /* ASSERT */
    Label skip;
    jccb(reverse[cc], skip);
    lea(rscratch1, dst);
    Assembler::jmp(rscratch1);
    bind(skip);
  }
}

// ldmxcsr from a literal address; uses rscratch1 (clobbered) when the address
// is not reachable.
void MacroAssembler::ldmxcsr(AddressLiteral src) {
  if (reachable(src)) {
    Assembler::ldmxcsr(as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::ldmxcsr(Address(rscratch1, 0));
  }
}

// Loads a sign-extended byte from src into dst and returns the code offset of
// the load instruction. Without movsx support (non-P6, 32-bit only) the byte
// is loaded unsigned and sign-extended with a shift pair.
int MacroAssembler::load_signed_byte(Register dst, Address src) {
  int off;
  if (LP64_ONLY(true ||) VM_Version::is_P6()) {
    off = offset();
    movsbl(dst, src); // movsxb
  } else {
    off = load_unsigned_byte(dst, src);
    shll(dst, 24);
    sarl(dst, 24);
  }
  return off;
}

// Note: load_signed_short used to be called load_signed_word.
// Although the 'w' in x86 opcodes refers to the term "word" in the assembler
// manual, which means 16 bits, that usage is found nowhere in HotSpot code.
// The term "word" in HotSpot means a 32- or 64-bit machine word.
//
// Loads a sign-extended 16-bit value from src into dst and returns the code
// offset of the load instruction.
int MacroAssembler::load_signed_short(Register dst, Address src) {
  int off;
  if (LP64_ONLY(true ||) VM_Version::is_P6()) {
    // This is dubious to me since it seems safe to do a signed 16 => 64 bit
    // version but this is what 64bit has always done. This seems to imply
    // that users are only using 32bits worth.
    off = offset();
    movswl(dst, src); // movsxw
  } else {
    off = load_unsigned_short(dst, src);
    shll(dst, 16);
    sarl(dst, 16);
  }
  return off;
}

// Loads a zero-extended byte from src into dst and returns the code offset of
// the load instruction.
int MacroAssembler::load_unsigned_byte(Register dst, Address src) {
  // According to Intel Doc. AP-526, "Zero-Extension of Short", p.16,
  // and "3.9 Partial Register Penalties", p. 22).
6585 int off; 6586 if (LP64_ONLY(true || ) VM_Version::is_P6() || src.uses(dst)) { 6587 off = offset(); 6588 movzbl(dst, src); // movzxb 6589 } else { 6590 xorl(dst, dst); 6591 off = offset(); 6592 movb(dst, src); 6593 } 6594 return off; 6595 } 6596 6597 // Note: load_unsigned_short used to be called load_unsigned_word. 6598 int MacroAssembler::load_unsigned_short(Register dst, Address src) { 6599 // According to Intel Doc. AP-526, "Zero-Extension of Short", p.16, 6600 // and "3.9 Partial Register Penalties", p. 22). 6601 int off; 6602 if (LP64_ONLY(true ||) VM_Version::is_P6() || src.uses(dst)) { 6603 off = offset(); 6604 movzwl(dst, src); // movzxw 6605 } else { 6606 xorl(dst, dst); 6607 off = offset(); 6608 movw(dst, src); 6609 } 6610 return off; 6611 } 6612 6613 void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2) { 6614 switch (size_in_bytes) { 6615 #ifndef _LP64 6616 case 8: 6617 assert(dst2 != noreg, "second dest register required"); 6618 movl(dst, src); 6619 movl(dst2, src.plus_disp(BytesPerInt)); 6620 break; 6621 #else 6622 case 8: movq(dst, src); break; 6623 #endif 6624 case 4: movl(dst, src); break; 6625 case 2: is_signed ? load_signed_short(dst, src) : load_unsigned_short(dst, src); break; 6626 case 1: is_signed ? 
load_signed_byte( dst, src) : load_unsigned_byte( dst, src); break; 6627 default: ShouldNotReachHere(); 6628 } 6629 } 6630 6631 void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2) { 6632 switch (size_in_bytes) { 6633 #ifndef _LP64 6634 case 8: 6635 assert(src2 != noreg, "second source register required"); 6636 movl(dst, src); 6637 movl(dst.plus_disp(BytesPerInt), src2); 6638 break; 6639 #else 6640 case 8: movq(dst, src); break; 6641 #endif 6642 case 4: movl(dst, src); break; 6643 case 2: movw(dst, src); break; 6644 case 1: movb(dst, src); break; 6645 default: ShouldNotReachHere(); 6646 } 6647 } 6648 6649 void MacroAssembler::mov32(AddressLiteral dst, Register src) { 6650 if (reachable(dst)) { 6651 movl(as_Address(dst), src); 6652 } else { 6653 lea(rscratch1, dst); 6654 movl(Address(rscratch1, 0), src); 6655 } 6656 } 6657 6658 void MacroAssembler::mov32(Register dst, AddressLiteral src) { 6659 if (reachable(src)) { 6660 movl(dst, as_Address(src)); 6661 } else { 6662 lea(rscratch1, src); 6663 movl(dst, Address(rscratch1, 0)); 6664 } 6665 } 6666 6667 // C++ bool manipulation 6668 6669 void MacroAssembler::movbool(Register dst, Address src) { 6670 if(sizeof(bool) == 1) 6671 movb(dst, src); 6672 else if(sizeof(bool) == 2) 6673 movw(dst, src); 6674 else if(sizeof(bool) == 4) 6675 movl(dst, src); 6676 else 6677 // unsupported 6678 ShouldNotReachHere(); 6679 } 6680 6681 void MacroAssembler::movbool(Address dst, bool boolconst) { 6682 if(sizeof(bool) == 1) 6683 movb(dst, (int) boolconst); 6684 else if(sizeof(bool) == 2) 6685 movw(dst, (int) boolconst); 6686 else if(sizeof(bool) == 4) 6687 movl(dst, (int) boolconst); 6688 else 6689 // unsupported 6690 ShouldNotReachHere(); 6691 } 6692 6693 void MacroAssembler::movbool(Address dst, Register src) { 6694 if(sizeof(bool) == 1) 6695 movb(dst, src); 6696 else if(sizeof(bool) == 2) 6697 movw(dst, src); 6698 else if(sizeof(bool) == 4) 6699 movl(dst, src); 6700 else 6701 // 
unsupported 6702 ShouldNotReachHere(); 6703 } 6704 6705 void MacroAssembler::movbyte(ArrayAddress dst, int src) { 6706 movb(as_Address(dst), src); 6707 } 6708 6709 void MacroAssembler::movdbl(XMMRegister dst, AddressLiteral src) { 6710 if (reachable(src)) { 6711 if (UseXmmLoadAndClearUpper) { 6712 movsd (dst, as_Address(src)); 6713 } else { 6714 movlpd(dst, as_Address(src)); 6715 } 6716 } else { 6717 lea(rscratch1, src); 6718 if (UseXmmLoadAndClearUpper) { 6719 movsd (dst, Address(rscratch1, 0)); 6720 } else { 6721 movlpd(dst, Address(rscratch1, 0)); 6722 } 6723 } 6724 } 6725 6726 void MacroAssembler::movflt(XMMRegister dst, AddressLiteral src) { 6727 if (reachable(src)) { 6728 movss(dst, as_Address(src)); 6729 } else { 6730 lea(rscratch1, src); 6731 movss(dst, Address(rscratch1, 0)); 6732 } 6733 } 6734 6735 void MacroAssembler::movptr(Register dst, Register src) { 6736 LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src)); 6737 } 6738 6739 void MacroAssembler::movptr(Register dst, Address src) { 6740 LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src)); 6741 } 6742 6743 // src should NEVER be a real pointer. 
Use AddressLiteral for true pointers 6744 void MacroAssembler::movptr(Register dst, intptr_t src) { 6745 LP64_ONLY(mov64(dst, src)) NOT_LP64(movl(dst, src)); 6746 } 6747 6748 void MacroAssembler::movptr(Address dst, Register src) { 6749 LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src)); 6750 } 6751 6752 void MacroAssembler::movss(XMMRegister dst, AddressLiteral src) { 6753 if (reachable(src)) { 6754 movss(dst, as_Address(src)); 6755 } else { 6756 lea(rscratch1, src); 6757 movss(dst, Address(rscratch1, 0)); 6758 } 6759 } 6760 6761 void MacroAssembler::null_check(Register reg, int offset) { 6762 if (needs_explicit_null_check(offset)) { 6763 // provoke OS NULL exception if reg = NULL by 6764 // accessing M[reg] w/o changing any (non-CC) registers 6765 // NOTE: cmpl is plenty here to provoke a segv 6766 cmpptr(rax, Address(reg, 0)); 6767 // Note: should probably use testl(rax, Address(reg, 0)); 6768 // may be shorter code (however, this version of 6769 // testl needs to be implemented first) 6770 } else { 6771 // nothing to do, (later) access of M[reg + offset] 6772 // will provoke OS NULL exception if reg = NULL 6773 } 6774 } 6775 6776 void MacroAssembler::os_breakpoint() { 6777 // instead of directly emitting a breakpoint, call os:breakpoint for better debugability 6778 // (e.g., MSVC can't call ps() otherwise) 6779 call(RuntimeAddress(CAST_FROM_FN_PTR(address, os::breakpoint))); 6780 } 6781 6782 void MacroAssembler::pop_CPU_state() { 6783 pop_FPU_state(); 6784 pop_IU_state(); 6785 } 6786 6787 void MacroAssembler::pop_FPU_state() { 6788 NOT_LP64(frstor(Address(rsp, 0));) 6789 LP64_ONLY(fxrstor(Address(rsp, 0));) 6790 addptr(rsp, FPUStateSizeInWords * wordSize); 6791 } 6792 6793 void MacroAssembler::pop_IU_state() { 6794 popa(); 6795 LP64_ONLY(addq(rsp, 8)); 6796 popf(); 6797 } 6798 6799 // Save Integer and Float state 6800 // Warning: Stack must be 16 byte aligned (64bit) 6801 void MacroAssembler::push_CPU_state() { 6802 push_IU_state(); 6803 push_FPU_state(); 6804 
} 6805 6806 void MacroAssembler::push_FPU_state() { 6807 subptr(rsp, FPUStateSizeInWords * wordSize); 6808 #ifndef _LP64 6809 fnsave(Address(rsp, 0)); 6810 fwait(); 6811 #else 6812 fxsave(Address(rsp, 0)); 6813 #endif // LP64 6814 } 6815 6816 void MacroAssembler::push_IU_state() { 6817 // Push flags first because pusha kills them 6818 pushf(); 6819 // Make sure rsp stays 16-byte aligned 6820 LP64_ONLY(subq(rsp, 8)); 6821 pusha(); 6822 } 6823 6824 void MacroAssembler::reset_last_Java_frame(Register java_thread, bool clear_fp, bool clear_pc) { 6825 // determine java_thread register 6826 if (!java_thread->is_valid()) { 6827 java_thread = rdi; 6828 get_thread(java_thread); 6829 } 6830 // we must set sp to zero to clear frame 6831 movptr(Address(java_thread, JavaThread::last_Java_sp_offset()), NULL_WORD); 6832 if (clear_fp) { 6833 movptr(Address(java_thread, JavaThread::last_Java_fp_offset()), NULL_WORD); 6834 } 6835 6836 if (clear_pc) 6837 movptr(Address(java_thread, JavaThread::last_Java_pc_offset()), NULL_WORD); 6838 6839 } 6840 6841 void MacroAssembler::restore_rax(Register tmp) { 6842 if (tmp == noreg) pop(rax); 6843 else if (tmp != rax) mov(rax, tmp); 6844 } 6845 6846 void MacroAssembler::round_to(Register reg, int modulus) { 6847 addptr(reg, modulus - 1); 6848 andptr(reg, -modulus); 6849 } 6850 6851 void MacroAssembler::save_rax(Register tmp) { 6852 if (tmp == noreg) push(rax); 6853 else if (tmp != rax) mov(tmp, rax); 6854 } 6855 6856 // Write serialization page so VM thread can do a pseudo remote membar. 6857 // We use the current thread pointer to calculate a thread specific 6858 // offset to write to within the page. This minimizes bus traffic 6859 // due to cache line collision. 
// thread: register holding the current thread pointer (only its low 32 bits
//         are used, via movl).
// tmp:    scratch register; receives the computed page offset and is also
//         the value that gets stored.
void MacroAssembler::serialize_memory(Register thread, Register tmp) {
  // tmp := (thread >> shift) & (page_size - sizeof(int)), i.e. an
  // int-aligned offset inside the serialization page
  movl(tmp, thread);
  shrl(tmp, os::get_serialize_page_shift_count());
  andl(tmp, (os::vm_page_size() - sizeof(int)));

  Address index(noreg, tmp, Address::times_1);
  ExternalAddress page(os::get_memory_serialize_page());

  // Size of store must match masking code above
  movl(as_Address(ArrayAddress(page, index)), tmp);
}

// Calls to C land
//
// When entering C land, the rbp, & rsp of the last Java frame have to be recorded
// in the (thread-local) JavaThread object. When leaving C land, the last Java fp
// has to be reset to 0. This is required to allow proper stack traversal.
//
// java_thread:  thread register; if not valid, rdi is loaded via get_thread.
// last_java_sp: sp to record; if not valid, rsp is used.
// last_java_fp: recorded only when it is a valid register.
// last_java_pc: recorded only when non-NULL.
// The sp slot is written last.
void MacroAssembler::set_last_Java_frame(Register java_thread,
                                         Register last_java_sp,
                                         Register last_java_fp,
                                         address  last_java_pc) {
  // determine java_thread register
  if (!java_thread->is_valid()) {
    java_thread = rdi;
    get_thread(java_thread);
  }
  // determine last_java_sp register
  if (!last_java_sp->is_valid()) {
    last_java_sp = rsp;
  }

  // last_java_fp is optional

  if (last_java_fp->is_valid()) {
    movptr(Address(java_thread, JavaThread::last_Java_fp_offset()), last_java_fp);
  }

  // last_java_pc is optional

  if (last_java_pc != NULL) {
    lea(Address(java_thread,
                 JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()),
        InternalAddress(last_java_pc));

  }
  movptr(Address(java_thread, JavaThread::last_Java_sp_offset()), last_java_sp);
}

// Pointer-sized shift left by imm8 (shlq on LP64, shll otherwise).
void MacroAssembler::shlptr(Register dst, int imm8) {
  LP64_ONLY(shlq(dst, imm8)) NOT_LP64(shll(dst, imm8));
}

// Pointer-sized logical shift right by imm8 (shrq on LP64, shrl otherwise).
void MacroAssembler::shrptr(Register dst, int imm8) {
  LP64_ONLY(shrq(dst, imm8)) NOT_LP64(shrl(dst, imm8));
}

// Sign-extends the low byte of reg in place. Uses movsbl on LP64, or on P6
// when reg has a byte register; otherwise a shift-left/shift-right pair.
void MacroAssembler::sign_extend_byte(Register reg) {
  if (LP64_ONLY(true ||) (VM_Version::is_P6() && reg->has_byte_register())) {
    movsbl(reg, reg); // movsxb
  } else {
    shll(reg, 24);
    sarl(reg, 24);
  }
}

// Sign-extends the low 16 bits of reg in place (movswl, or a shift pair
// on 32-bit non-P6 processors).
void MacroAssembler::sign_extend_short(Register reg) {
  if (LP64_ONLY(true ||) VM_Version::is_P6()) {
    movswl(reg, reg); // movsxw
  } else {
    shll(reg, 16);
    sarl(reg, 16);
  }
}

// testl against a literal address; the address must be reachable
// (asserted), there is no scratch-register fallback here.
void MacroAssembler::testl(Register dst, AddressLiteral src) {
  assert(reachable(src), "Address should be reachable");
  testl(dst, as_Address(src));
}

//////////////////////////////////////////////////////////////////////////////////
#ifndef SERIALGC

// Emits the G1 pre-write barrier: when the thread's SATB queue is active and
// the previous value is non-null, the previous value is recorded in the
// thread's SATB buffer, or handed to SharedRuntime::g1_wb_pre when the
// buffer index is 0.
//
// obj:         if not noreg, the previous value is loaded from Address(obj, 0)
//              into pre_val; if noreg, pre_val must already hold it.
// pre_val:     register for the previous value (must not be noreg).
// thread:      thread register (asserted to be r15_thread on LP64).
// tmp:         scratch register.
// tosca_live:  when true, rax is saved and restored around the runtime call.
// expand_call: when true, the runtime call is expanded directly through
//              MacroAssembler::call_VM_leaf_base (see the comments below).
void MacroAssembler::g1_write_barrier_pre(Register obj,
                                          Register pre_val,
                                          Register thread,
                                          Register tmp,
                                          bool tosca_live,
                                          bool expand_call) {

  // If expand_call is true then we expand the call_VM_leaf macro
  // directly to skip generating the check by
  // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp.

#ifdef _LP64
  assert(thread == r15_thread, "must be");
#endif // _LP64

  Label done;
  Label runtime;

  assert(pre_val != noreg, "check this code");

  if (obj != noreg) {
    assert_different_registers(obj, pre_val, tmp);
    assert(pre_val != rax, "check this code");
  }

  // Fields of the thread's SATB mark queue
  Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
                                       PtrQueue::byte_offset_of_active()));
  Address index(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
                                       PtrQueue::byte_offset_of_index()));
  Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
                                       PtrQueue::byte_offset_of_buf()));


  // Is marking active?
  if (in_bytes(PtrQueue::byte_width_of_active()) == 4) {
    cmpl(in_progress, 0);
  } else {
    assert(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption");
    cmpb(in_progress, 0);
  }
  jcc(Assembler::equal, done);

  // Do we need to load the previous value?
  if (obj != noreg) {
    load_heap_oop(pre_val, Address(obj, 0));
  }

  // Is the previous value null?
  cmpptr(pre_val, (int32_t) NULL_WORD);
  jcc(Assembler::equal, done);

  // Can we store original value in the thread's buffer?
  // Is index == 0?
  // (The index field is typed as size_t.)

  movptr(tmp, index);                   // tmp := *index_adr
  cmpptr(tmp, 0);                       // tmp == 0?
  jcc(Assembler::equal, runtime);       // If yes, goto runtime

  subptr(tmp, wordSize);                // tmp := tmp - wordSize
  movptr(index, tmp);                   // *index_adr := tmp
  addptr(tmp, buffer);                  // tmp := tmp + *buffer_adr

  // Record the previous value
  movptr(Address(tmp, 0), pre_val);
  jmp(done);

  bind(runtime);
  // save the live input values
  if(tosca_live) push(rax);

  if (obj != noreg && obj != rax)
    push(obj);

  if (pre_val != rax)
    push(pre_val);

  // Calling the runtime using the regular call_VM_leaf mechanism generates
  // code (generated by InterpreterMacroAssember::call_VM_leaf_base)
  // that checks that the *(ebp+frame::interpreter_frame_last_sp) == NULL.
  //
  // If we are generating the pre-barrier without a frame (e.g. in the
  // intrinsified Reference.get() routine) then ebp might be pointing to
  // the caller frame and so this check will most likely fail at runtime.
  //
  // Expanding the call directly bypasses the generation of the check.
  // So when we do not have a full interpreter frame on the stack
  // expand_call should be passed true.

  NOT_LP64(  push(thread); )

  if (expand_call) {
    LP64_ONLY( assert(pre_val != c_rarg1, "smashed arg"); )
    pass_arg1(this, thread);
    pass_arg0(this, pre_val);
    MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), 2);
  } else {
    call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), pre_val, thread);
  }

  NOT_LP64(  pop(thread); )

  // restore the live input values (reverse order of the pushes above)
  if (pre_val != rax)
    pop(pre_val);

  if (obj != noreg && obj != rax)
    pop(obj);

  if(tosca_live) pop(rax);

  bind(done);
}

// Emits the G1 post-write barrier for a store of new_val at store_addr:
// nothing is done when the store does not cross heap regions, when new_val
// is NULL, or when the card byte already compares equal to 0. Otherwise the
// card byte is set to 0 and the card address is logged in the thread's dirty
// card queue, or passed to SharedRuntime::g1_wb_post when the queue index is 0.
//
// thread:    thread register (asserted to be r15_thread on LP64).
// tmp, tmp2: scratch registers; rscratch1 is additionally used on LP64.
void MacroAssembler::g1_write_barrier_post(Register store_addr,
                                           Register new_val,
                                           Register thread,
                                           Register tmp,
                                           Register tmp2) {
#ifdef _LP64
  assert(thread == r15_thread, "must be");
#endif // _LP64

  // Fields of the thread's dirty card queue
  Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
                                       PtrQueue::byte_offset_of_index()));
  Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
                                       PtrQueue::byte_offset_of_buf()));

  BarrierSet* bs = Universe::heap()->barrier_set();
  CardTableModRefBS* ct = (CardTableModRefBS*)bs;
  Label done;
  Label runtime;

  // Does store cross heap regions?

  movptr(tmp, store_addr);
  xorptr(tmp, new_val);
  shrptr(tmp, HeapRegion::LogOfHRGrainBytes);
  jcc(Assembler::equal, done);

  // crosses regions, storing NULL?

  cmpptr(new_val, (int32_t) NULL_WORD);
  jcc(Assembler::equal, done);

  // storing region crossing non-NULL, is card already dirty?

  ExternalAddress cardtable((address) ct->byte_map_base);
  assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
#ifdef _LP64
  const Register card_addr = tmp;

  movq(card_addr, store_addr);
  shrq(card_addr, CardTableModRefBS::card_shift);

  lea(tmp2, cardtable);

  // get the address of the card
  addq(card_addr, tmp2);
#else
  // NOTE: card_index and card_addr are both aliases of tmp on this path
  const Register card_index = tmp;

  movl(card_index, store_addr);
  shrl(card_index, CardTableModRefBS::card_shift);

  Address index(noreg, card_index, Address::times_1);
  const Register card_addr = tmp;
  lea(card_addr, as_Address(ArrayAddress(cardtable, index)));
#endif
  cmpb(Address(card_addr, 0), 0);
  jcc(Assembler::equal, done);

  // storing a region crossing, non-NULL oop, card is clean.
  // dirty card and log.

  movb(Address(card_addr, 0), 0);

  // queue index == 0 means there is no room left: go to the runtime
  cmpl(queue_index, 0);
  jcc(Assembler::equal, runtime);
  subl(queue_index, wordSize);
  movptr(tmp2, buffer);
#ifdef _LP64
  movslq(rscratch1, queue_index);
  addq(tmp2, rscratch1);
  movq(Address(tmp2, 0), card_addr);
#else
  addl(tmp2, queue_index);
  movl(Address(tmp2, 0), card_index);
#endif
  jmp(done);

  bind(runtime);
  // save the live input values
  push(store_addr);
  push(new_val);
#ifdef _LP64
  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, r15_thread);
#else
  push(thread);
  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, thread);
  pop(thread);
#endif
  // restore the live input values
  pop(new_val);
  pop(store_addr);

  bind(done);
}

#endif // SERIALGC
//////////////////////////////////////////////////////////////////////////////////


void MacroAssembler::store_check(Register obj) {
  // Does a store check for the oop in register obj. The content of
  // register obj is destroyed afterwards.
  store_check_part_1(obj);
  store_check_part_2(obj);
}

// Same as store_check(obj); the dst argument is not used.
void MacroAssembler::store_check(Register obj, Address dst) {
  store_check(obj);
}


// split the store check operation so that other instructions can be scheduled inbetween
// Part 1: shifts obj right by card_shift, turning it into a card index.
void MacroAssembler::store_check_part_1(Register obj) {
  BarrierSet* bs = Universe::heap()->barrier_set();
  assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
  shrptr(obj, CardTableModRefBS::card_shift);
}

// Part 2: writes 0 into the card table byte at byte_map_base + obj, where
// obj holds the card index produced by store_check_part_1.
void MacroAssembler::store_check_part_2(Register obj) {
  BarrierSet* bs = Universe::heap()->barrier_set();
  assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
  CardTableModRefBS* ct = (CardTableModRefBS*)bs;
  assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");

  // The calculation for byte_map_base is as follows:
  // byte_map_base = _byte_map - (uintptr_t(low_bound) >> card_shift);
  // So this essentially converts an address to a displacement and
  // it will never need to be relocated. On 64bit however the value may be too
  // large for a 32bit displacement

  intptr_t disp = (intptr_t) ct->byte_map_base;
  if (is_simm32(disp)) {
    Address cardtable(noreg, obj, Address::times_1, disp);
    movb(cardtable, 0);
  } else {
    // By doing it as an ExternalAddress disp could be converted to a rip-relative
    // displacement and done in a single instruction given favorable mapping and
    // a smarter version of as_Address. Worst case it is two instructions which
    // is no worse off than loading disp into a register and doing as a simple
    // Address() as above.
    // We can't do as ExternalAddress as the only style since if disp == 0 we'll
    // assert since NULL isn't acceptable in a reloci (see 6644928). In any case
    // in some cases we'll get a single instruction version.

    ExternalAddress cardtable((address)disp);
    Address index(noreg, obj, Address::times_1);
    movb(as_Address(ArrayAddress(cardtable, index)), 0);
  }
}

// Pointer-sized subtract of an immediate (subq on LP64, subl otherwise).
void MacroAssembler::subptr(Register dst, int32_t imm32) {
  LP64_ONLY(subq(dst, imm32)) NOT_LP64(subl(dst, imm32));
}

// Pointer-sized register subtract (subq on LP64, subl otherwise).
void MacroAssembler::subptr(Register dst, Register src) {
  LP64_ONLY(subq(dst, src)) NOT_LP64(subl(dst, src));
}

// C++ bool manipulation
// Tests a C++ bool held in dst; the test width follows sizeof(bool).
// Two-byte bools are not supported (no testw implementation).
void MacroAssembler::testbool(Register dst) {
  if(sizeof(bool) == 1)
    testb(dst, 0xff);
  else if(sizeof(bool) == 2) {
    // testw implementation needed for two byte bools
    ShouldNotReachHere();
  } else if(sizeof(bool) == 4)
    testl(dst, dst);
  else
    // unsupported
    ShouldNotReachHere();
}

// Pointer-sized test (testq on LP64, testl otherwise).
void MacroAssembler::testptr(Register dst, Register src) {
  LP64_ONLY(testq(dst, src)) NOT_LP64(testl(dst, src));
}

// Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes.
7232 void MacroAssembler::tlab_allocate(Register obj, 7233 Register var_size_in_bytes, 7234 int con_size_in_bytes, 7235 Register t1, 7236 Register t2, 7237 Label& slow_case) { 7238 assert_different_registers(obj, t1, t2); 7239 assert_different_registers(obj, var_size_in_bytes, t1); 7240 Register end = t2; 7241 Register thread = NOT_LP64(t1) LP64_ONLY(r15_thread); 7242 7243 verify_tlab(); 7244 7245 NOT_LP64(get_thread(thread)); 7246 7247 movptr(obj, Address(thread, JavaThread::tlab_top_offset())); 7248 if (var_size_in_bytes == noreg) { 7249 lea(end, Address(obj, con_size_in_bytes)); 7250 } else { 7251 lea(end, Address(obj, var_size_in_bytes, Address::times_1)); 7252 } 7253 cmpptr(end, Address(thread, JavaThread::tlab_end_offset())); 7254 jcc(Assembler::above, slow_case); 7255 7256 // update the tlab top pointer 7257 movptr(Address(thread, JavaThread::tlab_top_offset()), end); 7258 7259 // recover var_size_in_bytes if necessary 7260 if (var_size_in_bytes == end) { 7261 subptr(var_size_in_bytes, obj); 7262 } 7263 verify_tlab(); 7264 } 7265 7266 // Preserves rbx, and rdx. 7267 Register MacroAssembler::tlab_refill(Label& retry, 7268 Label& try_eden, 7269 Label& slow_case) { 7270 Register top = rax; 7271 Register t1 = rcx; 7272 Register t2 = rsi; 7273 Register thread_reg = NOT_LP64(rdi) LP64_ONLY(r15_thread); 7274 assert_different_registers(top, thread_reg, t1, t2, /* preserve: */ rbx, rdx); 7275 Label do_refill, discard_tlab; 7276 7277 if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) { 7278 // No allocation in the shared eden. 
7279 jmp(slow_case); 7280 } 7281 7282 NOT_LP64(get_thread(thread_reg)); 7283 7284 movptr(top, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset()))); 7285 movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_end_offset()))); 7286 7287 // calculate amount of free space 7288 subptr(t1, top); 7289 shrptr(t1, LogHeapWordSize); 7290 7291 // Retain tlab and allocate object in shared space if 7292 // the amount free in the tlab is too large to discard. 7293 cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset()))); 7294 jcc(Assembler::lessEqual, discard_tlab); 7295 7296 // Retain 7297 // %%% yuck as movptr... 7298 movptr(t2, (int32_t) ThreadLocalAllocBuffer::refill_waste_limit_increment()); 7299 addptr(Address(thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset())), t2); 7300 if (TLABStats) { 7301 // increment number of slow_allocations 7302 addl(Address(thread_reg, in_bytes(JavaThread::tlab_slow_allocations_offset())), 1); 7303 } 7304 jmp(try_eden); 7305 7306 bind(discard_tlab); 7307 if (TLABStats) { 7308 // increment number of refills 7309 addl(Address(thread_reg, in_bytes(JavaThread::tlab_number_of_refills_offset())), 1); 7310 // accumulate wastage -- t1 is amount free in tlab 7311 addl(Address(thread_reg, in_bytes(JavaThread::tlab_fast_refill_waste_offset())), t1); 7312 } 7313 7314 // if tlab is currently allocated (top or end != null) then 7315 // fill [top, end + alignment_reserve) with array object 7316 testptr(top, top); 7317 jcc(Assembler::zero, do_refill); 7318 7319 // set up the mark word 7320 movptr(Address(top, oopDesc::mark_offset_in_bytes()), (intptr_t)markOopDesc::prototype()->copy_set_hash(0x2)); 7321 // set the length to the remaining space 7322 subptr(t1, typeArrayOopDesc::header_size(T_INT)); 7323 addptr(t1, (int32_t)ThreadLocalAllocBuffer::alignment_reserve()); 7324 shlptr(t1, log2_intptr(HeapWordSize/sizeof(jint))); 7325 movl(Address(top, arrayOopDesc::length_offset_in_bytes()), t1); 7326 // set 
klass to intArrayKlass 7327 // dubious reloc why not an oop reloc? 7328 movptr(t1, ExternalAddress((address)Universe::intArrayKlassObj_addr())); 7329 // store klass last. concurrent gcs assumes klass length is valid if 7330 // klass field is not null. 7331 store_klass(top, t1); 7332 7333 movptr(t1, top); 7334 subptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_start_offset()))); 7335 incr_allocated_bytes(thread_reg, t1, 0); 7336 7337 // refill the tlab with an eden allocation 7338 bind(do_refill); 7339 movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_size_offset()))); 7340 shlptr(t1, LogHeapWordSize); 7341 // allocate new tlab, address returned in top 7342 eden_allocate(top, t1, 0, t2, slow_case); 7343 7344 // Check that t1 was preserved in eden_allocate. 7345 #ifdef ASSERT 7346 if (UseTLAB) { 7347 Label ok; 7348 Register tsize = rsi; 7349 assert_different_registers(tsize, thread_reg, t1); 7350 push(tsize); 7351 movptr(tsize, Address(thread_reg, in_bytes(JavaThread::tlab_size_offset()))); 7352 shlptr(tsize, LogHeapWordSize); 7353 cmpptr(t1, tsize); 7354 jcc(Assembler::equal, ok); 7355 stop("assert(t1 != tlab size)"); 7356 should_not_reach_here(); 7357 7358 bind(ok); 7359 pop(tsize); 7360 } 7361 #endif 7362 movptr(Address(thread_reg, in_bytes(JavaThread::tlab_start_offset())), top); 7363 movptr(Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())), top); 7364 addptr(top, t1); 7365 subptr(top, (int32_t)ThreadLocalAllocBuffer::alignment_reserve_in_bytes()); 7366 movptr(Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())), top); 7367 verify_tlab(); 7368 jmp(retry); 7369 7370 return thread_reg; // for use by caller 7371 } 7372 7373 void MacroAssembler::incr_allocated_bytes(Register thread, 7374 Register var_size_in_bytes, 7375 int con_size_in_bytes, 7376 Register t1) { 7377 #ifdef _LP64 7378 if (var_size_in_bytes->is_valid()) { 7379 addq(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), var_size_in_bytes); 7380 } else { 7381 
addq(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), con_size_in_bytes); 7382 } 7383 #else 7384 if (!thread->is_valid()) { 7385 assert(t1->is_valid(), "need temp reg"); 7386 thread = t1; 7387 get_thread(thread); 7388 } 7389 7390 if (var_size_in_bytes->is_valid()) { 7391 addl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), var_size_in_bytes); 7392 } else { 7393 addl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), con_size_in_bytes); 7394 } 7395 adcl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())+4), 0); 7396 #endif 7397 } 7398 7399 static const double pi_4 = 0.7853981633974483; 7400 7401 void MacroAssembler::trigfunc(char trig, int num_fpu_regs_in_use) { 7402 // A hand-coded argument reduction for values in fabs(pi/4, pi/2) 7403 // was attempted in this code; unfortunately it appears that the 7404 // switch to 80-bit precision and back causes this to be 7405 // unprofitable compared with simply performing a runtime call if 7406 // the argument is out of the (-pi/4, pi/4) range. 
7407 7408 Register tmp = noreg; 7409 if (!VM_Version::supports_cmov()) { 7410 // fcmp needs a temporary so preserve rbx, 7411 tmp = rbx; 7412 push(tmp); 7413 } 7414 7415 Label slow_case, done; 7416 7417 ExternalAddress pi4_adr = (address)&pi_4; 7418 if (reachable(pi4_adr)) { 7419 // x ?<= pi/4 7420 fld_d(pi4_adr); 7421 fld_s(1); // Stack: X PI/4 X 7422 fabs(); // Stack: |X| PI/4 X 7423 fcmp(tmp); 7424 jcc(Assembler::above, slow_case); 7425 7426 // fastest case: -pi/4 <= x <= pi/4 7427 switch(trig) { 7428 case 's': 7429 fsin(); 7430 break; 7431 case 'c': 7432 fcos(); 7433 break; 7434 case 't': 7435 ftan(); 7436 break; 7437 default: 7438 assert(false, "bad intrinsic"); 7439 break; 7440 } 7441 jmp(done); 7442 } 7443 7444 // slow case: runtime call 7445 bind(slow_case); 7446 // Preserve registers across runtime call 7447 pusha(); 7448 int incoming_argument_and_return_value_offset = -1; 7449 if (num_fpu_regs_in_use > 1) { 7450 // Must preserve all other FPU regs (could alternatively convert 7451 // SharedRuntime::dsin and dcos into assembly routines known not to trash 7452 // FPU state, but can not trust C compiler) 7453 NEEDS_CLEANUP; 7454 // NOTE that in this case we also push the incoming argument to 7455 // the stack and restore it later; we also use this stack slot to 7456 // hold the return value from dsin or dcos. 
7457 for (int i = 0; i < num_fpu_regs_in_use; i++) { 7458 subptr(rsp, sizeof(jdouble)); 7459 fstp_d(Address(rsp, 0)); 7460 } 7461 incoming_argument_and_return_value_offset = sizeof(jdouble)*(num_fpu_regs_in_use-1); 7462 fld_d(Address(rsp, incoming_argument_and_return_value_offset)); 7463 } 7464 subptr(rsp, sizeof(jdouble)); 7465 fstp_d(Address(rsp, 0)); 7466 #ifdef _LP64 7467 movdbl(xmm0, Address(rsp, 0)); 7468 #endif // _LP64 7469 7470 // NOTE: we must not use call_VM_leaf here because that requires a 7471 // complete interpreter frame in debug mode -- same bug as 4387334 7472 // MacroAssembler::call_VM_leaf_base is perfectly safe and will 7473 // do proper 64bit abi 7474 7475 NEEDS_CLEANUP; 7476 // Need to add stack banging before this runtime call if it needs to 7477 // be taken; however, there is no generic stack banging routine at 7478 // the MacroAssembler level 7479 switch(trig) { 7480 case 's': 7481 { 7482 MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::dsin), 0); 7483 } 7484 break; 7485 case 'c': 7486 { 7487 MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::dcos), 0); 7488 } 7489 break; 7490 case 't': 7491 { 7492 MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::dtan), 0); 7493 } 7494 break; 7495 default: 7496 assert(false, "bad intrinsic"); 7497 break; 7498 } 7499 #ifdef _LP64 7500 movsd(Address(rsp, 0), xmm0); 7501 fld_d(Address(rsp, 0)); 7502 #endif // _LP64 7503 addptr(rsp, sizeof(jdouble)); 7504 if (num_fpu_regs_in_use > 1) { 7505 // Must save return value to stack and then restore entire FPU stack 7506 fstp_d(Address(rsp, incoming_argument_and_return_value_offset)); 7507 for (int i = 0; i < num_fpu_regs_in_use; i++) { 7508 fld_d(Address(rsp, 0)); 7509 addptr(rsp, sizeof(jdouble)); 7510 } 7511 } 7512 popa(); 7513 7514 // Come here with result in F-TOS 7515 bind(done); 7516 7517 if (tmp != noreg) { 7518 pop(tmp); 7519 } 7520 } 7521 7522 7523 // Look up the method for a 
// megamorphic invokeinterface call.
// The target method is determined by <intf_klass, itable_index>.
// The receiver klass is in recv_klass.
// On success, the result will be in method_result, and execution falls through.
// On failure, execution transfers to the given label.
//
// Register usage visible in this routine:
//  - recv_klass is overwritten (advanced by the scaled itable_index).
//  - scan_temp is overwritten (itable cursor, then the matched entry's offset).
//  - method_result is overwritten with each scanned interface and finally
//    with the looked-up method.
void MacroAssembler::lookup_interface_method(Register recv_klass,
                                             Register intf_klass,
                                             RegisterOrConstant itable_index,
                                             Register method_result,
                                             Register scan_temp,
                                             Label& L_no_such_interface) {
  assert_different_registers(recv_klass, intf_klass, method_result, scan_temp);
  assert(itable_index.is_constant() || itable_index.as_register() == method_result,
         "caller must use same register for non-constant itable index as for method");

  // Compute start of first itableOffsetEntry (which is at the end of the vtable)
  int vtable_base = instanceKlass::vtable_start_offset() * wordSize;
  int itentry_off = itableMethodEntry::method_offset_in_bytes();
  int scan_step = itableOffsetEntry::size() * wordSize;
  int vte_size = vtableEntry::size() * wordSize;
  Address::ScaleFactor times_vte_scale = Address::times_ptr;
  assert(vte_size == wordSize, "else adjust times_vte_scale");

  // scan_temp = vtable length (32-bit load)
  movl(scan_temp, Address(recv_klass, instanceKlass::vtable_length_offset() * wordSize));

  // %%% Could store the aligned, prescaled offset in the klassoop.
  // scan_temp = recv_klass + vtable_base + vtable_length * entry size
  lea(scan_temp, Address(recv_klass, scan_temp, times_vte_scale, vtable_base));
  if (HeapWordsPerLong > 1) {
    // Round up to align_object_offset boundary
    // see code for instanceKlass::start_of_itable!
    round_to(scan_temp, BytesPerLong);
  }

  // Adjust recv_klass by scaled itable_index, so we can free itable_index.
  assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below");
  lea(recv_klass, Address(recv_klass, itable_index, Address::times_ptr, itentry_off));

  // for (scan = klass->itable(); scan->interface() != NULL; scan += scan_step) {
  //   if (scan->interface() == intf) {
  //     result = (klass + scan->offset() + itable_index);
  //   }
  // }
  Label search, found_method;

  // This C++ loop runs at code-generation time and emits the compare sequence
  // twice: peel == 1 emits the peeled first probe (branch to found_method on a
  // hit) followed by the 'search' loop head; peel == 0 emits the in-loop probe
  // (branch back to 'search' on a miss) and then stops emitting.
  for (int peel = 1; peel >= 0; peel--) {
    movptr(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes()));
    cmpptr(intf_klass, method_result);

    if (peel) {
      jccb(Assembler::equal, found_method);
    } else {
      jccb(Assembler::notEqual, search);
      // (invert the test to fall through to found_method...)
    }

    if (!peel) break;

    bind(search);

    // Check that the previous entry is non-null. A null entry means that
    // the receiver class doesn't implement the interface, and wasn't the
    // same as when the caller was compiled.
    testptr(method_result, method_result);
    jcc(Assembler::zero, L_no_such_interface);
    addptr(scan_temp, scan_step);
  }

  bind(found_method);

  // Got a hit.
  // scan_temp = matched entry's offset field; the method is then loaded from
  // the (already index-adjusted) recv_klass plus that offset.
  movl(scan_temp, Address(scan_temp, itableOffsetEntry::offset_offset_in_bytes()));
  movptr(method_result, Address(recv_klass, scan_temp, Address::times_1));
}


// Emits a complete subtype check: the fast path followed by the slow path.
// Control transfers to L_success when sub_klass is a subtype of super_klass;
// on failure execution falls through (the local L_failure label is bound at
// the end of the emitted sequence).
void MacroAssembler::check_klass_subtype(Register sub_klass,
                                         Register super_klass,
                                         Register temp_reg,
                                         Label& L_success) {
  Label L_failure;
  // NULL slow-path label: the fast path falls through into the slow path.
  check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg, &L_success, &L_failure, NULL);
  // NULL failure label: the slow path falls through on failure.
  check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, noreg, &L_success, NULL);
  bind(L_failure);
}


// Emits the fast part of a subtype check (pointer equality, then the
// supertype display / secondary-super cache probe).
//
// At most one of L_success, L_failure, L_slow_path may be NULL; a NULL label
// means "fall through" for that outcome.
// super_check_offset may be a register, a known constant, or the constant -1,
// in which case the offset is loaded from super_klass into temp_reg (which is
// then overwritten).
void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
                                                   Register super_klass,
                                                   Register temp_reg,
                                                   Label* L_success,
                                                   Label* L_failure,
                                                   Label* L_slow_path,
                                                   RegisterOrConstant super_check_offset) {
  assert_different_registers(sub_klass, super_klass, temp_reg);
  bool must_load_sco = (super_check_offset.constant_or_zero() == -1);
  if (super_check_offset.is_register()) {
    assert_different_registers(sub_klass, super_klass,
                               super_check_offset.as_register());
  } else if (must_load_sco) {
    assert(temp_reg != noreg, "supply either a temp or a register offset");
  }

  // Map each NULL label onto the local fall-through label.
  Label L_fallthrough;
  int label_nulls = 0;
  if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; }
  if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; }
  if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; }
  assert(label_nulls <= 1, "at most one NULL in the batch");

  int sc_offset = (klassOopDesc::header_size() * HeapWordSize +
                   Klass::secondary_super_cache_offset_in_bytes());
  int sco_offset = (klassOopDesc::header_size() * HeapWordSize +
                    Klass::super_check_offset_offset_in_bytes());
  Address super_check_offset_addr(super_klass, sco_offset);

  // Hacked jcc, which "knows" that L_fallthrough, at least, is in
  // range of a jccb. If this routine grows larger, reconsider at
  // least some of these.
#define local_jcc(assembler_cond, label) \
  if (&(label) == &L_fallthrough) jccb(assembler_cond, label); \
  else jcc( assembler_cond, label) /*omit semi*/

  // Hacked jmp, which may only be used just before L_fallthrough.
#define final_jmp(label) \
  if (&(label) == &L_fallthrough) { /*do nothing*/ } \
  else jmp(label) /*omit semi*/

  // If the pointers are equal, we are done (e.g., String[] elements).
  // This self-check enables sharing of secondary supertype arrays among
  // non-primary types such as array-of-interface. Otherwise, each such
  // type would need its own customized SSA.
  // We move this check to the front of the fast path because many
  // type checks are in fact trivially successful in this manner,
  // so we get a nicely predicted branch right at the start of the check.
  cmpptr(sub_klass, super_klass);
  local_jcc(Assembler::equal, *L_success);

  // Check the supertype display:
  if (must_load_sco) {
    // Positive movl does right thing on LP64.
    movl(temp_reg, super_check_offset_addr);
    super_check_offset = RegisterOrConstant(temp_reg);
  }
  Address super_check_addr(sub_klass, super_check_offset, Address::times_1, 0);
  cmpptr(super_klass, super_check_addr); // load displayed supertype

  // This check has worked decisively for primary supers.
  // Secondary supers are sought in the super_cache ('super_cache_addr').
  // (Secondary supers are interfaces and very deeply nested subtypes.)
  // This works in the same check above because of a tricky aliasing
  // between the super_cache and the primary super display elements.
  // (The 'super_check_addr' can address either, as the case requires.)
  // Note that the cache is updated below if it does not help us find
  // what we need immediately.
  // So if it was a primary super, we can just fail immediately.
  // Otherwise, it's the slow path for us (no success at this point).

  if (super_check_offset.is_register()) {
    // Offset only known at run time: a miss is a definite failure unless the
    // offset equals sc_offset (the cache slot), which requires the slow path.
    local_jcc(Assembler::equal, *L_success);
    cmpl(super_check_offset.as_register(), sc_offset);
    if (L_failure == &L_fallthrough) {
      local_jcc(Assembler::equal, *L_slow_path);
    } else {
      local_jcc(Assembler::notEqual, *L_failure);
      final_jmp(*L_slow_path);
    }
  } else if (super_check_offset.as_constant() == sc_offset) {
    // Need a slow path; fast failure is impossible.
    if (L_slow_path == &L_fallthrough) {
      local_jcc(Assembler::equal, *L_success);
    } else {
      local_jcc(Assembler::notEqual, *L_slow_path);
      final_jmp(*L_success);
    }
  } else {
    // No slow path; it's a fast decision.
    if (L_failure == &L_fallthrough) {
      local_jcc(Assembler::equal, *L_success);
    } else {
      local_jcc(Assembler::notEqual, *L_failure);
      final_jmp(*L_success);
    }
  }

  bind(L_fallthrough);

#undef local_jcc
#undef final_jmp
}


// Emits the slow part of a subtype check: a linear 'repne scas' scan of
// sub_klass's secondary-supers array for super_klass.
//
// At most one of L_success / L_failure may be NULL (meaning fall through).
// rax, rcx and rdi are used by the scan; each one that is not one of the
// supplied temps is pushed before and popped after the scan.
// On success the super is stored into sub_klass's secondary super cache.
// When set_cond_codes is true, rdi must be one of the temps (asserted below).
void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
                                                   Register super_klass,
                                                   Register temp_reg,
                                                   Register temp2_reg,
                                                   Label* L_success,
                                                   Label* L_failure,
                                                   bool set_cond_codes) {
  assert_different_registers(sub_klass, super_klass, temp_reg);
  if (temp2_reg != noreg)
    assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg);
#define IS_A_TEMP(reg) ((reg) == temp_reg || (reg) == temp2_reg)

  Label L_fallthrough;
  int label_nulls = 0;
  if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; }
  if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; }
  assert(label_nulls <= 1, "at most one NULL in the batch");

  // a couple of useful fields in sub_klass:
  int ss_offset = (klassOopDesc::header_size() * HeapWordSize +
                   Klass::secondary_supers_offset_in_bytes());
  int sc_offset = (klassOopDesc::header_size() * HeapWordSize +
                   Klass::secondary_super_cache_offset_in_bytes());
  Address secondary_supers_addr(sub_klass, ss_offset);
  Address super_cache_addr( sub_klass, sc_offset);

  // Do a linear scan of the secondary super-klass chain.
  // This code is rarely used, so simplicity is a virtue here.
  // The repne_scan instruction uses fixed registers, which we must spill.
  // Don't worry too much about pre-existing connections with the input regs.

  assert(sub_klass != rax, "killed reg"); // killed by mov(rax, super)
  assert(sub_klass != rcx, "killed reg"); // killed by lea(rcx, &pst_counter)

  // Get super_klass value into rax (even if it was in rdi or rcx).
  bool pushed_rax = false, pushed_rcx = false, pushed_rdi = false;
  // With UseCompressedOops rax is encoded in place further down, so it is
  // saved/copied here even when super_klass already lives in rax.
  if (super_klass != rax || UseCompressedOops) {
    if (!IS_A_TEMP(rax)) { push(rax); pushed_rax = true; }
    mov(rax, super_klass);
  }
  if (!IS_A_TEMP(rcx)) { push(rcx); pushed_rcx = true; }
  if (!IS_A_TEMP(rdi)) { push(rdi); pushed_rdi = true; }

#ifndef PRODUCT
  // Non-product builds: bump SharedRuntime::_partial_subtype_ctr.
  int* pst_counter = &SharedRuntime::_partial_subtype_ctr;
  ExternalAddress pst_counter_addr((address) pst_counter);
  NOT_LP64( incrementl(pst_counter_addr) );
  LP64_ONLY( lea(rcx, pst_counter_addr) );
  LP64_ONLY( incrementl(Address(rcx, 0)) );
#endif //PRODUCT

  // We will consult the secondary-super array.
  movptr(rdi, secondary_supers_addr);
  // Load the array length. (Positive movl does right thing on LP64.)
  movl(rcx, Address(rdi, arrayOopDesc::length_offset_in_bytes()));
  // Skip to start of data.
  addptr(rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT));

  // Scan RCX words at [RDI] for an occurrence of RAX.
  // Set NZ/Z based on last compare.
  // Z flag value will not be set by 'repne' if RCX == 0 since 'repne' does
  // not change flags (only scas instruction which is repeated sets flags).
  // Set Z = 0 (not equal) before 'repne' to indicate that class was not found.
#ifdef _LP64
  // This part is tricky, as values in supers array could be 32 or 64 bit wide
  // and we store values in objArrays always encoded, thus we need to encode
  // the value of rax before repne. Note that rax is dead after the repne.
  if (UseCompressedOops) {
    encode_heap_oop_not_null(rax); // Changes flags.
    // The superclass is never null; it would be a basic system error if a null
    // pointer were to sneak in here. Note that we have already loaded the
    // Klass::super_check_offset from the super_klass in the fast path,
    // so if there is a null in that register, we are already in the afterlife.
    testl(rax,rax); // Set Z = 0
    repne_scanl();
  } else
#endif // _LP64
  {
    testptr(rax,rax); // Set Z = 0
    repne_scan();
  }
  // Unspill the temp. registers:
  // (pops in reverse push order; the conditional branch below still consumes
  // the Z flag produced by the scan)
  if (pushed_rdi) pop(rdi);
  if (pushed_rcx) pop(rcx);
  if (pushed_rax) pop(rax);

  if (set_cond_codes) {
    // Special hack for the AD files: rdi is guaranteed non-zero.
    assert(!pushed_rdi, "rdi must be left non-NULL");
    // Also, the condition codes are properly set Z/NZ on succeed/failure.
  }

  // Short branch is used only when the failure target is the local
  // fall-through label bound at the end of this routine.
  if (L_failure == &L_fallthrough)
    jccb(Assembler::notEqual, *L_failure);
  else jcc(Assembler::notEqual, *L_failure);

  // Success. Cache the super we found and proceed in triumph.
  movptr(super_cache_addr, super_klass);

  if (L_success != &L_fallthrough) {
    jmp(*L_success);
  }

#undef IS_A_TEMP

  bind(L_fallthrough);
}


// ucomisd with an AddressLiteral operand; src is converted directly with
// as_Address() (no reachability fallback, unlike xorpd/xorps below).
void MacroAssembler::ucomisd(XMMRegister dst, AddressLiteral src) {
  ucomisd(dst, as_Address(src));
}

// ucomiss with an AddressLiteral operand; src is converted directly with
// as_Address() (no reachability fallback, unlike xorpd/xorps below).
void MacroAssembler::ucomiss(XMMRegister dst, AddressLiteral src) {
  ucomiss(dst, as_Address(src));
}

// xorpd with an AddressLiteral operand. When src is not reachable its address
// is first materialized in rscratch1, which is therefore clobbered.
void MacroAssembler::xorpd(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    xorpd(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    xorpd(dst, Address(rscratch1, 0));
  }
}

// xorps with an AddressLiteral operand. When src is not reachable its address
// is first materialized in rscratch1, which is therefore clobbered.
void MacroAssembler::xorps(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    xorps(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    xorps(dst, Address(rscratch1, 0));
  }
}

// 32-bit conditional move from memory. Uses cmovl when the CPU supports it;
// otherwise emits a short branch on the negated condition around a movl.
void MacroAssembler::cmov32(Condition cc, Register dst, Address src) {
  if (VM_Version::supports_cmov()) {
    cmovl(cc, dst, src);
  } else {
    Label L;
    jccb(negate_condition(cc), L);
    movl(dst, src);
    bind(L);
  }
}

// 32-bit conditional move from a register. Uses cmovl when the CPU supports
// it; otherwise emits a short branch on the negated condition around a movl.
void MacroAssembler::cmov32(Condition cc, Register dst, Register src) {
  if (VM_Version::supports_cmov()) {
    cmovl(cc, dst, src);
  } else {
    Label L;
    jccb(negate_condition(cc), L);
    movl(dst, src);
    bind(L);
  }
}

// Emits a call to the verify_oop stub for the oop held in 'reg'.
// No-op unless VerifyOops is set. 's' is a caller-supplied description that
// is embedded in the diagnostic message.
void MacroAssembler::verify_oop(Register reg, const char* s) {
  if (!VerifyOops) return;

  // Pass register number to verify_oop_subroutine
  // The message buffer is heap-allocated and not freed here; its address is
  // baked into the emitted code below (presumably it must outlive that code).
  char* b = new char[strlen(s) + 50];
  sprintf(b, "verify_oop: %s: %s", reg->name(), s);
#ifdef _LP64
  push(rscratch1); // save r10, trashed by movptr()
#endif
  push(rax); // save rax,
  push(reg); // pass register argument
  ExternalAddress buffer((address) b);
  // avoid using pushptr, as it modifies scratch registers
  // and our contract is not to modify anything
  movptr(rax, buffer.addr());
  push(rax);
  // call indirectly to solve generation ordering problem
  movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));
  call(rax);
  // Caller pops the arguments (oop, message) and restores rax, r10
}


// Returns *delayed_value_addr + offset as a constant when the delayed value
// is already non-zero at code-generation time. Otherwise emits code that
// loads the value into 'tmp' at run time (adding 'offset' if non-zero) and
// returns 'tmp' as a register operand.
RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr,
                                                      Register tmp,
                                                      int offset) {
  intptr_t value = *delayed_value_addr;
  if (value != 0)
    return RegisterOrConstant(value + offset);

  // load indirectly to solve generation ordering problem
  movptr(tmp, ExternalAddress((address) delayed_value_addr));

#ifdef ASSERT
  // Debug builds: the emitted code stops (or halts) if the loaded value is
  // still zero at run time.
  { Label L;
    testptr(tmp, tmp);
    if (WizardMode) {
      jcc(Assembler::notZero, L);
      // Buffer is not freed here; its address is handed to stop().
      char* buf = new char[40];
      // NOTE(review): the literal is directly adjacent to the INTPTR_FORMAT
      // macro; C++11 and later may parse that as a literal suffix -- confirm
      // against the language level this file is built with.
      sprintf(buf, "DelayedValue="INTPTR_FORMAT, delayed_value_addr[1]);
      stop(buf);
    } else {
      jccb(Assembler::notZero, L);
      hlt();
    }
    bind(L);
  }
#endif

  if (offset != 0)
    addptr(tmp, offset);

  return RegisterOrConstant(tmp);
}


// registers on entry:
//  - rax ('check' register): required MethodType
//  - rcx: method handle
//  - rdx, rsi, or ?: killable temp
//
// Emits a compare of mtype_reg against the method handle's type field and a
// jump to wrong_method_type when they differ. temp_reg is overwritten when
// UseCompressedOops is set (and may be used by delayed_value).
void MacroAssembler::check_method_handle_type(Register mtype_reg, Register mh_reg,
                                              Register temp_reg,
                                              Label& wrong_method_type) {
  Address type_addr(mh_reg, delayed_value(java_lang_invoke_MethodHandle::type_offset_in_bytes, temp_reg));
  // compare method type against that of the receiver
  if (UseCompressedOops) {
    // The field holds a narrow oop; decode it into temp_reg before comparing.
    load_heap_oop(temp_reg, type_addr);
    cmpptr(mtype_reg, temp_reg);
  } else {
    cmpptr(mtype_reg, type_addr);
  }
  jcc(Assembler::notEqual, wrong_method_type);
}


// A method handle has a "vmslots" field which gives the size of its
// argument list in JVM stack slots.
// This field is either located directly
// in every method handle, or else is indirectly accessed through the
// method handle's MethodType. This macro hides the distinction.
//
// Loads the 32-bit vmslots value into vmslots_reg. temp_reg is handed to
// delayed_value and may be overwritten.
void MacroAssembler::load_method_handle_vmslots(Register vmslots_reg, Register mh_reg,
                                                Register temp_reg) {
  assert_different_registers(vmslots_reg, mh_reg, temp_reg);
  // load mh.type.form.vmslots
  if (java_lang_invoke_MethodHandle::vmslots_offset_in_bytes() != 0) {
    // hoist vmslots into every mh to avoid dependent load chain
    movl(vmslots_reg, Address(mh_reg, delayed_value(java_lang_invoke_MethodHandle::vmslots_offset_in_bytes, temp_reg)));
  } else {
    // Walk mh.type -> form -> vmslots, reusing vmslots_reg as the cursor.
    Register temp2_reg = vmslots_reg;
    load_heap_oop(temp2_reg, Address(mh_reg, delayed_value(java_lang_invoke_MethodHandle::type_offset_in_bytes, temp_reg)));
    load_heap_oop(temp2_reg, Address(temp2_reg, delayed_value(java_lang_invoke_MethodType::form_offset_in_bytes, temp_reg)));
    movl(vmslots_reg, Address(temp2_reg, delayed_value(java_lang_invoke_MethodTypeForm::vmslots_offset_in_bytes, temp_reg)));
  }
}


// registers on entry:
//  - rcx: method handle
//  - rdx: killable temp (interpreted only)
//  - rax: killable temp (compiled only)
//
// Loads the method handle's vmentry into temp_reg and emits an indirect jump
// through its from-interpreted entry slot.
void MacroAssembler::jump_to_method_handle_entry(Register mh_reg, Register temp_reg) {
  assert(mh_reg == rcx, "caller must put MH object in rcx");
  assert_different_registers(mh_reg, temp_reg);

  // pick out the interpreted side of the handler
  // NOTE: vmentry is not an oop!
  movptr(temp_reg, Address(mh_reg, delayed_value(java_lang_invoke_MethodHandle::vmentry_offset_in_bytes, temp_reg)));

  // off we go...
  jmp(Address(temp_reg, MethodHandleEntry::from_interpreted_entry_offset_in_bytes()));

  // for the various stubs which take control at this point,
  // see MethodHandles::generate_method_handle_stub
}


// Builds the rsp-relative Address of an interpreter argument slot.
// arg_slot may be a constant slot number (folded into the displacement) or a
// register (used as a scaled index). extra_slot_offset is added to the slot
// position. One extra word is added for the return PC.
// Emits no code; only constructs the Address.
Address MacroAssembler::argument_address(RegisterOrConstant arg_slot,
                                         int extra_slot_offset) {
  // cf. TemplateTable::prepare_invoke(), if (load_receiver).
  int stackElementSize = Interpreter::stackElementSize;
  int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0);
#ifdef ASSERT
  // Sanity check: consecutive expression slots are stackElementSize apart.
  int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1);
  assert(offset1 - offset == stackElementSize, "correct arithmetic");
#endif
  Register scale_reg = noreg;
  Address::ScaleFactor scale_factor = Address::no_scale;
  if (arg_slot.is_constant()) {
    offset += arg_slot.as_constant() * stackElementSize;
  } else {
    scale_reg = arg_slot.as_register();
    scale_factor = Address::times(stackElementSize);
  }
  offset += wordSize; // return PC is on stack
  return Address(rsp, scale_reg, scale_factor, offset);
}


// Emits a call to the verify_oop stub for the oop stored at 'addr'.
// No-op unless VerifyOops is set. 's' is a caller-supplied description that
// is embedded in the diagnostic message.
void MacroAssembler::verify_oop_addr(Address addr, const char* s) {
  if (!VerifyOops) return;

  // Address adjust(addr.base(), addr.index(), addr.scale(), addr.disp() + BytesPerWord);
  // Pass register number to verify_oop_subroutine
  // The message buffer is heap-allocated and not freed here; its address is
  // baked into the emitted code below (presumably it must outlive that code).
  char* b = new char[strlen(s) + 50];
  sprintf(b, "verify_oop_addr: %s", s);

#ifdef _LP64
  push(rscratch1); // save r10, trashed by movptr()
#endif
  push(rax); // save rax,
  // addr may contain rsp so we will have to adjust it based on the push
  // we just did (and on 64 bit we do two pushes)
  // NOTE: 64bit seemed to have had a bug in that it did movq(addr, rax); which
  // stores rax into addr which is backwards of what was intended.
  if (addr.uses(rsp)) {
    lea(rax, addr);
    // Compensate for the words pushed above: one on 32-bit, two on LP64.
    pushptr(Address(rax, LP64_ONLY(2 *) BytesPerWord));
  } else {
    pushptr(addr);
  }

  ExternalAddress buffer((address) b);
  // pass msg argument
  // avoid using pushptr, as it modifies scratch registers
  // and our contract is not to modify anything
  movptr(rax, buffer.addr());
  push(rax);

  // call indirectly to solve generation ordering problem
  movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));
  call(rax);
  // Caller pops the arguments (addr, message) and restores rax, r10.
}

// Debug-only (ASSERT builds with UseTLAB && VerifyOops): emits run-time checks
// that the current thread's TLAB satisfies start <= top <= end, stopping with
// a message otherwise. Registers used are saved and restored around the check.
void MacroAssembler::verify_tlab() {
#ifdef ASSERT
  if (UseTLAB && VerifyOops) {
    Label next, ok;
    Register t1 = rsi;
    // 32-bit: the thread pointer is fetched into rbx; LP64: r15_thread is used.
    Register thread_reg = NOT_LP64(rbx) LP64_ONLY(r15_thread);

    push(t1);
    NOT_LP64(push(thread_reg));
    NOT_LP64(get_thread(thread_reg));

    // top >= start ?
    movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())));
    cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_start_offset())));
    jcc(Assembler::aboveEqual, next);
    stop("assert(top >= start)");
    should_not_reach_here();

    bind(next);
    // end >= top ?
    movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())));
    cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())));
    jcc(Assembler::aboveEqual, ok);
    stop("assert(top <= end)");
    should_not_reach_here();

    bind(ok);
    NOT_LP64(pop(thread_reg));
    pop(t1);
  }
#endif
}

// View of a saved x87 control word with bit-field accessors and a printer.
// Only the low 16 bits of _value are printed.
class ControlWord {
 public:
  int32_t _value;

  // Two-bit fields.
  int rounding_control() const { return (_value >> 10) & 3 ; }
  int precision_control() const { return (_value >> 8) & 3 ; }
  // Single-bit fields, bits 5..0 (printed under the heading "masks").
  bool precision() const { return ((_value >> 5) & 1) != 0; }
  bool underflow() const { return ((_value >> 4) & 1) != 0; }
  bool overflow() const { return ((_value >> 3) & 1) != 0; }
  bool zero_divide() const { return ((_value >> 2) & 1) != 0; }
  bool denormalized() const { return ((_value >> 1) & 1) != 0; }
  bool invalid() const { return ((_value >> 0) & 1) != 0; }

  // Prints the word in hex followed by the decoded bits, rounding mode and
  // precision mode (no trailing newline).
  void print() const {
    // rounding control
    // (the switch covers every value of a 2-bit field, so rc is always set)
    const char* rc;
    switch (rounding_control()) {
      case 0: rc = "round near"; break;
      case 1: rc = "round down"; break;
      case 2: rc = "round up "; break;
      case 3: rc = "chop "; break;
    };
    // precision control
    // (likewise exhaustive for a 2-bit field)
    const char* pc;
    switch (precision_control()) {
      case 0: pc = "24 bits "; break;
      case 1: pc = "reserved"; break;
      case 2: pc = "53 bits "; break;
      case 3: pc = "64 bits "; break;
    };
    // flags
    // upper case = bit set, lower case = bit clear
    char f[9];
    f[0] = ' ';
    f[1] = ' ';
    f[2] = (precision ()) ? 'P' : 'p';
    f[3] = (underflow ()) ? 'U' : 'u';
    f[4] = (overflow ()) ? 'O' : 'o';
    f[5] = (zero_divide ()) ? 'Z' : 'z';
    f[6] = (denormalized()) ? 'D' : 'd';
    f[7] = (invalid ()) ? 'I' : 'i';
    f[8] = '\x0';
    // output
    printf("%04x masks = %s, %s, %s", _value & 0xFFFF, f, rc, pc);
  }

};

// View of a saved x87 status word with bit-field accessors and a printer.
// Only the low 16 bits of _value are printed.
class StatusWord {
 public:
  int32_t _value;

  bool busy() const { return ((_value >> 15) & 1) != 0; }
  // Condition-code bits.
  bool C3() const { return ((_value >> 14) & 1) != 0; }
  bool C2() const { return ((_value >> 10) & 1) != 0; }
  bool C1() const { return ((_value >> 9) & 1) != 0; }
  bool C0() const { return ((_value >> 8) & 1) != 0; }
  // Three-bit top-of-stack field (bits 13..11).
  int top() const { return (_value >> 11) & 7 ; }
  // Single-bit flags, bits 7..0.
  bool error_status() const { return ((_value >> 7) & 1) != 0; }
  bool stack_fault() const { return ((_value >> 6) & 1) != 0; }
  bool precision() const { return ((_value >> 5) & 1) != 0; }
  bool underflow() const { return ((_value >> 4) & 1) != 0; }
  bool overflow() const { return ((_value >> 3) & 1) != 0; }
  bool zero_divide() const { return ((_value >> 2) & 1) != 0; }
  bool denormalized() const { return ((_value >> 1) & 1) != 0; }
  bool invalid() const { return ((_value >> 0) & 1) != 0; }

  // Prints the word in hex followed by the decoded flags, condition codes
  // and top-of-stack index (no trailing newline). '-' marks a clear bit.
  void print() const {
    // condition codes
    char c[5];
    c[0] = (C3()) ? '3' : '-';
    c[1] = (C2()) ? '2' : '-';
    c[2] = (C1()) ? '1' : '-';
    c[3] = (C0()) ? '0' : '-';
    c[4] = '\x0';
    // flags
    char f[9];
    f[0] = (error_status()) ? 'E' : '-';
    f[1] = (stack_fault ()) ? 'S' : '-';
    f[2] = (precision ()) ? 'P' : '-';
    f[3] = (underflow ()) ? 'U' : '-';
    f[4] = (overflow ()) ? 'O' : '-';
    f[5] = (zero_divide ()) ? 'Z' : '-';
    f[6] = (denormalized()) ? 'D' : '-';
    f[7] = (invalid ()) ? 'I' : '-';
    f[8] = '\x0';
    // output
    printf("%04x flags = %s, cc = %s, top = %d", _value & 0xFFFF, f, c, top());
  }

};

// View of a saved x87 tag word: a two-bit tag per register.
class TagWord {
 public:
  int32_t _value;

  // Returns the two-bit tag for register index i (FPU_State::tag_as_string
  // maps the values to names).
  int tag_at(int i) const { return (_value >> (i*2)) & 3; }

  // Prints the low 16 bits in hex (no trailing newline).
  void print() const {
    printf("%04x", _value & 0xFFFF);
  }

};

// Overlay for one saved x87 data register: two 32-bit mantissa words and a
// 16-bit sign/exponent word.
class FPU_Register {
 public:
  int32_t _m0;
  int32_t _m1;
  int16_t _ex;

  // True when the three words match one specific bit pattern
  // (_ex all ones, _m1 == 0xC0000000, _m0 == 0).
  bool is_indefinite() const {
    return _ex == -1 && _m1 == (int32_t)0xC0000000 && _m0 == 0;
  }

  // Prints sign, raw exponent word and mantissa in hex; appends "NaN" when
  // _ex is 0x7FFF or all ones (no trailing newline).
  void print() const {
    char sign = (_ex < 0) ? '-' : '+';
    const char* kind = (_ex == 0x7FFF || _ex == (int16_t)-1) ? "NaN" : " ";
    printf("%c%04hx.%08x%08x %s", sign, _ex, _m1, _m0, kind);
  };

};

// Overlay for a saved x87 environment followed by the eight data registers.
// NOTE(review): the field order presumably mirrors the processor's FPU save
// image as written by push_CPU_state -- confirm against that routine.
class FPU_State {
 public:
  enum {
    register_size = 10,        // bytes per saved data register
    number_of_registers = 8,
    register_mask = 7
  };

  ControlWord _control_word;
  StatusWord _status_word;
  TagWord _tag_word;
  int32_t _error_offset;
  int32_t _error_selector;
  int32_t _data_offset;
  int32_t _data_selector;
  int8_t _register[register_size * number_of_registers];

  // Tag of stack-relative register ST(i): the tag word is indexed by
  // (top + i) modulo 8.
  int tag_for_st(int i) const { return _tag_word.tag_at((_status_word.top() + i) & register_mask); }
  // Pointer to the i-th saved register image (10-byte stride).
  FPU_Register* st(int i) const { return (FPU_Register*)&_register[register_size * i]; }

  // Maps a two-bit tag value to its name.
  const char* tag_as_string(int tag) const {
    switch (tag) {
      case 0: return "valid";
      case 1: return "zero";
      case 2: return "special";
      case 3: return "empty";
    }
    ShouldNotReachHere();
    return NULL;
  }

  // Prints all eight registers (with their stack-relative names and tags)
  // followed by the control, status and tag words.
  void print() const {
    // print computation registers
    { int t = _status_word.top();
      for (int i = 0; i < number_of_registers; i++) {
        // j is the stack-relative index for register slot i; '*' marks ST0.
        int j = (i - t) & register_mask;
        printf("%c r%d = ST%d = ", (j == 0 ? '*' : ' '), i, j);
        st(j)->print();
        printf(" %s\n", tag_as_string(_tag_word.tag_at(i)));
      }
    }
    printf("\n");
    // print control registers
    printf("ctrl = "); _control_word.print(); printf("\n");
    printf("stat = "); _status_word .print(); printf("\n");
    printf("tags = "); _tag_word .print(); printf("\n");
  }

};

// View of a saved flags register with single-bit accessors and a printer.
class Flag_Register {
 public:
  int32_t _value;

  bool overflow() const { return ((_value >> 11) & 1) != 0; }
  bool direction() const { return ((_value >> 10) & 1) != 0; }
  bool sign() const { return ((_value >> 7) & 1) != 0; }
  bool zero() const { return ((_value >> 6) & 1) != 0; }
  bool auxiliary_carry() const { return ((_value >> 4) & 1) != 0; }
  bool parity() const { return ((_value >> 2) & 1) != 0; }
  bool carry() const { return ((_value >> 0) & 1) != 0; }

  // Prints the raw value in hex followed by one letter per set flag
  // ('-' for a clear flag); no trailing newline.
  void print() const {
    // flags
    char f[8];
    f[0] = (overflow ()) ? 'O' : '-';
    f[1] = (direction ()) ? 'D' : '-';
    f[2] = (sign ()) ? 'S' : '-';
    f[3] = (zero ()) ? 'Z' : '-';
    f[4] = (auxiliary_carry()) ? 'A' : '-';
    f[5] = (parity ()) ? 'P' : '-';
    f[6] = (carry ()) ? 'C' : '-';
    f[7] = '\x0';
    // output
    printf("%08x flags = %s", _value, f);
  }

};

// One saved 32-bit integer register.
class IU_Register {
 public:
  int32_t _value;

  // Prints the value in hex and in signed decimal (no trailing newline).
  void print() const {
    printf("%08x %11d", _value, _value);
  }

};

// Overlay for the saved integer-unit state: flags followed by eight
// general-purpose registers.
// NOTE(review): the field order presumably matches the order in which
// push_CPU_state leaves them in memory -- confirm against that routine.
class IU_State {
 public:
  Flag_Register _eflags;
  IU_Register _rdi;
  IU_Register _rsi;
  IU_Register _rbp;
  IU_Register _rsp;
  IU_Register _rbx;
  IU_Register _rdx;
  IU_Register _rcx;
  IU_Register _rax;

  // Prints every register on its own line, then the flags.
  void print() const {
    // computation registers
    // NOTE(review): several labels below contain a stray comma
    // ("rax, = ", "rbx, = ", "rbp, = "); they are runtime output and are
    // left untouched here.
    printf("rax, = "); _rax.print(); printf("\n");
    printf("rbx, = "); _rbx.print(); printf("\n");
    printf("rcx = "); _rcx.print(); printf("\n");
    printf("rdx = "); _rdx.print(); printf("\n");
    printf("rdi = "); _rdi.print(); printf("\n");
    printf("rsi = "); _rsi.print(); printf("\n");
    printf("rbp, = "); _rbp.print(); printf("\n");
    printf("rsp = "); _rsp.print(); printf("\n");
    printf("\n");
    // control registers
    printf("flgs = "); _eflags.print(); printf("\n");
  }
};


// Overlay for the complete saved CPU state: FPU state first, then the
// integer-unit state.
class CPU_State {
 public:
  FPU_State _fpu_state;
  IU_State _iu_state;

  // Prints the integer state followed by the FPU state, framed by rules.
  void print() const {
    printf("--------------------------------------------------\n");
    _iu_state .print();
    printf("\n");
    _fpu_state.print();
    printf("--------------------------------------------------\n");
  }

};


// C entry point called from generated code (see print_CPU_state below);
// prints the state image it is handed.
static void _print_CPU_state(CPU_State* state) {
  state->print();
};


// Emits code that saves the CPU state on the stack, calls _print_CPU_state
// with a pointer to it, and restores the state.
void MacroAssembler::print_CPU_state() {
  push_CPU_state();
  push(rsp); // pass CPU state
  call(RuntimeAddress(CAST_FROM_FN_PTR(address, _print_CPU_state)));
  addptr(rsp, wordSize); // discard argument
  pop_CPU_state();
}


// C entry point called from generated code (see verify_FPU below).
// Checks the saved FPU state against the expected stack depth:
//  - stack_depth < 0: only checks that ST7 is tagged empty.
//  - stack_depth >= 0: checks the control word, that the non-empty registers
//    form a contiguous run from ST0, and that the run length equals
//    stack_depth.
// Prints a diagnostic and the full state, asserts, and returns false on a
// mismatch; returns true otherwise.
static bool _verify_FPU(int stack_depth, char* s, CPU_State* state) {
  // NOTE(review): counter is incremented but never read in this function;
  // presumably kept as a debugging aid.
  static int counter = 0;
  FPU_State* fs = &state->_fpu_state;
  counter++;
  // For leaf calls, only verify that the top few elements remain empty.
  // We only need 1 empty at the top for C2 code.
  if( stack_depth < 0 ) {
    if( fs->tag_for_st(7) != 3 ) {
      printf("FPR7 not empty\n");
      state->print();
      assert(false, "error");
      return false;
    }
    return true; // All other stack states do not matter
  }

  assert((fs->_control_word._value & 0xffff) == StubRoutines::_fpu_cntrl_wrd_std,
         "bad FPU control word");

  // compute stack depth
  // d = number of leading non-empty registers; the second loop then skips
  // the empty ones, so i reaches 8 only if no non-empty register follows.
  int i = 0;
  while (i < FPU_State::number_of_registers && fs->tag_for_st(i) < 3) i++;
  int d = i;
  while (i < FPU_State::number_of_registers && fs->tag_for_st(i) == 3) i++;
  // verify findings
  if (i != FPU_State::number_of_registers) {
    // stack not contiguous
    printf("%s: stack not contiguous at ST%d\n", s, i);
    state->print();
    assert(false, "error");
    return false;
  }
  // check if computed stack depth corresponds to expected stack depth
  // NOTE(review): the stack_depth < 0 branch below cannot be reached, because
  // every negative stack_depth returns from the early check at the top.
  if (stack_depth < 0) {
    // expected stack depth is -stack_depth or less
    if (d > -stack_depth) {
      // too many elements on the stack
      printf("%s: <= %d stack elements expected but found %d\n", s, -stack_depth, d);
      state->print();
      assert(false, "error");
      return false;
    }
  } else {
    // expected stack depth is stack_depth
    if (d != stack_depth) {
      // wrong stack depth
      printf("%s: %d stack elements expected but found %d\n", s, stack_depth, d);
      state->print();
      assert(false, "error");
      return false;
    }
  }
  // everything is cool
  return true;
}


// Emits a run-time FPU stack check (no-op unless VerifyFPU is set): saves the
// CPU state, calls _verify_FPU(stack_depth, s, state) and executes int3 when
// it reports a failure. The pointer 's' itself is embedded in the emitted
// code (presumably the string must outlive that code).
void MacroAssembler::verify_FPU(int stack_depth, const char* s) {
  if (!VerifyFPU) return;
  push_CPU_state();
  push(rsp); // pass CPU state
  ExternalAddress msg((address) s);
  // pass message string s
  pushptr(msg.addr());
  push(stack_depth); // pass stack depth
  call(RuntimeAddress(CAST_FROM_FN_PTR(address, _verify_FPU)));
  addptr(rsp, 3 * wordSize); // discard arguments
  // check for error
  { Label L;
    testl(rax, rax);
    jcc(Assembler::notZero, L);
    int3(); // break if error condition
    bind(L);
  }
  pop_CPU_state();
}

// Loads the klass of the object in 'src' into 'dst'. With compressed oops
// (LP64 only) the field is read as 32 bits and then decoded.
void MacroAssembler::load_klass(Register dst, Register src) {
#ifdef _LP64
  if (UseCompressedOops) {
    movl(dst, Address(src, oopDesc::klass_offset_in_bytes()));
    decode_heap_oop_not_null(dst);
  } else
#endif
    movptr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
}

// Loads into 'dst' the prototype header of the klass of the object in 'src'.
// With compressed oops the narrow klass is decoded as part of the addressing
// of the second load where the shift allows it.
void MacroAssembler::load_prototype_header(Register dst, Register src) {
#ifdef _LP64
  if (UseCompressedOops) {
    assert (Universe::heap() != NULL, "java heap should be initialized");
    movl(dst, Address(src, oopDesc::klass_offset_in_bytes()));
    if (Universe::narrow_oop_shift() != 0) {
      assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
      if (LogMinObjAlignmentInBytes == Address::times_8) {
        // Shift of 3: fold the decode into a heapbase + index*8 address.
        movq(dst, Address(r12_heapbase, dst, Address::times_8, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
      } else {
        // OK to use shift since we don't need to preserve flags.
        shlq(dst, LogMinObjAlignmentInBytes);
        movq(dst, Address(r12_heapbase, dst, Address::times_1, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
      }
    } else {
      // Zero shift: the 32-bit value is used directly as the base address.
      movq(dst, Address(dst, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
    }
  } else
#endif
  {
    movptr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
    movptr(dst, Address(dst, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
  }
}

// Stores the klass in 'src' into the klass field of the object in 'dst'.
// With compressed oops 'src' is encoded in place (and so is modified) before
// a 32-bit store.
void MacroAssembler::store_klass(Register dst, Register src) {
#ifdef _LP64
  if (UseCompressedOops) {
    encode_heap_oop_not_null(src);
    movl(Address(dst, oopDesc::klass_offset_in_bytes()), src);
  } else
#endif
    movptr(Address(dst, oopDesc::klass_offset_in_bytes()), src);
}

// Loads an oop field from 'src' into 'dst', decoding it when compressed oops
// are in use.
void MacroAssembler::load_heap_oop(Register dst, Address src) {
#ifdef _LP64
  if (UseCompressedOops) {
    movl(dst, src);
    decode_heap_oop(dst);
  } else
#endif
    movptr(dst, src);
}

// Doesn't do verification, generates fixed size code
void MacroAssembler::load_heap_oop_not_null(Register dst, Address src) {
#ifdef _LP64
  if (UseCompressedOops) {
    movl(dst, src);
    decode_heap_oop_not_null(dst);
  } else
#endif
    movptr(dst, src);
}

// Stores the oop in 'src' to the field at 'dst'. With compressed oops 'src'
// is encoded in place (and so is modified), which is why 'dst' must not use
// 'src' in its addressing.
void MacroAssembler::store_heap_oop(Address dst, Register src) {
#ifdef _LP64
  if (UseCompressedOops) {
    assert(!dst.uses(src), "not enough registers");
    encode_heap_oop(src);
    movl(dst, src);
  } else
#endif
    movptr(dst, src);
}

// Used for storing NULLs.
8499 void MacroAssembler::store_heap_oop_null(Address dst) { 8500 #ifdef _LP64 8501 if (UseCompressedOops) { 8502 movl(dst, (int32_t)NULL_WORD); 8503 } else { 8504 movslq(dst, (int32_t)NULL_WORD); 8505 } 8506 #else 8507 movl(dst, (int32_t)NULL_WORD); 8508 #endif 8509 } 8510 8511 #ifdef _LP64 8512 void MacroAssembler::store_klass_gap(Register dst, Register src) { 8513 if (UseCompressedOops) { 8514 // Store to klass gap in destination 8515 movl(Address(dst, oopDesc::klass_gap_offset_in_bytes()), src); 8516 } 8517 } 8518 8519 #ifdef ASSERT 8520 void MacroAssembler::verify_heapbase(const char* msg) { 8521 assert (UseCompressedOops, "should be compressed"); 8522 assert (Universe::heap() != NULL, "java heap should be initialized"); 8523 if (CheckCompressedOops) { 8524 Label ok; 8525 push(rscratch1); // cmpptr trashes rscratch1 8526 cmpptr(r12_heapbase, ExternalAddress((address)Universe::narrow_oop_base_addr())); 8527 jcc(Assembler::equal, ok); 8528 stop(msg); 8529 bind(ok); 8530 pop(rscratch1); 8531 } 8532 } 8533 #endif 8534 8535 // Algorithm must match oop.inline.hpp encode_heap_oop. 
// Emits code that compresses the oop in 'r' in place; 'r' may hold NULL.
// With a NULL narrow-oop base only the shift (if any) is emitted.  Otherwise
// a zero in 'r' is first replaced by r12_heapbase (cmov on 'equal' after the
// test) so that the following subtraction yields zero again.
void MacroAssembler::encode_heap_oop(Register r) {
#ifdef ASSERT
  verify_heapbase("MacroAssembler::encode_heap_oop: heap base corrupted?");
#endif
  verify_oop(r, "broken oop in encode_heap_oop");
  if (Universe::narrow_oop_base() == NULL) {
    if (Universe::narrow_oop_shift() != 0) {
      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
      shrq(r, LogMinObjAlignmentInBytes);
    }
    return;
  }
  testq(r, r);
  cmovq(Assembler::equal, r, r12_heapbase);
  subq(r, r12_heapbase);
  shrq(r, LogMinObjAlignmentInBytes);
}

// Emits code that compresses the oop in 'r' in place without a null check in
// the encoding itself.  In ASSERT builds with CheckCompressedOops set, a
// runtime test is emitted that stops if 'r' is zero.
void MacroAssembler::encode_heap_oop_not_null(Register r) {
#ifdef ASSERT
  verify_heapbase("MacroAssembler::encode_heap_oop_not_null: heap base corrupted?");
  if (CheckCompressedOops) {
    Label ok;
    testq(r, r);
    jcc(Assembler::notEqual, ok);
    stop("null oop passed to encode_heap_oop_not_null");
    bind(ok);
  }
#endif
  verify_oop(r, "broken oop in encode_heap_oop_not_null");
  if (Universe::narrow_oop_base() != NULL) {
    subq(r, r12_heapbase);
  }
  if (Universe::narrow_oop_shift() != 0) {
    assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
    shrq(r, LogMinObjAlignmentInBytes);
  }
}

// Two-register form of encode_heap_oop_not_null: copies 'src' into 'dst'
// when the registers differ and then encodes 'dst'.
void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) {
#ifdef ASSERT
  verify_heapbase("MacroAssembler::encode_heap_oop_not_null2: heap base corrupted?");
  if (CheckCompressedOops) {
    Label ok;
    testq(src, src);
    jcc(Assembler::notEqual, ok);
    stop("null oop passed to encode_heap_oop_not_null2");
    bind(ok);
  }
#endif
  verify_oop(src, "broken oop in encode_heap_oop_not_null2");
  if (dst != src) {
    movq(dst, src);
  }
  if (Universe::narrow_oop_base() != NULL) {
    subq(dst, r12_heapbase);
  }
  if (Universe::narrow_oop_shift() != 0) {
    assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
    shrq(dst, LogMinObjAlignmentInBytes);
  }
}

// Emits code that expands the compressed oop in 'r' in place; 'r' may hold a
// compressed NULL.  With a non-NULL base, the branch directly after the shift
// skips adding r12_heapbase when the condition is 'equal' (flags as left by
// shlq), so a zero value is not turned into the heap base.
void MacroAssembler::decode_heap_oop(Register r) {
#ifdef ASSERT
  verify_heapbase("MacroAssembler::decode_heap_oop: heap base corrupted?");
#endif
  if (Universe::narrow_oop_base() == NULL) {
    if (Universe::narrow_oop_shift() != 0) {
      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
      shlq(r, LogMinObjAlignmentInBytes);
    }
  } else {
    Label done;
    shlq(r, LogMinObjAlignmentInBytes);
    jccb(Assembler::equal, done);
    addq(r, r12_heapbase);
    bind(done);
  }
  verify_oop(r, "broken oop in decode_heap_oop");
}

// Emits code that expands the compressed oop in 'r' in place, without any
// null handling.  When the shift is zero nothing is emitted (the base is
// asserted to be NULL in that case).
void MacroAssembler::decode_heap_oop_not_null(Register r) {
  // Note: it will change flags
  assert (UseCompressedOops, "should only be used for compressed headers");
  assert (Universe::heap() != NULL, "java heap should be initialized");
  // Cannot assert, unverified entry point counts instructions (see .ad file)
  // vtableStubs also counts instructions in pd_code_size_limit.
  // Also do not verify_oop as this is called by verify_oop.
  if (Universe::narrow_oop_shift() != 0) {
    assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
    shlq(r, LogMinObjAlignmentInBytes);
    if (Universe::narrow_oop_base() != NULL) {
      addq(r, r12_heapbase);
    }
  } else {
    assert (Universe::narrow_oop_base() == NULL, "sanity");
  }
}

// Two-register form of decode_heap_oop_not_null: decodes 'src' into 'dst'.
// When the shift equals the Address::times_8 scale factor, a single leaq
// computes r12_heapbase + src*8; otherwise the value is copied (if the
// registers differ), shifted, and the base is added when it is non-NULL.
void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) {
  // Note: it will change flags
  assert (UseCompressedOops, "should only be used for compressed headers");
  assert (Universe::heap() != NULL, "java heap should be initialized");
  // Cannot assert, unverified entry point counts instructions (see .ad file)
  // vtableStubs also counts instructions in pd_code_size_limit.
  // Also do not verify_oop as this is called by verify_oop.
  if (Universe::narrow_oop_shift() != 0) {
    assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
    if (LogMinObjAlignmentInBytes == Address::times_8) {
      leaq(dst, Address(r12_heapbase, src, Address::times_8, 0));
    } else {
      if (dst != src) {
        movq(dst, src);
      }
      shlq(dst, LogMinObjAlignmentInBytes);
      if (Universe::narrow_oop_base() != NULL) {
        addq(dst, r12_heapbase);
      }
    }
  } else {
    assert (Universe::narrow_oop_base() == NULL, "sanity");
    if (dst != src) {
      movq(dst, src);
    }
  }
}

// Looks up 'obj' in the oop recorder and emits mov_narrow_oop into the
// register 'dst' with an oop relocation for the resulting index.
void  MacroAssembler::set_narrow_oop(Register dst, jobject obj) {
  assert (UseCompressedOops, "should only be used for compressed headers");
  assert (Universe::heap() != NULL, "java heap should be initialized");
  assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
  int oop_index = oop_recorder()->find_index(obj);
  RelocationHolder rspec = oop_Relocation::spec(oop_index);
  mov_narrow_oop(dst, oop_index, rspec);
}

// Same as above with a memory destination.
void  MacroAssembler::set_narrow_oop(Address dst, jobject obj) {
  assert (UseCompressedOops, "should only be used for compressed headers");
  assert (Universe::heap() != NULL, "java heap should be initialized");
  assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
  int oop_index = oop_recorder()->find_index(obj);
  RelocationHolder rspec = oop_Relocation::spec(oop_index);
  mov_narrow_oop(dst, oop_index, rspec);
}

// Looks up 'obj' in the oop recorder and emits Assembler::cmp_narrow_oop of
// the register 'dst' against it, with an oop relocation for the index.
void  MacroAssembler::cmp_narrow_oop(Register dst, jobject obj) {
  assert (UseCompressedOops, "should only be used for compressed headers");
  assert (Universe::heap() != NULL, "java heap should be initialized");
  assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
  int oop_index = oop_recorder()->find_index(obj);
  RelocationHolder rspec = oop_Relocation::spec(oop_index);
  Assembler::cmp_narrow_oop(dst, oop_index, rspec);
}

// Same as above with a memory operand.
void  MacroAssembler::cmp_narrow_oop(Address dst, jobject obj) {
  assert (UseCompressedOops, "should only be used for compressed headers");
  assert (Universe::heap() != NULL, "java heap should be initialized");
  assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
  int oop_index = oop_recorder()->find_index(obj);
  RelocationHolder rspec = oop_Relocation::spec(oop_index);
  Assembler::cmp_narrow_oop(dst, oop_index, rspec);
}

// Reloads r12_heapbase from the location given by
// Universe::narrow_oop_base_addr(); emits nothing without compressed oops.
void MacroAssembler::reinit_heapbase() {
  if (UseCompressedOops) {
    movptr(r12_heapbase, ExternalAddress((address)Universe::narrow_oop_base_addr()));
  }
}
#endif // _LP64

// IndexOf for constant substrings with size >= 8 chars
// which don't need to be loaded through stack.
//
//   str1     - address of the string to scan
//   str2     - address of the substring
//   cnt1     - string length in elements (must be rdx)
//   cnt2     - scratch for the substring length (must be rax); it is loaded
//              from int_cnt2 by the generated code
//   int_cnt2 - substring length, known at code generation time (>= 8)
//   result   - on exit: element index of the first match, or -1
//   vec      - XMM scratch holding the current 16 bytes of the substring
//   tmp      - scratch (must be rcx), receives the pcmpestri match index
void MacroAssembler::string_indexofC8(Register str1, Register str2,
                                      Register cnt1, Register cnt2,
                                      int int_cnt2,  Register result,
                                      XMMRegister vec, Register tmp) {
  assert(UseSSE42Intrinsics, "SSE4.2 is required");

  // This method uses pcmpestri instruction with bound registers
  //   inputs:
  //     xmm - substring
  //     rax - substring length (elements count)
  //     mem - scanned string
  //     rdx - string length (elements count)
  //     0xd - mode: 1100 (substring search) + 01 (unsigned shorts)
  //   outputs:
  //     rcx - matched index in string
  assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri");

  Label RELOAD_SUBSTR, SCAN_TO_SUBSTR, SCAN_SUBSTR,
        RET_FOUND, RET_NOT_FOUND, EXIT, FOUND_SUBSTR,
        MATCH_SUBSTR_HEAD, RELOAD_STR, FOUND_CANDIDATE;

  // Note, inline_string_indexOf() generates checks:
  // if (substr.count > string.count) return -1;
  // if (substr.count == 0) return 0;
  assert(int_cnt2 >= 8, "this code isused only for cnt2 >= 8 chars");

  // Load substring.
  movdqu(vec, Address(str2, 0));
  movl(cnt2, int_cnt2);
  movptr(result, str1); // string addr

  if (int_cnt2 > 8) {
    // Skip over the re-entry code below on the first pass.
    jmpb(SCAN_TO_SUBSTR);

    // Reload substr for rescan, this code
    // is executed only for large substrings (> 8 chars)
    bind(RELOAD_SUBSTR);
    movdqu(vec, Address(str2, 0));
    negptr(cnt2); // Jumped here with negative cnt2, convert to positive

    bind(RELOAD_STR);
    // We came here after the beginning of the substring was
    // matched but the rest of it was not so we need to search
    // again. Start from the next element after the previous match.

    // cnt2 is number of substring remaining elements and
    // cnt1 is number of string remaining elements when cmp failed.
    // Restored cnt1 = cnt1 - cnt2 + int_cnt2
    subl(cnt1, cnt2);
    addl(cnt1, int_cnt2);
    movl(cnt2, int_cnt2); // Now restore cnt2

    decrementl(cnt1);     // Shift to next element
    cmpl(cnt1, cnt2);
    jccb(Assembler::negative, RET_NOT_FOUND);  // Left less than substring

    addptr(result, 2);

  } // (int_cnt2 > 8)

  // Scan string for start of substr in 16-byte vectors
  bind(SCAN_TO_SUBSTR);
  pcmpestri(vec, Address(result, 0), 0x0d);
  jccb(Assembler::below, FOUND_CANDIDATE);   // CF == 1
  subl(cnt1, 8);
  jccb(Assembler::lessEqual, RET_NOT_FOUND); // Scanned full string
  cmpl(cnt1, cnt2);
  jccb(Assembler::negative, RET_NOT_FOUND);  // Left less than substring
  addptr(result, 16);
  jmpb(SCAN_TO_SUBSTR);

  // Found a potential substr
  bind(FOUND_CANDIDATE);
  // Matched whole vector if first element matched (tmp(rcx) == 0).
  if (int_cnt2 == 8) {
    jccb(Assembler::overflow, RET_FOUND);    // OF == 1
  } else { // int_cnt2 > 8
    jccb(Assembler::overflow, FOUND_SUBSTR);
  }
  // After pcmpestri tmp(rcx) contains matched element index
  // Compute start addr of substr
  lea(result, Address(result, tmp, Address::times_2));

  // Make sure string is still long enough
  subl(cnt1, tmp);
  cmpl(cnt1, cnt2);
  if (int_cnt2 == 8) {
    jccb(Assembler::greaterEqual, SCAN_TO_SUBSTR);
  } else { // int_cnt2 > 8
    jccb(Assembler::greaterEqual, MATCH_SUBSTR_HEAD);
  }
  // Left less than substring.

  bind(RET_NOT_FOUND);
  movl(result, -1);
  jmpb(EXIT);

  if (int_cnt2 > 8) {
    // This code is optimized for the case when whole substring
    // is matched if its head is matched.
    bind(MATCH_SUBSTR_HEAD);
    pcmpestri(vec, Address(result, 0), 0x0d);
    // Reload only string if does not match
    jccb(Assembler::noOverflow, RELOAD_STR); // OF == 0

    Label CONT_SCAN_SUBSTR;
    // Compare the rest of substring (> 8 chars).
    bind(FOUND_SUBSTR);
    // First 8 chars are already matched.
    // From here on cnt2 is kept negative: 8 - int_cnt2 after these two
    // instructions.
    negptr(cnt2);
    addptr(cnt2, 8);

    bind(SCAN_SUBSTR);
    subl(cnt1, 8);
    cmpl(cnt2, -8); // Do not read beyond substring
    jccb(Assembler::lessEqual, CONT_SCAN_SUBSTR);
    // Back-up strings to avoid reading beyond substring:
    // cnt1 = cnt1 - cnt2 + 8
    addl(cnt1, cnt2); // cnt2 is negative
    addl(cnt1, 8);
    movl(cnt2, 8); negptr(cnt2);
    bind(CONT_SCAN_SUBSTR);
    if (int_cnt2 < (int)G) {
      movdqu(vec, Address(str2, cnt2, Address::times_2, int_cnt2*2));
      pcmpestri(vec, Address(result, cnt2, Address::times_2, int_cnt2*2), 0x0d);
    } else {
      // calculate index in register to avoid integer overflow (int_cnt2*2)
      movl(tmp, int_cnt2);
      addptr(tmp, cnt2);
      movdqu(vec, Address(str2, tmp, Address::times_2, 0));
      pcmpestri(vec, Address(result, tmp, Address::times_2, 0), 0x0d);
    }
    // Need to reload strings pointers if not matched whole vector
    jccb(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0
    addptr(cnt2, 8);
    jccb(Assembler::negative, SCAN_SUBSTR);
    // Fall through if found full substring

  } // (int_cnt2 > 8)

  bind(RET_FOUND);
  // Found result if we matched full small substring.
  // Compute substr offset
  subptr(result, str1);
  shrl(result, 1); // index
  bind(EXIT);

} // string_indexofC8

// Small strings are loaded through stack if they cross page boundary.
//
//   str1     - address of the string to scan (clobbered)
//   str2     - address of the substring (clobbered)
//   cnt1     - string length in elements (must be rdx)
//   cnt2     - substring length in elements (must be rax); only an input
//              when int_cnt2 == -1, otherwise loaded from int_cnt2
//   int_cnt2 - substring length if it is a constant in 1..7, else -1
//   result   - on exit: element index of the first match, or -1
//   vec      - XMM scratch holding the current 16 bytes of the substring
//   tmp      - scratch (must be rcx), receives the pcmpestri match index
//
// The generated code saves the incoming rsp in tmp, may copy short strings
// to the stack, pushes the saved rsp last, and restores it with 'pop(rsp)'
// at CLEANUP.
void MacroAssembler::string_indexof(Register str1, Register str2,
                                    Register cnt1, Register cnt2,
                                    int int_cnt2,  Register result,
                                    XMMRegister vec, Register tmp) {
  assert(UseSSE42Intrinsics, "SSE4.2 is required");
  //
  // int_cnt2 is length of small (< 8 chars) constant substring
  // or (-1) for non constant substring in which case its length
  // is in cnt2 register.
  //
  // Note, inline_string_indexOf() generates checks:
  // if (substr.count > string.count) return -1;
  // if (substr.count == 0) return 0;
  //
  assert(int_cnt2 == -1 || (0 < int_cnt2 && int_cnt2 < 8), "should be != 0");

  // This method uses pcmpestri instruction with bound registers
  //   inputs:
  //     xmm - substring
  //     rax - substring length (elements count)
  //     mem - scanned string
  //     rdx - string length (elements count)
  //     0xd - mode: 1100 (substring search) + 01 (unsigned shorts)
  //   outputs:
  //     rcx - matched index in string
  assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri");

  Label RELOAD_SUBSTR, SCAN_TO_SUBSTR, SCAN_SUBSTR, ADJUST_STR,
        RET_FOUND, RET_NOT_FOUND, CLEANUP, FOUND_SUBSTR,
        FOUND_CANDIDATE;

  { //========================================================
    // We don't know where these strings are located
    // and we can't read beyond them. Load them through stack.
    Label BIG_STRINGS, CHECK_STR, COPY_SUBSTR, COPY_STR;

    movptr(tmp, rsp); // save old SP

    if (int_cnt2 > 0) {     // small (< 8 chars) constant substring
      if (int_cnt2 == 1) {  // One char
        load_unsigned_short(result, Address(str2, 0));
        movdl(vec, result); // move 32 bits
      } else if (int_cnt2 == 2) { // Two chars
        movdl(vec, Address(str2, 0)); // move 32 bits
      } else if (int_cnt2 == 4) { // Four chars
        movq(vec, Address(str2, 0));  // move 64 bits
      } else { // cnt2 = { 3, 5, 6, 7 }
        // Array header size is 12 bytes in 32-bit VM
        // + 6 bytes for 3 chars == 18 bytes,
        // enough space to load vec and shift.
        assert(HeapWordSize*typeArrayKlass::header_size() >= 12,"sanity");
        // Load the 16 bytes that END at the last substring char, then shift
        // the leading bytes out so the substring starts at byte 0 of vec.
        movdqu(vec, Address(str2, (int_cnt2*2)-16));
        psrldq(vec, 16-(int_cnt2*2));
      }
    } else { // not constant substring
      cmpl(cnt2, 8);
      jccb(Assembler::aboveEqual, BIG_STRINGS); // Both strings are big enough

      // We can read beyond string if str+16 does not cross page boundary
      // since heaps are aligned and mapped by pages.
      assert(os::vm_page_size() < (int)G, "default page should be small");
      movl(result, str2); // We need only low 32 bits
      andl(result, (os::vm_page_size()-1));
      cmpl(result, (os::vm_page_size()-16));
      jccb(Assembler::belowEqual, CHECK_STR);

      // Move small strings to stack to allow load 16 bytes into vec.
      subptr(rsp, 16);
      // cnt2 is pushed on top of the 16-byte buffer, hence the extra
      // wordSize; the -2 compensates for the 1-based index cnt2.
      int stk_offset = wordSize-2;
      push(cnt2);

      bind(COPY_SUBSTR);
      load_unsigned_short(result, Address(str2, cnt2, Address::times_2, -2));
      movw(Address(rsp, cnt2, Address::times_2, stk_offset), result);
      decrement(cnt2);
      jccb(Assembler::notZero, COPY_SUBSTR);

      pop(cnt2);
      movptr(str2, rsp);  // New substring address
    } // non constant

    bind(CHECK_STR);
    cmpl(cnt1, 8);
    jccb(Assembler::aboveEqual, BIG_STRINGS);

    // Check cross page boundary.
    movl(result, str1); // We need only low 32 bits
    andl(result, (os::vm_page_size()-1));
    cmpl(result, (os::vm_page_size()-16));
    jccb(Assembler::belowEqual, BIG_STRINGS);

    subptr(rsp, 16);
    int stk_offset = -2;
    if (int_cnt2 < 0) { // not constant
      // cnt2 is used as the copy index below, so save it first.
      push(cnt2);
      stk_offset += wordSize;
    }
    movl(cnt2, cnt1);

    bind(COPY_STR);
    load_unsigned_short(result, Address(str1, cnt2, Address::times_2, -2));
    movw(Address(rsp, cnt2, Address::times_2, stk_offset), result);
    decrement(cnt2);
    jccb(Assembler::notZero, COPY_STR);

    if (int_cnt2 < 0) { // not constant
      pop(cnt2);
    }
    movptr(str1, rsp);  // New string address

    bind(BIG_STRINGS);
    // Load substring.
    if (int_cnt2 < 0) { // -1
      movdqu(vec, Address(str2, 0));
      push(cnt2);       // substr count
      push(str2);       // substr addr
      push(str1);       // string addr
    } else {
      // Small (< 8 chars) constant substrings are loaded already.
      movl(cnt2, int_cnt2);
    }
    push(tmp);  // original SP

  } // Finished loading

  //========================================================
  // Start search
  //

  movptr(result, str1); // string addr

  if (int_cnt2  < 0) {  // Only for non constant substring
    // Skip over the re-entry code below on the first pass.
    jmpb(SCAN_TO_SUBSTR);

    // SP saved at sp+0
    // String saved at sp+1*wordSize
    // Substr saved at sp+2*wordSize
    // Substr count saved at sp+3*wordSize

    // Reload substr for rescan, this code
    // is executed only for large substrings (> 8 chars)
    bind(RELOAD_SUBSTR);
    movptr(str2, Address(rsp, 2*wordSize));
    movl(cnt2, Address(rsp, 3*wordSize));
    movdqu(vec, Address(str2, 0));
    // We came here after the beginning of the substring was
    // matched but the rest of it was not so we need to search
    // again. Start from the next element after the previous match.
    subptr(str1, result); // Restore counter
    shrl(str1, 1);
    addl(cnt1, str1);
    decrementl(cnt1);   // Shift to next element
    cmpl(cnt1, cnt2);
    jccb(Assembler::negative, RET_NOT_FOUND);  // Left less than substring

    addptr(result, 2);
  } // non constant

  // Scan string for start of substr in 16-byte vectors
  bind(SCAN_TO_SUBSTR);
  assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri");
  pcmpestri(vec, Address(result, 0), 0x0d);
  jccb(Assembler::below, FOUND_CANDIDATE);   // CF == 1
  subl(cnt1, 8);
  jccb(Assembler::lessEqual, RET_NOT_FOUND); // Scanned full string
  cmpl(cnt1, cnt2);
  jccb(Assembler::negative, RET_NOT_FOUND);  // Left less than substring
  addptr(result, 16);

  bind(ADJUST_STR);
  cmpl(cnt1, 8); // Do not read beyond string
  jccb(Assembler::greaterEqual, SCAN_TO_SUBSTR);
  // Back-up string to avoid reading beyond string.
  lea(result, Address(result, cnt1, Address::times_2, -16));
  movl(cnt1, 8);
  jmpb(SCAN_TO_SUBSTR);

  // Found a potential substr
  bind(FOUND_CANDIDATE);
  // After pcmpestri tmp(rcx) contains matched element index

  // Make sure string is still long enough
  subl(cnt1, tmp);
  cmpl(cnt1, cnt2);
  jccb(Assembler::greaterEqual, FOUND_SUBSTR);
  // Left less than substring.

  bind(RET_NOT_FOUND);
  movl(result, -1);
  jmpb(CLEANUP);

  bind(FOUND_SUBSTR);
  // Compute start addr of substr
  lea(result, Address(result, tmp, Address::times_2));

  if (int_cnt2 > 0) { // Constant substring
    // Repeat search for small substring (< 8 chars)
    // from new point without reloading substring.
    // Have to check that we don't read beyond string.
    cmpl(tmp, 8-int_cnt2);
    jccb(Assembler::greater, ADJUST_STR);
    // Fall through if matched whole substring.
  } else { // non constant
    assert(int_cnt2 == -1, "should be != 0");

    addl(tmp, cnt2);
    // Found result if we matched whole substring.
    cmpl(tmp, 8);
    jccb(Assembler::lessEqual, RET_FOUND);

    // Repeat search for small substring (<= 8 chars)
    // from new point 'str1' without reloading substring.
    cmpl(cnt2, 8);
    // Have to check that we don't read beyond string.
    jccb(Assembler::lessEqual, ADJUST_STR);

    Label CHECK_NEXT, CONT_SCAN_SUBSTR, RET_FOUND_LONG;
    // Compare the rest of substring (> 8 chars).
    movptr(str1, result);

    cmpl(tmp, cnt2);
    // First 8 chars are already matched.
    jccb(Assembler::equal, CHECK_NEXT);

    bind(SCAN_SUBSTR);
    pcmpestri(vec, Address(str1, 0), 0x0d);
    // Need to reload strings pointers if not matched whole vector
    jcc(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0

    bind(CHECK_NEXT);
    subl(cnt2, 8);
    jccb(Assembler::lessEqual, RET_FOUND_LONG); // Found full substring
    addptr(str1, 16);
    addptr(str2, 16);
    subl(cnt1, 8);
    cmpl(cnt2, 8); // Do not read beyond substring
    jccb(Assembler::greaterEqual, CONT_SCAN_SUBSTR);
    // Back-up strings to avoid reading beyond substring.
    lea(str2, Address(str2, cnt2, Address::times_2, -16));
    lea(str1, Address(str1, cnt2, Address::times_2, -16));
    subl(cnt1, cnt2);
    movl(cnt2, 8);
    addl(cnt1, 8);
    bind(CONT_SCAN_SUBSTR);
    movdqu(vec, Address(str2, 0));
    jmpb(SCAN_SUBSTR);

    bind(RET_FOUND_LONG);
    // str1 was used as a cursor above; reload the string address that was
    // pushed at sp+1*wordSize so the index can be computed below.
    movptr(str1, Address(rsp, wordSize));
  } // non constant

  bind(RET_FOUND);
  // Compute substr offset
  subptr(result, str1);
  shrl(result, 1); // index

  bind(CLEANUP);
  pop(rsp); // restore SP

} // string_indexof

// Compare strings.
9121 void MacroAssembler::string_compare(Register str1, Register str2, 9122 Register cnt1, Register cnt2, Register result, 9123 XMMRegister vec1) { 9124 Label LENGTH_DIFF_LABEL, POP_LABEL, DONE_LABEL, WHILE_HEAD_LABEL; 9125 9126 // Compute the minimum of the string lengths and the 9127 // difference of the string lengths (stack). 9128 // Do the conditional move stuff 9129 movl(result, cnt1); 9130 subl(cnt1, cnt2); 9131 push(cnt1); 9132 cmov32(Assembler::lessEqual, cnt2, result); 9133 9134 // Is the minimum length zero? 9135 testl(cnt2, cnt2); 9136 jcc(Assembler::zero, LENGTH_DIFF_LABEL); 9137 9138 // Load first characters 9139 load_unsigned_short(result, Address(str1, 0)); 9140 load_unsigned_short(cnt1, Address(str2, 0)); 9141 9142 // Compare first characters 9143 subl(result, cnt1); 9144 jcc(Assembler::notZero, POP_LABEL); 9145 decrementl(cnt2); 9146 jcc(Assembler::zero, LENGTH_DIFF_LABEL); 9147 9148 { 9149 // Check after comparing first character to see if strings are equivalent 9150 Label LSkip2; 9151 // Check if the strings start at same location 9152 cmpptr(str1, str2); 9153 jccb(Assembler::notEqual, LSkip2); 9154 9155 // Check if the length difference is zero (from stack) 9156 cmpl(Address(rsp, 0), 0x0); 9157 jcc(Assembler::equal, LENGTH_DIFF_LABEL); 9158 9159 // Strings might not be equivalent 9160 bind(LSkip2); 9161 } 9162 9163 Address::ScaleFactor scale = Address::times_2; 9164 int stride = 8; 9165 9166 // Advance to next element 9167 addptr(str1, 16/stride); 9168 addptr(str2, 16/stride); 9169 9170 if (UseSSE42Intrinsics) { 9171 Label COMPARE_WIDE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_TAIL; 9172 int pcmpmask = 0x19; 9173 // Setup to compare 16-byte vectors 9174 movl(result, cnt2); 9175 andl(cnt2, ~(stride - 1)); // cnt2 holds the vector count 9176 jccb(Assembler::zero, COMPARE_TAIL); 9177 9178 lea(str1, Address(str1, result, scale)); 9179 lea(str2, Address(str2, result, scale)); 9180 negptr(result); 9181 9182 // pcmpestri 9183 // inputs: 9184 // vec1- 
substring 9185 // rax - negative string length (elements count) 9186 // mem - scaned string 9187 // rdx - string length (elements count) 9188 // pcmpmask - cmp mode: 11000 (string compare with negated result) 9189 // + 00 (unsigned bytes) or + 01 (unsigned shorts) 9190 // outputs: 9191 // rcx - first mismatched element index 9192 assert(result == rax && cnt2 == rdx && cnt1 == rcx, "pcmpestri"); 9193 9194 bind(COMPARE_WIDE_VECTORS); 9195 movdqu(vec1, Address(str1, result, scale)); 9196 pcmpestri(vec1, Address(str2, result, scale), pcmpmask); 9197 // After pcmpestri cnt1(rcx) contains mismatched element index 9198 9199 jccb(Assembler::below, VECTOR_NOT_EQUAL); // CF==1 9200 addptr(result, stride); 9201 subptr(cnt2, stride); 9202 jccb(Assembler::notZero, COMPARE_WIDE_VECTORS); 9203 9204 // compare wide vectors tail 9205 testl(result, result); 9206 jccb(Assembler::zero, LENGTH_DIFF_LABEL); 9207 9208 movl(cnt2, stride); 9209 movl(result, stride); 9210 negptr(result); 9211 movdqu(vec1, Address(str1, result, scale)); 9212 pcmpestri(vec1, Address(str2, result, scale), pcmpmask); 9213 jccb(Assembler::aboveEqual, LENGTH_DIFF_LABEL); 9214 9215 // Mismatched characters in the vectors 9216 bind(VECTOR_NOT_EQUAL); 9217 addptr(result, cnt1); 9218 movptr(cnt2, result); 9219 load_unsigned_short(result, Address(str1, cnt2, scale)); 9220 load_unsigned_short(cnt1, Address(str2, cnt2, scale)); 9221 subl(result, cnt1); 9222 jmpb(POP_LABEL); 9223 9224 bind(COMPARE_TAIL); // limit is zero 9225 movl(cnt2, result); 9226 // Fallthru to tail compare 9227 } 9228 9229 // Shift str2 and str1 to the end of the arrays, negate min 9230 lea(str1, Address(str1, cnt2, scale, 0)); 9231 lea(str2, Address(str2, cnt2, scale, 0)); 9232 negptr(cnt2); 9233 9234 // Compare the rest of the elements 9235 bind(WHILE_HEAD_LABEL); 9236 load_unsigned_short(result, Address(str1, cnt2, scale, 0)); 9237 load_unsigned_short(cnt1, Address(str2, cnt2, scale, 0)); 9238 subl(result, cnt1); 9239 jccb(Assembler::notZero, 
POP_LABEL); 9240 increment(cnt2); 9241 jccb(Assembler::notZero, WHILE_HEAD_LABEL); 9242 9243 // Strings are equal up to min length. Return the length difference. 9244 bind(LENGTH_DIFF_LABEL); 9245 pop(result); 9246 jmpb(DONE_LABEL); 9247 9248 // Discard the stored length difference 9249 bind(POP_LABEL); 9250 pop(cnt1); 9251 9252 // That's it 9253 bind(DONE_LABEL); 9254 } 9255 9256 // Compare char[] arrays aligned to 4 bytes or substrings. 9257 void MacroAssembler::char_arrays_equals(bool is_array_equ, Register ary1, Register ary2, 9258 Register limit, Register result, Register chr, 9259 XMMRegister vec1, XMMRegister vec2) { 9260 Label TRUE_LABEL, FALSE_LABEL, DONE, COMPARE_VECTORS, COMPARE_CHAR; 9261 9262 int length_offset = arrayOopDesc::length_offset_in_bytes(); 9263 int base_offset = arrayOopDesc::base_offset_in_bytes(T_CHAR); 9264 9265 // Check the input args 9266 cmpptr(ary1, ary2); 9267 jcc(Assembler::equal, TRUE_LABEL); 9268 9269 if (is_array_equ) { 9270 // Need additional checks for arrays_equals. 
9271 testptr(ary1, ary1); 9272 jcc(Assembler::zero, FALSE_LABEL); 9273 testptr(ary2, ary2); 9274 jcc(Assembler::zero, FALSE_LABEL); 9275 9276 // Check the lengths 9277 movl(limit, Address(ary1, length_offset)); 9278 cmpl(limit, Address(ary2, length_offset)); 9279 jcc(Assembler::notEqual, FALSE_LABEL); 9280 } 9281 9282 // count == 0 9283 testl(limit, limit); 9284 jcc(Assembler::zero, TRUE_LABEL); 9285 9286 if (is_array_equ) { 9287 // Load array address 9288 lea(ary1, Address(ary1, base_offset)); 9289 lea(ary2, Address(ary2, base_offset)); 9290 } 9291 9292 shll(limit, 1); // byte count != 0 9293 movl(result, limit); // copy 9294 9295 if (UseSSE42Intrinsics) { 9296 // With SSE4.2, use double quad vector compare 9297 Label COMPARE_WIDE_VECTORS, COMPARE_TAIL; 9298 9299 // Compare 16-byte vectors 9300 andl(result, 0x0000000e); // tail count (in bytes) 9301 andl(limit, 0xfffffff0); // vector count (in bytes) 9302 jccb(Assembler::zero, COMPARE_TAIL); 9303 9304 lea(ary1, Address(ary1, limit, Address::times_1)); 9305 lea(ary2, Address(ary2, limit, Address::times_1)); 9306 negptr(limit); 9307 9308 bind(COMPARE_WIDE_VECTORS); 9309 movdqu(vec1, Address(ary1, limit, Address::times_1)); 9310 movdqu(vec2, Address(ary2, limit, Address::times_1)); 9311 pxor(vec1, vec2); 9312 9313 ptest(vec1, vec1); 9314 jccb(Assembler::notZero, FALSE_LABEL); 9315 addptr(limit, 16); 9316 jcc(Assembler::notZero, COMPARE_WIDE_VECTORS); 9317 9318 testl(result, result); 9319 jccb(Assembler::zero, TRUE_LABEL); 9320 9321 movdqu(vec1, Address(ary1, result, Address::times_1, -16)); 9322 movdqu(vec2, Address(ary2, result, Address::times_1, -16)); 9323 pxor(vec1, vec2); 9324 9325 ptest(vec1, vec1); 9326 jccb(Assembler::notZero, FALSE_LABEL); 9327 jmpb(TRUE_LABEL); 9328 9329 bind(COMPARE_TAIL); // limit is zero 9330 movl(limit, result); 9331 // Fallthru to tail compare 9332 } 9333 9334 // Compare 4-byte vectors 9335 andl(limit, 0xfffffffc); // vector count (in bytes) 9336 jccb(Assembler::zero, COMPARE_CHAR); 

  lea(ary1, Address(ary1, limit, Address::times_1));
  lea(ary2, Address(ary2, limit, Address::times_1));
  negptr(limit);

  bind(COMPARE_VECTORS);
  movl(chr, Address(ary1, limit, Address::times_1));
  cmpl(chr, Address(ary2, limit, Address::times_1));
  jccb(Assembler::notEqual, FALSE_LABEL);
  addptr(limit, 4);
  jcc(Assembler::notZero, COMPARE_VECTORS);

  // Compare trailing char (final 2 bytes), if any
  bind(COMPARE_CHAR);
  testl(result, 0x2); // tail char
  jccb(Assembler::zero, TRUE_LABEL);
  load_unsigned_short(chr, Address(ary1, 0));
  load_unsigned_short(limit, Address(ary2, 0));
  cmpl(chr, limit);
  jccb(Assembler::notEqual, FALSE_LABEL);

  bind(TRUE_LABEL);
  movl(result, 1); // return true
  jmpb(DONE);

  bind(FALSE_LABEL);
  xorl(result, result); // return false

  // That's it
  bind(DONE);
}

// BLOCK_COMMENT(str) forwards to block_comment(str) in non-PRODUCT builds and
// expands to nothing in PRODUCT builds.
#ifdef PRODUCT
#define BLOCK_COMMENT(str) /* nothing */
#else
#define BLOCK_COMMENT(str) block_comment(str)
#endif

// BIND(label) binds the label and, in non-PRODUCT builds, also emits a block
// comment of the form "<label name>:".
// NOTE: the macro expands to TWO statements, so it must not be used as the
// sole, unbraced body of an if/else/loop.
#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")

// Emits code that stores a fill pattern to consecutive memory starting at the
// address held in 'to'.
//
//   t       - element type; only T_BYTE, T_SHORT and T_INT are accepted
//             (anything else hits ShouldNotReachHere()).
//   aligned - when true, the initial byte/short alignment stores are not
//             emitted (presumably the caller guarantees 'to' is already
//             suitably aligned -- confirm against callers).
//   to      - register holding the destination address; advanced by the
//             emitted code.
//   value   - register holding the fill value; overwritten with the value
//             replicated across all 32 bits.
//   count   - register holding the number of ELEMENTS to fill; modified by the
//             emitted code.
//   rtmp    - scratch register used while replicating 'value'.
//   xtmp    - scratch XMM register; only written on the UseSSE >= 2 path.
//
// 'shift' is chosen so that (1 << shift) elements occupy 4 bytes; hence
// (2 << shift) elements are 8 bytes and (8 << shift) elements are 32 bytes.
void MacroAssembler::generate_fill(BasicType t, bool aligned,
                                   Register to, Register value, Register count,
                                   Register rtmp, XMMRegister xtmp) {
  assert_different_registers(to, value, count, rtmp);
  Label L_exit, L_skip_align1, L_skip_align2, L_fill_byte;
  Label L_fill_2_bytes, L_fill_4_bytes;

  // log2 of the number of elements that fit into 4 bytes
  int shift = -1;
  switch (t) {
    case T_BYTE:
      shift = 2;
      break;
    case T_SHORT:
      shift = 1;
      break;
    case T_INT:
      shift = 0;
      break;
    default: ShouldNotReachHere();
  }

  // Replicate the element value so that 'value' holds a full 32-bit pattern:
  // byte -> 16 bits first, then (byte and short) 16 bits -> 32 bits.
  if (t == T_BYTE) {
    andl(value, 0xff);
    movl(rtmp, value);
    shll(rtmp, 8);
    orl(value, rtmp);
  }
  if (t == T_SHORT) {
    andl(value, 0xffff);
  }
  if (t == T_BYTE || t == T_SHORT) {
    movl(rtmp, value);
    shll(rtmp, 16);
    orl(value, rtmp);
  }

  cmpl(count, 2<<shift); // Short arrays (< 8 bytes) fill by element
  jcc(Assembler::below, L_fill_4_bytes); // use unsigned cmp
  if (!UseUnalignedLoadStores && !aligned && (t == T_BYTE || t == T_SHORT)) {
    // align destination address ('to') at 4 bytes address boundary
    if (t == T_BYTE) {
      // One byte misalignment happens only for byte arrays
      testptr(to, 1);
      jccb(Assembler::zero, L_skip_align1);
      movb(Address(to, 0), value);
      increment(to);
      decrement(count);
      BIND(L_skip_align1);
    }
    // Two bytes misalignment happens only for byte and short (char) arrays
    testptr(to, 2);
    jccb(Assembler::zero, L_skip_align2);
    movw(Address(to, 0), value);
    addptr(to, 2);
    subl(count, 1<<(shift-1)); // 2 bytes worth of elements
    BIND(L_skip_align2);
  }
  if (UseSSE < 2) {
    // No SSE2: fill using 32-bit general purpose register stores only.
    Label L_fill_32_bytes_loop, L_check_fill_8_bytes, L_fill_8_bytes_loop, L_fill_8_bytes;
    // Fill 32-byte chunks
    // count is pre-decremented by one chunk so that the sign of the result
    // tells whether a full 32-byte chunk is still available.
    subl(count, 8 << shift);
    jcc(Assembler::less, L_check_fill_8_bytes);
    align(16);

    BIND(L_fill_32_bytes_loop);

    // Eight 4-byte stores (unrolled at code generation time) cover 32 bytes.
    for (int i = 0; i < 32; i += 4) {
      movl(Address(to, i), value);
    }

    addptr(to, 32);
    subl(count, 8 << shift);
    jcc(Assembler::greaterEqual, L_fill_32_bytes_loop);
    BIND(L_check_fill_8_bytes);
    // Undo the pre-decrement; a zero result means nothing is left to fill.
    addl(count, 8 << shift);
    jccb(Assembler::zero, L_exit);
    jmpb(L_fill_8_bytes);

    //
    // length is too short, just fill qwords
    //
    BIND(L_fill_8_bytes_loop);
    movl(Address(to, 0), value);
    movl(Address(to, 4), value);
    addptr(to, 8);
    BIND(L_fill_8_bytes);
    subl(count, 1 << (shift + 1)); // 8 bytes worth of elements
    jcc(Assembler::greaterEqual, L_fill_8_bytes_loop);
    // fall through to fill 4 bytes
  } else {
    Label L_fill_32_bytes;
    if (!UseUnalignedLoadStores) {
      // align to 8 bytes, we know we are 4 byte aligned to start
      testptr(to, 4);
      jccb(Assembler::zero, L_fill_32_bytes);
      movl(Address(to, 0), value);
      addptr(to, 4);
      subl(count, 1<<shift); // 4 bytes worth of elements
    }
    BIND(L_fill_32_bytes);
    {
      assert( UseSSE >= 2, "supported cpu only" );
      Label L_fill_32_bytes_loop, L_check_fill_8_bytes, L_fill_8_bytes_loop, L_fill_8_bytes;
      // Fill 32-byte chunks
      // Copy the 32-bit pattern into xtmp and replicate it across the register.
      movdl(xtmp, value);
      pshufd(xtmp, xtmp, 0);

      // Same pre-decrement scheme as the non-SSE path above.
      subl(count, 8 << shift);
      jcc(Assembler::less, L_check_fill_8_bytes);
      align(16);

      BIND(L_fill_32_bytes_loop);

      if (UseUnalignedLoadStores) {
        // Two 16-byte unaligned stores
        movdqu(Address(to, 0), xtmp);
        movdqu(Address(to, 16), xtmp);
      } else {
        // Four 8-byte stores
        movq(Address(to, 0), xtmp);
        movq(Address(to, 8), xtmp);
        movq(Address(to, 16), xtmp);
        movq(Address(to, 24), xtmp);
      }

      addptr(to, 32);
      subl(count, 8 << shift);
      jcc(Assembler::greaterEqual, L_fill_32_bytes_loop);
      BIND(L_check_fill_8_bytes);
      // Undo the pre-decrement; a zero result means nothing is left to fill.
      addl(count, 8 << shift);
      jccb(Assembler::zero, L_exit);
      jmpb(L_fill_8_bytes);

      //
      // length is too short, just fill qwords
      //
      BIND(L_fill_8_bytes_loop);
      movq(Address(to, 0), xtmp);
      addptr(to, 8);
      BIND(L_fill_8_bytes);
      subl(count, 1 << (shift + 1)); // 8 bytes worth of elements
      jcc(Assembler::greaterEqual, L_fill_8_bytes_loop);
    }
  }
  // The 8-byte loops above exit with count decremented once too often, but
  // only multiples of (2 << shift) were subtracted, so the low bits tested
  // below still describe the remaining 4/2/1-byte tail.
  // fill trailing 4 bytes
  BIND(L_fill_4_bytes);
  testl(count, 1<<shift);
  jccb(Assembler::zero, L_fill_2_bytes);
  movl(Address(to, 0), value);
  if (t == T_BYTE || t == T_SHORT) {
    addptr(to, 4);
    BIND(L_fill_2_bytes);
    // fill trailing 2 bytes
    testl(count, 1<<(shift-1));
    jccb(Assembler::zero, L_fill_byte);
    movw(Address(to, 0), value);
    if (t == T_BYTE) {
      addptr(to, 2);
      BIND(L_fill_byte);
      // fill trailing byte
      testl(count, 1);
      jccb(Assembler::zero, L_exit);
      movb(Address(to, 0), value);
    } else {
      // T_SHORT: no single-byte tail; the label is bound directly before the exit.
      BIND(L_fill_byte);
    }
  } else {
    // T_INT: no sub-4-byte tail; the label is bound directly before the exit.
    BIND(L_fill_2_bytes);
  }
  BIND(L_exit);
}
#undef BIND
#undef BLOCK_COMMENT


Assembler::Condition
MacroAssembler::negate_condition(Assembler::Condition cond) { 9550 switch (cond) { 9551 // Note some conditions are synonyms for others 9552 case Assembler::zero: return Assembler::notZero; 9553 case Assembler::notZero: return Assembler::zero; 9554 case Assembler::less: return Assembler::greaterEqual; 9555 case Assembler::lessEqual: return Assembler::greater; 9556 case Assembler::greater: return Assembler::lessEqual; 9557 case Assembler::greaterEqual: return Assembler::less; 9558 case Assembler::below: return Assembler::aboveEqual; 9559 case Assembler::belowEqual: return Assembler::above; 9560 case Assembler::above: return Assembler::belowEqual; 9561 case Assembler::aboveEqual: return Assembler::below; 9562 case Assembler::overflow: return Assembler::noOverflow; 9563 case Assembler::noOverflow: return Assembler::overflow; 9564 case Assembler::negative: return Assembler::positive; 9565 case Assembler::positive: return Assembler::negative; 9566 case Assembler::parity: return Assembler::noParity; 9567 case Assembler::noParity: return Assembler::parity; 9568 } 9569 ShouldNotReachHere(); return Assembler::overflow; 9570 } 9571 9572 SkipIfEqual::SkipIfEqual( 9573 MacroAssembler* masm, const bool* flag_addr, bool value) { 9574 _masm = masm; 9575 _masm->cmp8(ExternalAddress((address)flag_addr), value); 9576 _masm->jcc(Assembler::equal, _label); 9577 } 9578 9579 SkipIfEqual::~SkipIfEqual() { 9580 _masm->bind(_label); 9581 }