/*
 * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "incls/_precompiled.incl"
#include "incls/_assembler_x86.cpp.incl"

// Implementation of AddressLiteral

// Record a literal code-stream address together with the relocation
// specification (_rspec) that matches the given relocation type, so the
// emitted word can later be found and patched by the relocation machinery.
AddressLiteral::AddressLiteral(address target, relocInfo::relocType rtype) {
  _is_lval = false;
  _target = target;
  switch (rtype) {
  case relocInfo::oop_type:
    // Oops are a special case. Normally they would be their own section
    // but in cases like icBuffer they are literals in the code stream that
    // we don't have a section for. We use none so that we get a literal address
    // which is always patchable.
    // (Hence: deliberately no _rspec assignment here — falls through to the
    // default-constructed, i.e. "none", relocation holder.)
    break;
  case relocInfo::external_word_type:
    _rspec = external_word_Relocation::spec(target);
    break;
  case relocInfo::internal_word_type:
    _rspec = internal_word_Relocation::spec(target);
    break;
  case relocInfo::opt_virtual_call_type:
    _rspec = opt_virtual_call_Relocation::spec();
    break;
  case relocInfo::static_call_type:
    _rspec = static_call_Relocation::spec();
    break;
  case relocInfo::runtime_call_type:
    _rspec = runtime_call_Relocation::spec();
    break;
  case relocInfo::poll_type:
  case relocInfo::poll_return_type:
    _rspec = Relocation::spec_simple(rtype);
    break;
  case relocInfo::none:
    break;
  default:
    ShouldNotReachHere();
    break;
  }
}

// Implementation of Address

#ifdef _LP64

Address Address::make_array(ArrayAddress adr) {
  // Not implementable on 64bit machines
  // Should have been handled higher up the call chain.
  ShouldNotReachHere();
  return Address();
}

// exceedingly dangerous constructor
// Builds an absolute-displacement Address (no base, no index) plus a
// relocation spec for the location 'loc'; only meaningful for reloc types
// where a bare disp32 makes sense.
Address::Address(int disp, address loc, relocInfo::relocType rtype) {
  _base  = noreg;
  _index = noreg;
  _scale = no_scale;
  _disp  = disp;
  switch (rtype) {
    case relocInfo::external_word_type:
      _rspec = external_word_Relocation::spec(loc);
      break;
    case relocInfo::internal_word_type:
      _rspec = internal_word_Relocation::spec(loc);
      break;
    case relocInfo::runtime_call_type:
      // HMM
      _rspec = runtime_call_Relocation::spec();
      break;
    case relocInfo::poll_type:
    case relocInfo::poll_return_type:
      _rspec = Relocation::spec_simple(rtype);
      break;
    case relocInfo::none:
      break;
    default:
      ShouldNotReachHere();
  }
}
#else // LP64

// 32-bit only: fold an ArrayAddress (literal base + scaled index) into a
// single Address whose disp is the literal base address.
Address Address::make_array(ArrayAddress adr) {
  AddressLiteral base = adr.base();
  Address index = adr.index();
  assert(index._disp == 0, "must not have disp"); // maybe it can?
  Address array(index._base, index._index, index._scale, (intptr_t) base.target());
  array._rspec = base._rspec;
  return array;
}

// exceedingly dangerous constructor
// 32-bit only: an absolute address used directly as the displacement,
// with caller-supplied relocation.
Address::Address(address loc, RelocationHolder spec) {
  _base  = noreg;
  _index = noreg;
  _scale = no_scale;
  _disp  = (intptr_t) loc;
  _rspec = spec;
}

#endif // _LP64



// Convert the raw encoding form into the form expected by the constructor for
// Address. An index of 4 (rsp) corresponds to having no index, so convert
// that to noreg for the Address constructor.
Address Address::make_raw(int base, int index, int scale, int disp, bool disp_is_oop) {
  RelocationHolder rspec;
  if (disp_is_oop) {
    rspec = Relocation::spec_simple(relocInfo::oop_type);
  }
  // rsp's encoding (100b) in the index field means "no index" in x86 SIB.
  bool valid_index = index != rsp->encoding();
  if (valid_index) {
    Address madr(as_Register(base), as_Register(index), (Address::ScaleFactor)scale, in_ByteSize(disp));
    madr._rspec = rspec;
    return madr;
  } else {
    Address madr(as_Register(base), noreg, Address::no_scale, in_ByteSize(disp));
    madr._rspec = rspec;
    return madr;
  }
}

// Implementation of Assembler

// Filler byte used for code-buffer padding; hlt traps if executed.
int AbstractAssembler::code_fill_byte() {
  return (u_char)'\xF4'; // hlt
}

// make this go away someday
// Convenience overload: wraps a bare reloc type in a simple spec.
void Assembler::emit_data(jint data, relocInfo::relocType rtype, int format) {
  if (rtype == relocInfo::none)
    emit_long(data);
  else
    emit_data(data, Relocation::spec_simple(rtype), format);
}

// Emit a 32-bit data word at the current pc, recording a relocation
// (anchored at the enclosing instruction's mark) if the spec calls for one.
void Assembler::emit_data(jint data, RelocationHolder const& rspec, int format) {
  assert(imm_operand == 0, "default format must be immediate in this file");
  assert(inst_mark() != NULL, "must be inside InstructionMark");
  if (rspec.type() != relocInfo::none) {
#ifdef ASSERT
    check_relocation(rspec, format);
#endif
    // Do not use AbstractAssembler::relocate, which is not intended for
    // embedded words.  Instead, relocate to the enclosing instruction.

    // hack. call32 is too wide for mask so use disp32
    if (format == call32_operand)
      code_section()->relocate(inst_mark(), rspec, disp32_operand);
    else
      code_section()->relocate(inst_mark(), rspec, format);
  }
  emit_long(data);
}

// Strip the REX extension bit: encodings 8..15 (r8..r15) map to the
// low 3 bits 0..7 used in ModRM/SIB fields.
static int encode(Register r) {
  int enc = r->encoding();
  if (enc >= 8) {
    enc -= 8;
  }
  return enc;
}

// Same as above for xmm8..xmm15.
static int encode(XMMRegister r) {
  int enc = r->encoding();
  if (enc >= 8) {
    enc -= 8;
  }
  return enc;
}

// Byte-sized arithmetic: opcode, ModRM with dst in r/m, then imm8.
void Assembler::emit_arith_b(int op1, int op2, Register dst, int imm8) {
  assert(dst->has_byte_register(), "must have byte register");
  assert(isByte(op1) && isByte(op2), "wrong opcode");
  assert(isByte(imm8), "not a byte");
  assert((op1 & 0x01) == 0, "should be 8bit operation");
  emit_byte(op1);
  emit_byte(op2 | encode(dst));
  emit_byte(imm8);
}


// 32-bit arithmetic with immediate: uses the sign-extended imm8 form
// (opcode | 0x02) when the immediate fits in a byte.
void Assembler::emit_arith(int op1, int op2, Register dst, int32_t imm32) {
  assert(isByte(op1) && isByte(op2), "wrong opcode");
  assert((op1 & 0x01) == 1, "should be 32bit operation");
  assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
  if (is8bit(imm32)) {
    emit_byte(op1 | 0x02); // set sign bit
    emit_byte(op2 | encode(dst));
    emit_byte(imm32 & 0xFF);
  } else {
    emit_byte(op1);
    emit_byte(op2 | encode(dst));
    emit_long(imm32);
  }
}

// immediate-to-memory forms
// The third argument to emit_operand is the size of the trailing immediate,
// needed for correct RIP-relative displacement computation.
void Assembler::emit_arith_operand(int op1, Register rm, Address adr, int32_t imm32) {
  assert((op1 & 0x01) == 1, "should be 32bit operation");
  assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
  if (is8bit(imm32)) {
    emit_byte(op1 | 0x02); // set sign bit
    emit_operand(rm, adr, 1);
    emit_byte(imm32 & 0xFF);
  } else {
    emit_byte(op1);
    emit_operand(rm, adr, 4);
    emit_long(imm32);
  }
}

// 32-bit only: arithmetic against an embedded oop literal.
void Assembler::emit_arith(int op1, int
 op2, Register dst, jobject obj) {
  LP64_ONLY(ShouldNotReachHere());
  assert(isByte(op1) && isByte(op2), "wrong opcode");
  assert((op1 & 0x01) == 1, "should be 32bit operation");
  assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
  InstructionMark im(this);
  emit_byte(op1);
  emit_byte(op2 | encode(dst));
  emit_data((intptr_t)obj, relocInfo::oop_type, 0);
}


// Register-register arithmetic: opcode, then ModRM with dst in reg field
// and src in r/m field.
void Assembler::emit_arith(int op1, int op2, Register dst, Register src) {
  assert(isByte(op1) && isByte(op2), "wrong opcode");
  emit_byte(op1);
  emit_byte(op2 | encode(dst) << 3 | encode(src));
}


// Emit the ModRM (and, as needed, SIB and displacement) bytes for a memory
// operand with register 'reg' in the ModRM reg field.  Chooses the shortest
// encoding: no disp, disp8, or disp32; handles the x86 special cases
// (rsp/r12 require a SIB byte, rbp/r13 cannot use the disp-less form).
// 'rip_relative_correction' accounts for immediate bytes that follow the
// displacement when computing a 64-bit RIP-relative offset.
void Assembler::emit_operand(Register reg, Register base, Register index,
                             Address::ScaleFactor scale, int disp,
                             RelocationHolder const& rspec,
                             int rip_relative_correction) {
  relocInfo::relocType rtype = (relocInfo::relocType) rspec.type();

  // Encode the registers as needed in the fields they are used in

  int regenc = encode(reg) << 3;
  int indexenc = index->is_valid() ? encode(index) << 3 : 0;
  int baseenc = base->is_valid() ? encode(base) : 0;

  if (base->is_valid()) {
    if (index->is_valid()) {
      assert(scale != Address::no_scale, "inconsistent address");
      // [base + index*scale + disp]
      if (disp == 0 && rtype == relocInfo::none &&
          base != rbp LP64_ONLY(&& base != r13)) {
        // [base + index*scale]
        // [00 reg 100][ss index base]
        assert(index != rsp, "illegal addressing mode");
        emit_byte(0x04 | regenc);
        emit_byte(scale << 6 | indexenc | baseenc);
      } else if (is8bit(disp) && rtype == relocInfo::none) {
        // [base + index*scale + imm8]
        // [01 reg 100][ss index base] imm8
        assert(index != rsp, "illegal addressing mode");
        emit_byte(0x44 | regenc);
        emit_byte(scale << 6 | indexenc | baseenc);
        emit_byte(disp & 0xFF);
      } else {
        // [base + index*scale + disp32]
        // [10 reg 100][ss index base] disp32
        assert(index != rsp, "illegal addressing mode");
        emit_byte(0x84 | regenc);
        emit_byte(scale << 6 | indexenc | baseenc);
        emit_data(disp, rspec, disp32_operand);
      }
    } else if (base == rsp LP64_ONLY(|| base == r12)) {
      // [rsp + disp]
      if (disp == 0 && rtype == relocInfo::none) {
        // [rsp]
        // [00 reg 100][00 100 100]
        emit_byte(0x04 | regenc);
        emit_byte(0x24);
      } else if (is8bit(disp) && rtype == relocInfo::none) {
        // [rsp + imm8]
        // [01 reg 100][00 100 100] disp8
        emit_byte(0x44 | regenc);
        emit_byte(0x24);
        emit_byte(disp & 0xFF);
      } else {
        // [rsp + imm32]
        // [10 reg 100][00 100 100] disp32
        emit_byte(0x84 | regenc);
        emit_byte(0x24);
        emit_data(disp, rspec, disp32_operand);
      }
    } else {
      // [base + disp]
      assert(base != rsp LP64_ONLY(&& base != r12), "illegal addressing mode");
      if (disp == 0 && rtype == relocInfo::none &&
          base != rbp LP64_ONLY(&& base != r13)) {
        // [base]
        // [00 reg base]
        emit_byte(0x00 | regenc | baseenc);
      } else if (is8bit(disp) && rtype == relocInfo::none) {
        // [base + disp8]
        // [01 reg base] disp8
        emit_byte(0x40 | regenc | baseenc);
        emit_byte(disp & 0xFF);
      } else {
        // [base + disp32]
        // [10 reg base] disp32
        emit_byte(0x80 | regenc | baseenc);
        emit_data(disp, rspec, disp32_operand);
      }
    }
  } else {
    if (index->is_valid()) {
      assert(scale != Address::no_scale, "inconsistent address");
      // [index*scale + disp]
      // [00 reg 100][ss index 101] disp32
      assert(index != rsp, "illegal addressing mode");
      emit_byte(0x04 | regenc);
      emit_byte(scale << 6 | indexenc | 0x05);
      emit_data(disp, rspec, disp32_operand);
    } else if (rtype != relocInfo::none ) {
      // [disp] (64bit) RIP-RELATIVE (32bit) abs
      // [00 000 101] disp32

      emit_byte(0x05 | regenc);
      // Note that the RIP-rel. correction applies to the generated
      // disp field, but _not_ to the target address in the rspec.

      // disp was created by converting the target address minus the pc
      // at the start of the instruction. That needs more correction here.
      // intptr_t disp = target - next_ip;
      assert(inst_mark() != NULL, "must be inside InstructionMark");
      address next_ip = pc() + sizeof(int32_t) + rip_relative_correction;
      int64_t adjusted = disp;
      // Do rip-rel adjustment for 64bit
      LP64_ONLY(adjusted -= (next_ip - inst_mark()));
      assert(is_simm32(adjusted),
             "must be 32bit offset (RIP relative address)");
      emit_data((int32_t) adjusted, rspec, disp32_operand);

    } else {
      // 32bit never did this, did everything as the rip-rel/disp code above
      // [disp] ABSOLUTE
      // [00 reg 100][00 100 101] disp32
      emit_byte(0x04 | regenc);
      emit_byte(0x25);
      emit_data(disp, rspec, disp32_operand);
    }
  }
}

// XMM variant simply reuses the Register path; only the 3-bit field
// encoding matters here.
void Assembler::emit_operand(XMMRegister reg, Register base, Register index,
                             Address::ScaleFactor scale, int disp,
                             RelocationHolder const& rspec) {
  emit_operand((Register)reg, base, index, scale, disp, rspec);
}

// Secret local extension to Assembler::WhichOperand:
#define end_pc_operand (_WhichOperand_limit)

address Assembler::locate_operand(address inst, WhichOperand which) {
  // Decode the given instruction, and return the address of
  // an embedded 32-bit operand word.

  // If "which" is disp32_operand, selects the displacement portion
  // of an effective address specifier.
  // If "which" is imm64_operand, selects the trailing immediate constant.
  // If "which" is call32_operand, selects the displacement of a call or jump.
  // Caller is responsible for ensuring that there is such an operand,
  // and that it is 32/64 bits wide.

  // If "which" is end_pc_operand, find the end of the instruction.

  address ip = inst;
  bool is_64bit = false;

  debug_only(bool has_disp32 = false);
  int tail_size = 0; // other random bytes (#32, #16, etc.)
                     // at end of insn

 again_after_prefix:
  switch (0xFF & *ip++) {

  // These convenience macros generate groups of "case" labels for the switch.
#define REP4(x) (x)+0: case (x)+1: case (x)+2: case (x)+3
#define REP8(x) (x)+0: case (x)+1: case (x)+2: case (x)+3: \
             case (x)+4: case (x)+5: case (x)+6: case (x)+7
#define REP16(x) REP8((x)+0): \
              case REP8((x)+8)

  case CS_segment:
  case SS_segment:
  case DS_segment:
  case ES_segment:
  case FS_segment:
  case GS_segment:
    // Seems dubious
    LP64_ONLY(assert(false, "shouldn't have that prefix"));
    assert(ip == inst+1, "only one prefix allowed");
    goto again_after_prefix;

  case 0x67:
  case REX:
  case REX_B:
  case REX_X:
  case REX_XB:
  case REX_R:
  case REX_RB:
  case REX_RX:
  case REX_RXB:
    NOT_LP64(assert(false, "64bit prefixes"));
    goto again_after_prefix;

  case REX_W:
  case REX_WB:
  case REX_WX:
  case REX_WXB:
  case REX_WR:
  case REX_WRB:
  case REX_WRX:
  case REX_WRXB:
    NOT_LP64(assert(false, "64bit prefixes"));
    is_64bit = true;
    goto again_after_prefix;

  case 0xFF: // pushq a; decl a; incl a; call a; jmp a
  case 0x88: // movb a, r
  case 0x89: // movl a, r
  case 0x8A: // movb r, a
  case 0x8B: // movl r, a
  case 0x8F: // popl a
    debug_only(has_disp32 = true);
    break;

  case 0x68: // pushq #32
    if (which == end_pc_operand) {
      return ip + 4;
    }
    assert(which == imm_operand && !is_64bit, "pushl has no disp32 or 64bit immediate");
    return ip; // not produced by emit_operand

  case 0x66: // movw ... (size prefix)
 again_after_size_prefix2:
    switch (0xFF & *ip++) {
    case REX:
    case REX_B:
    case REX_X:
    case REX_XB:
    case REX_R:
    case REX_RB:
    case REX_RX:
    case REX_RXB:
    case REX_W:
    case REX_WB:
    case REX_WX:
    case REX_WXB:
    case REX_WR:
    case REX_WRB:
    case REX_WRX:
    case REX_WRXB:
      NOT_LP64(assert(false, "64bit prefix found"));
      goto again_after_size_prefix2;
    case 0x8B: // movw r, a
    case 0x89: // movw a, r
      debug_only(has_disp32 = true);
      break;
    case 0xC7: // movw a, #16
      debug_only(has_disp32 = true);
      tail_size = 2; // the imm16
      break;
    case 0x0F: // several SSE/SSE2 variants
      ip--;    // reparse the 0x0F
      goto again_after_prefix;
    default:
      ShouldNotReachHere();
    }
    break;

  case REP8(0xB8): // movl/q r, #32/#64(oop?)
    if (which == end_pc_operand) return ip + (is_64bit ? 8 : 4);
    // these asserts are somewhat nonsensical
#ifndef _LP64
    assert(which == imm_operand || which == disp32_operand, "");
#else
    assert((which == call32_operand || which == imm_operand) && is_64bit ||
           which == narrow_oop_operand && !is_64bit, "");
#endif // _LP64
    return ip;

  case 0x69: // imul r, a, #32
  case 0xC7: // movl a, #32(oop?)
    tail_size = 4;
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0x0F: // movx..., etc.
    switch (0xFF & *ip++) {
    case 0x12: // movlps
    case 0x28: // movaps
    case 0x2E: // ucomiss
    case 0x2F: // comiss
    case 0x54: // andps
    case 0x55: // andnps
    case 0x56: // orps
    case 0x57: // xorps
    case 0x6E: // movd
    case 0x7E: // movd
    case 0xAE: // ldmxcsr a
      // 64bit side says it these have both operands but that doesn't
      // appear to be true
      debug_only(has_disp32 = true);
      break;

    case 0xAD: // shrd r, a, %cl
    case 0xAF: // imul r, a
    case 0xBE: // movsbl r, a (movsxb)
    case 0xBF: // movswl r, a (movsxw)
    case 0xB6: // movzbl r, a (movzxb)
    case 0xB7: // movzwl r, a (movzxw)
    case REP16(0x40): // cmovl cc, r, a
    case 0xB0: // cmpxchgb
    case 0xB1: // cmpxchg
    case 0xC1: // xaddl
    case 0xC7: // cmpxchg8
    case REP16(0x90): // setcc a
      debug_only(has_disp32 = true);
      // fall out of the switch to decode the address
      break;

    case 0xAC: // shrd r, a, #8
      debug_only(has_disp32 = true);
      tail_size = 1; // the imm8
      break;

    case REP16(0x80): // jcc rdisp32
      if (which == end_pc_operand) return ip + 4;
      assert(which == call32_operand, "jcc has no disp32 or imm");
      return ip;
    default:
      ShouldNotReachHere();
    }
    break;

  case 0x81: // addl a, #32; addl r, #32
    // also: orl, adcl, sbbl, andl, subl, xorl, cmpl
    // on 32bit in the case of cmpl, the imm might be an oop
    tail_size = 4;
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0x83: // addl a, #8; addl r, #8
    // also: orl, adcl, sbbl, andl, subl, xorl, cmpl
    debug_only(has_disp32 = true); // has both kinds of operands!
    tail_size = 1;
    break;

  case 0x9B:
    switch (0xFF & *ip++) {
    case 0xD9: // fnstcw a
      debug_only(has_disp32 = true);
      break;
    default:
      ShouldNotReachHere();
    }
    break;

  case REP4(0x00): // addb a, r; addl a, r; addb r, a; addl r, a
  case REP4(0x10): // adc...
  case REP4(0x20): // and...
  case REP4(0x30): // xor...
  case REP4(0x08): // or...
  case REP4(0x18): // sbb...
  case REP4(0x28): // sub...
  case 0xF7: // mull a
  case 0x8D: // lea r, a
  case 0x87: // xchg r, a
  case REP4(0x38): // cmp...
  case 0x85: // test r, a
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0xC1: // sal a, #8; sar a, #8; shl a, #8; shr a, #8
  case 0xC6: // movb a, #8
  case 0x80: // cmpb a, #8
  case 0x6B: // imul r, a, #8
    debug_only(has_disp32 = true); // has both kinds of operands!
    tail_size = 1; // the imm8
    break;

  case 0xE8: // call rdisp32
  case 0xE9: // jmp rdisp32
    if (which == end_pc_operand) return ip + 4;
    assert(which == call32_operand, "call has no disp32 or imm");
    return ip;

  case 0xD1: // sal a, 1; sar a, 1; shl a, 1; shr a, 1
  case 0xD3: // sal a, %cl; sar a, %cl; shl a, %cl; shr a, %cl
  case 0xD9: // fld_s a; fst_s a; fstp_s a; fldcw a
  case 0xDD: // fld_d a; fst_d a; fstp_d a
  case 0xDB: // fild_s a; fistp_s a; fld_x a; fstp_x a
  case 0xDF: // fild_d a; fistp_d a
  case 0xD8: // fadd_s a; fsubr_s a; fmul_s a; fdivr_s a; fcomp_s a
  case 0xDC: // fadd_d a; fsubr_d a; fmul_d a; fdivr_d a; fcomp_d a
  case 0xDE: // faddp_d a; fsubrp_d a; fmulp_d a; fdivrp_d a; fcompp_d a
    debug_only(has_disp32 = true);
    break;

  case 0xF0: // Lock
    assert(os::is_MP(), "only on MP");
    goto again_after_prefix;

  case 0xF3: // For SSE
  case 0xF2: // For SSE2
    switch (0xFF & *ip++) {
    case REX:
    case REX_B:
    case REX_X:
    case REX_XB:
    case REX_R:
    case REX_RB:
    case REX_RX:
    case REX_RXB:
    case REX_W:
    case REX_WB:
    case REX_WX:
    case REX_WXB:
    case REX_WR:
    case REX_WRB:
    case REX_WRX:
    case REX_WRXB:
      NOT_LP64(assert(false, "found 64bit prefix"));
      ip++;
      // NOTE(review): no break here — the REX case deliberately(?) falls
      // through to the default and skips a second byte; confirm intent.
    default:
      ip++;
    }
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  default:
    ShouldNotReachHere();

// NOTE(review): REP4 is not #undef'd here; only REP8/REP16 are.
#undef REP8
#undef REP16
  }

  assert(which != call32_operand, "instruction is not a call, jmp, or jcc");
#ifdef _LP64
  assert(which != imm_operand, "instruction is not a movq reg, imm64");
#else
  // assert(which != imm_operand || has_imm32, "instruction has no imm32 field");
  assert(which != imm_operand || has_disp32, "instruction has no imm32 field");
#endif // LP64
  assert(which != disp32_operand || has_disp32, "instruction has no disp32 field");

  // parse the output of emit_operand
  int op2 = 0xFF & *ip++;
  int base = op2 & 0x07;
  int op3 = -1;
  const int b100 = 4;
  const int b101 = 5;
  if (base == b100 && (op2 >> 6) != 3) {
    op3 = 0xFF & *ip++;
    base = op3 & 0x07; // refetch the base
  }
  // now ip points at the disp (if any)

  switch (op2 >> 6) {
  case 0:
    // [00 reg 100][ss index base]
    // [00 reg 100][00 100 esp]
    // [00 reg base]
    // [00 reg 100][ss index 101][disp32]
    // [00 reg 101] [disp32]

    if (base == b101) {
      if (which == disp32_operand)
        return ip; // caller wants the disp32
      ip += 4; // skip the disp32
    }
    break;

  case 1:
    // [01 reg 100][ss index base][disp8]
    // [01 reg 100][00 100 esp][disp8]
    // [01 reg base] [disp8]
    ip += 1; // skip the disp8
    break;

  case 2:
    // [10 reg 100][ss index base][disp32]
    // [10 reg 100][00 100 esp][disp32]
    // [10 reg base] [disp32]
    if (which == disp32_operand)
      return ip; // caller wants the disp32
    ip += 4; // skip the disp32
    break;

  case 3:
    // [11 reg base] (not a memory addressing mode)
    break;
  }

  if (which == end_pc_operand) {
    return ip + tail_size;
  }

#ifdef _LP64
  assert(which == narrow_oop_operand && !is_64bit, "instruction is not a movl adr, imm32");
#else
  assert(which == imm_operand, "instruction has only an imm field");
#endif // LP64
  return ip;
}

address Assembler::locate_next_instruction(address inst) {
  // Secretly share code with locate_operand:
  return locate_operand(inst, end_pc_operand);
}


#ifdef ASSERT
// Debug check: verify that a relocation recorded at the current inst_mark
// refers to an operand that locate_operand can actually find at pc().
void Assembler::check_relocation(RelocationHolder const& rspec, int format) {
  address inst = inst_mark();
  assert(inst != NULL && inst < pc(), "must point to beginning of instruction");
  address opnd;

  Relocation* r = rspec.reloc();
  if (r->type() == relocInfo::none) {
    return;
  } else if (r->is_call() || format == call32_operand) {
    // assert(format == imm32_operand, "cannot specify a nonzero format");
    opnd = locate_operand(inst, call32_operand);
  } else if (r->is_data()) {
    assert(format == imm_operand || format == disp32_operand
           LP64_ONLY(|| format == narrow_oop_operand), "format ok");
    opnd = locate_operand(inst, (WhichOperand)format);
  } else {
    assert(format == imm_operand, "cannot specify a format");
    return;
  }
  assert(opnd == pc(), "must put operand where relocs can find it");
}
#endif // ASSERT

// 32-bit-register-only form: refuses extended (REX-requiring) registers.
void Assembler::emit_operand32(Register reg, Address adr) {
  assert(reg->encoding() < 8, "no extended registers");
  assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
  emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
               adr._rspec);
}

// Unpack an Address into the low-level emit_operand worker.
void Assembler::emit_operand(Register reg, Address adr,
                             int rip_relative_correction) {
  emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
               adr._rspec,
               rip_relative_correction);
}

void
 Assembler::emit_operand(XMMRegister reg, Address adr) {
  emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
               adr._rspec);
}

// MMX operations
void Assembler::emit_operand(MMXRegister reg, Address adr) {
  assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
  emit_operand((Register)reg, adr._base, adr._index, adr._scale, adr._disp, adr._rspec);
}

// work around gcc (3.2.1-7a) bug
void Assembler::emit_operand(Address adr, MMXRegister reg) {
  assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
  emit_operand((Register)reg, adr._base, adr._index, adr._scale, adr._disp, adr._rspec);
}


// x87 arithmetic on FPU stack slot i: two opcode bytes, slot folded into
// the second byte.
void Assembler::emit_farith(int b1, int b2, int i) {
  assert(isByte(b1) && isByte(b2), "wrong opcode");
  assert(0 <= i && i < 8, "illegal stack offset");
  emit_byte(b1);
  emit_byte(b2 + i);
}


// Now the Assembler instruction (identical for 32/64 bits)

void Assembler::adcl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xD0, dst, imm32);
}

void Assembler::adcl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x13);
  emit_operand(dst, src);
}

void Assembler::adcl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x13, 0xC0, dst, src);
}

void Assembler::addl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_arith_operand(0x81, rax, dst, imm32);
}

void Assembler::addl(Address dst, Register src) {
  InstructionMark im(this);
  prefix(dst, src);
  emit_byte(0x01);
  emit_operand(src, dst);
}

void Assembler::addl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xC0, dst, imm32);
}

void Assembler::addl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x03);
  emit_operand(dst, src);
}

void Assembler::addl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x03, 0xC0, dst, src);
}

// Multi-byte NOPs (0F 1F /0 forms) used for code alignment padding.
void Assembler::addr_nop_4() {
  // 4 bytes: NOP DWORD PTR [EAX+0]
  emit_byte(0x0F);
  emit_byte(0x1F);
  emit_byte(0x40); // emit_rm(cbuf, 0x1, EAX_enc, EAX_enc);
  emit_byte(0);    // 8-bits offset (1 byte)
}

void Assembler::addr_nop_5() {
  // 5 bytes: NOP DWORD PTR [EAX+EAX*0+0] 8-bits offset
  emit_byte(0x0F);
  emit_byte(0x1F);
  emit_byte(0x44); // emit_rm(cbuf, 0x1, EAX_enc, 0x4);
  emit_byte(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
  emit_byte(0);    // 8-bits offset (1 byte)
}

void Assembler::addr_nop_7() {
  // 7 bytes: NOP DWORD PTR [EAX+0] 32-bits offset
  emit_byte(0x0F);
  emit_byte(0x1F);
  emit_byte(0x80); // emit_rm(cbuf, 0x2, EAX_enc, EAX_enc);
  emit_long(0);    // 32-bits offset (4 bytes)
}

void Assembler::addr_nop_8() {
  // 8 bytes: NOP DWORD PTR [EAX+EAX*0+0] 32-bits offset
  emit_byte(0x0F);
  emit_byte(0x1F);
  emit_byte(0x84); // emit_rm(cbuf, 0x2, EAX_enc, 0x4);
  emit_byte(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
  emit_long(0);    // 32-bits offset (4 bytes)
}

void Assembler::addsd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0xF2);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x58);
  emit_byte(0xC0 | encode);
}

void Assembler::addsd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0xF2);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x58);
  emit_operand(dst, src);
}

void Assembler::addss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_byte(0xF3);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x58);
  emit_byte(0xC0 | encode);
}

void Assembler::addss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  emit_byte(0xF3);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x58);
  emit_operand(dst, src);
}

void Assembler::andl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xE0, dst, imm32);
}

void Assembler::andl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x23);
  emit_operand(dst, src);
}

void Assembler::andl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x23, 0xC0, dst, src);
}

void Assembler::andpd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0x66);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x54);
  emit_operand(dst, src);
}

void Assembler::bsfl(Register dst, Register src) {
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xBC);
  emit_byte(0xC0 | encode);
}

void Assembler::bsrl(Register dst, Register src) {
  // F3 0F BD would be LZCNT on capable hardware, so plain BSR must not be
  // emitted there.
  assert(!VM_Version::supports_lzcnt(), "encoding is treated as LZCNT");
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xBD);
  emit_byte(0xC8 | encode);
}

void Assembler::bswapl(Register reg) { // bswap
  int encode = prefix_and_encode(reg->encoding());
  emit_byte(0x0F);
  emit_byte(0xC8 | encode);
}

// Call to a label: emits E8 with a backward disp32 if bound, otherwise
// records a patch site.
void Assembler::call(Label& L, relocInfo::relocType rtype) {
  // suspect disp32 is always good
  int operand = LP64_ONLY(disp32_operand) NOT_LP64(imm_operand);

  if (L.is_bound()) {
    const int long_size = 5;
    int offs = (int)( target(L) - pc() );
    assert(offs <= 0, "assembler error");
    InstructionMark im(this);
    // 1110 1000 #32-bit disp
    emit_byte(0xE8);
    emit_data(offs - long_size, rtype, operand);
  } else {
    InstructionMark im(this);
    // 1110 1000 #32-bit disp
    L.add_patch_at(code(), locator());

    emit_byte(0xE8);
    emit_data(int(0), rtype, operand);
  }
}

void Assembler::call(Register dst) {
  // This was originally using a 32bit register encoding
  // and surely we want 64bit!
  // this is a 32bit encoding but in 64bit mode the default
  // operand size is 64bit so there is no need for the
  // wide prefix. So prefix only happens if we use the
  // new registers. Much like push/pop.
  int x = offset();
  // this may be true but dbx disassembles it as if it
  // were 32bits...
  // int encode = prefix_and_encode(dst->encoding());
  // if (offset() != x) assert(dst->encoding() >= 8, "what?");
  int encode = prefixq_and_encode(dst->encoding());

  emit_byte(0xFF);
  emit_byte(0xD0 | encode);
}


void Assembler::call(Address adr) {
  InstructionMark im(this);
  prefix(adr);
  emit_byte(0xFF);
  // rdx here is just the /2 opcode extension for indirect call, not a
  // real register operand.
  emit_operand(rdx, adr);
}

void Assembler::call_literal(address entry, RelocationHolder const& rspec) {
  assert(entry != NULL, "call most probably wrong");
  InstructionMark im(this);
  emit_byte(0xE8);
  intptr_t disp = entry - (_code_pos + sizeof(int32_t));
  assert(is_simm32(disp), "must be 32bit offset (call2)");
  // Technically, should use call32_operand, but this format is
  // implied by the fact that we're emitting a call instruction.

  int operand = LP64_ONLY(disp32_operand) NOT_LP64(call32_operand);
  emit_data((int) disp, rspec, operand);
}

void Assembler::cdql() {
  emit_byte(0x99);
}

void Assembler::cmovl(Condition cc, Register dst, Register src) {
  NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction"));
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x40 | cc);
  emit_byte(0xC0 | encode);
}


void Assembler::cmovl(Condition cc, Register dst, Address src) {
  NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction"));
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x40 | cc);
  emit_operand(dst, src);
}

void Assembler::cmpb(Address dst, int imm8) {
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0x80);
  emit_operand(rdi, dst, 1);
  emit_byte(imm8);
}

void Assembler::cmpl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0x81);
  emit_operand(rdi, dst, 4);
  emit_long(imm32);
}

void Assembler::cmpl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xF8, dst, imm32);
}

void Assembler::cmpl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x3B, 0xC0, dst, src);
}


void Assembler::cmpl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x3B);
  emit_operand(dst, src);
}

void Assembler::cmpw(Address dst, int imm16) {
  InstructionMark im(this);
  assert(!dst.base_needs_rex() && !dst.index_needs_rex(), "no extended registers");
  emit_byte(0x66);
  emit_byte(0x81);
  emit_operand(rdi, dst, 2);
  emit_word(imm16);
}

// The 32-bit cmpxchg compares the value at adr with the contents of rax,
// and stores reg into adr if so; otherwise, the value at adr is loaded into rax,.
// The ZF is set if the compared values were equal, and cleared otherwise.
void Assembler::cmpxchgl(Register reg, Address adr) { // cmpxchg
  if (Atomics & 2) {
    // caveat: no instructionmark, so this isn't relocatable.
    // Emit a synthetic, non-atomic, CAS equivalent.
    // Beware.  The synthetic form sets all ICCs, not just ZF.
    // cmpxchg r,[m] is equivalent to rax, = CAS (m, rax, r)
    cmpl(rax, adr);
    movl(rax, adr);
    if (reg != rax) {
      Label L ;
      jcc(Assembler::notEqual, L);
      movl(adr, reg);
      bind(L);
    }
  } else {
    InstructionMark im(this);
    prefix(adr, reg);
    emit_byte(0x0F);
    emit_byte(0xB1);
    emit_operand(reg, adr);
  }
}

void Assembler::comisd(XMMRegister dst, Address src) {
  // NOTE: dbx seems to decode this as comiss even though the
  // 0x66 is there. Strangly ucomisd comes out correct
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0x66);
  comiss(dst, src);
}

void Assembler::comiss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));

  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x2F);
  emit_operand(dst, src);
}

void Assembler::cvtdq2pd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0xF3);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xE6);
  emit_byte(0xC0 | encode);
}

void Assembler::cvtdq2ps(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x5B);
  emit_byte(0xC0 | encode);
}

void Assembler::cvtsd2ss(XMMRegister dst,
XMMRegister src) { 1163 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1164 emit_byte(0xF2); 1165 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 1166 emit_byte(0x0F); 1167 emit_byte(0x5A); 1168 emit_byte(0xC0 | encode); 1169 } 1170 1171 void Assembler::cvtsi2sdl(XMMRegister dst, Register src) { 1172 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1173 emit_byte(0xF2); 1174 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 1175 emit_byte(0x0F); 1176 emit_byte(0x2A); 1177 emit_byte(0xC0 | encode); 1178 } 1179 1180 void Assembler::cvtsi2ssl(XMMRegister dst, Register src) { 1181 NOT_LP64(assert(VM_Version::supports_sse(), "")); 1182 emit_byte(0xF3); 1183 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 1184 emit_byte(0x0F); 1185 emit_byte(0x2A); 1186 emit_byte(0xC0 | encode); 1187 } 1188 1189 void Assembler::cvtss2sd(XMMRegister dst, XMMRegister src) { 1190 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1191 emit_byte(0xF3); 1192 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 1193 emit_byte(0x0F); 1194 emit_byte(0x5A); 1195 emit_byte(0xC0 | encode); 1196 } 1197 1198 void Assembler::cvttsd2sil(Register dst, XMMRegister src) { 1199 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1200 emit_byte(0xF2); 1201 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 1202 emit_byte(0x0F); 1203 emit_byte(0x2C); 1204 emit_byte(0xC0 | encode); 1205 } 1206 1207 void Assembler::cvttss2sil(Register dst, XMMRegister src) { 1208 NOT_LP64(assert(VM_Version::supports_sse(), "")); 1209 emit_byte(0xF3); 1210 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 1211 emit_byte(0x0F); 1212 emit_byte(0x2C); 1213 emit_byte(0xC0 | encode); 1214 } 1215 1216 void Assembler::decl(Address dst) { 1217 // Don't use it directly. Use MacroAssembler::decrement() instead. 
1218 InstructionMark im(this); 1219 prefix(dst); 1220 emit_byte(0xFF); 1221 emit_operand(rcx, dst); 1222 } 1223 1224 void Assembler::divsd(XMMRegister dst, Address src) { 1225 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1226 InstructionMark im(this); 1227 emit_byte(0xF2); 1228 prefix(src, dst); 1229 emit_byte(0x0F); 1230 emit_byte(0x5E); 1231 emit_operand(dst, src); 1232 } 1233 1234 void Assembler::divsd(XMMRegister dst, XMMRegister src) { 1235 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1236 emit_byte(0xF2); 1237 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 1238 emit_byte(0x0F); 1239 emit_byte(0x5E); 1240 emit_byte(0xC0 | encode); 1241 } 1242 1243 void Assembler::divss(XMMRegister dst, Address src) { 1244 NOT_LP64(assert(VM_Version::supports_sse(), "")); 1245 InstructionMark im(this); 1246 emit_byte(0xF3); 1247 prefix(src, dst); 1248 emit_byte(0x0F); 1249 emit_byte(0x5E); 1250 emit_operand(dst, src); 1251 } 1252 1253 void Assembler::divss(XMMRegister dst, XMMRegister src) { 1254 NOT_LP64(assert(VM_Version::supports_sse(), "")); 1255 emit_byte(0xF3); 1256 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 1257 emit_byte(0x0F); 1258 emit_byte(0x5E); 1259 emit_byte(0xC0 | encode); 1260 } 1261 1262 void Assembler::emms() { 1263 NOT_LP64(assert(VM_Version::supports_mmx(), "")); 1264 emit_byte(0x0F); 1265 emit_byte(0x77); 1266 } 1267 1268 void Assembler::hlt() { 1269 emit_byte(0xF4); 1270 } 1271 1272 void Assembler::idivl(Register src) { 1273 int encode = prefix_and_encode(src->encoding()); 1274 emit_byte(0xF7); 1275 emit_byte(0xF8 | encode); 1276 } 1277 1278 void Assembler::divl(Register src) { // Unsigned 1279 int encode = prefix_and_encode(src->encoding()); 1280 emit_byte(0xF7); 1281 emit_byte(0xF0 | encode); 1282 } 1283 1284 void Assembler::imull(Register dst, Register src) { 1285 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 1286 emit_byte(0x0F); 1287 emit_byte(0xAF); 1288 emit_byte(0xC0 | 
encode); 1289 } 1290 1291 1292 void Assembler::imull(Register dst, Register src, int value) { 1293 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 1294 if (is8bit(value)) { 1295 emit_byte(0x6B); 1296 emit_byte(0xC0 | encode); 1297 emit_byte(value & 0xFF); 1298 } else { 1299 emit_byte(0x69); 1300 emit_byte(0xC0 | encode); 1301 emit_long(value); 1302 } 1303 } 1304 1305 void Assembler::incl(Address dst) { 1306 // Don't use it directly. Use MacroAssembler::increment() instead. 1307 InstructionMark im(this); 1308 prefix(dst); 1309 emit_byte(0xFF); 1310 emit_operand(rax, dst); 1311 } 1312 1313 void Assembler::jcc(Condition cc, Label& L, relocInfo::relocType rtype) { 1314 InstructionMark im(this); 1315 relocate(rtype); 1316 assert((0 <= cc) && (cc < 16), "illegal cc"); 1317 if (L.is_bound()) { 1318 address dst = target(L); 1319 assert(dst != NULL, "jcc most probably wrong"); 1320 1321 const int short_size = 2; 1322 const int long_size = 6; 1323 intptr_t offs = (intptr_t)dst - (intptr_t)_code_pos; 1324 if (rtype == relocInfo::none && is8bit(offs - short_size)) { 1325 // 0111 tttn #8-bit disp 1326 emit_byte(0x70 | cc); 1327 emit_byte((offs - short_size) & 0xFF); 1328 } else { 1329 // 0000 1111 1000 tttn #32-bit disp 1330 assert(is_simm32(offs - long_size), 1331 "must be 32bit offset (call4)"); 1332 emit_byte(0x0F); 1333 emit_byte(0x80 | cc); 1334 emit_long(offs - long_size); 1335 } 1336 } else { 1337 // Note: could eliminate cond. jumps to this jump if condition 1338 // is the same however, seems to be rather unlikely case. 
1339 // Note: use jccb() if label to be bound is very close to get 1340 // an 8-bit displacement 1341 L.add_patch_at(code(), locator()); 1342 emit_byte(0x0F); 1343 emit_byte(0x80 | cc); 1344 emit_long(0); 1345 } 1346 } 1347 1348 void Assembler::jccb(Condition cc, Label& L) { 1349 if (L.is_bound()) { 1350 const int short_size = 2; 1351 address entry = target(L); 1352 assert(is8bit((intptr_t)entry - ((intptr_t)_code_pos + short_size)), 1353 "Dispacement too large for a short jmp"); 1354 intptr_t offs = (intptr_t)entry - (intptr_t)_code_pos; 1355 // 0111 tttn #8-bit disp 1356 emit_byte(0x70 | cc); 1357 emit_byte((offs - short_size) & 0xFF); 1358 } else { 1359 InstructionMark im(this); 1360 L.add_patch_at(code(), locator()); 1361 emit_byte(0x70 | cc); 1362 emit_byte(0); 1363 } 1364 } 1365 1366 void Assembler::jmp(Address adr) { 1367 InstructionMark im(this); 1368 prefix(adr); 1369 emit_byte(0xFF); 1370 emit_operand(rsp, adr); 1371 } 1372 1373 void Assembler::jmp(Label& L, relocInfo::relocType rtype) { 1374 if (L.is_bound()) { 1375 address entry = target(L); 1376 assert(entry != NULL, "jmp most probably wrong"); 1377 InstructionMark im(this); 1378 const int short_size = 2; 1379 const int long_size = 5; 1380 intptr_t offs = entry - _code_pos; 1381 if (rtype == relocInfo::none && is8bit(offs - short_size)) { 1382 emit_byte(0xEB); 1383 emit_byte((offs - short_size) & 0xFF); 1384 } else { 1385 emit_byte(0xE9); 1386 emit_long(offs - long_size); 1387 } 1388 } else { 1389 // By default, forward jumps are always 32-bit displacements, since 1390 // we can't yet know where the label will be bound. If you're sure that 1391 // the forward jump will not run beyond 256 bytes, use jmpb to 1392 // force an 8-bit displacement. 
1393 InstructionMark im(this); 1394 relocate(rtype); 1395 L.add_patch_at(code(), locator()); 1396 emit_byte(0xE9); 1397 emit_long(0); 1398 } 1399 } 1400 1401 void Assembler::jmp(Register entry) { 1402 int encode = prefix_and_encode(entry->encoding()); 1403 emit_byte(0xFF); 1404 emit_byte(0xE0 | encode); 1405 } 1406 1407 void Assembler::jmp_literal(address dest, RelocationHolder const& rspec) { 1408 InstructionMark im(this); 1409 emit_byte(0xE9); 1410 assert(dest != NULL, "must have a target"); 1411 intptr_t disp = dest - (_code_pos + sizeof(int32_t)); 1412 assert(is_simm32(disp), "must be 32bit offset (jmp)"); 1413 emit_data(disp, rspec.reloc(), call32_operand); 1414 } 1415 1416 void Assembler::jmpb(Label& L) { 1417 if (L.is_bound()) { 1418 const int short_size = 2; 1419 address entry = target(L); 1420 assert(is8bit((entry - _code_pos) + short_size), 1421 "Dispacement too large for a short jmp"); 1422 assert(entry != NULL, "jmp most probably wrong"); 1423 intptr_t offs = entry - _code_pos; 1424 emit_byte(0xEB); 1425 emit_byte((offs - short_size) & 0xFF); 1426 } else { 1427 InstructionMark im(this); 1428 L.add_patch_at(code(), locator()); 1429 emit_byte(0xEB); 1430 emit_byte(0); 1431 } 1432 } 1433 1434 void Assembler::ldmxcsr( Address src) { 1435 NOT_LP64(assert(VM_Version::supports_sse(), "")); 1436 InstructionMark im(this); 1437 prefix(src); 1438 emit_byte(0x0F); 1439 emit_byte(0xAE); 1440 emit_operand(as_Register(2), src); 1441 } 1442 1443 void Assembler::leal(Register dst, Address src) { 1444 InstructionMark im(this); 1445 #ifdef _LP64 1446 emit_byte(0x67); // addr32 1447 prefix(src, dst); 1448 #endif // LP64 1449 emit_byte(0x8D); 1450 emit_operand(dst, src); 1451 } 1452 1453 void Assembler::lock() { 1454 if (Atomics & 1) { 1455 // Emit either nothing, a NOP, or a NOP: prefix 1456 emit_byte(0x90) ; 1457 } else { 1458 emit_byte(0xF0); 1459 } 1460 } 1461 1462 void Assembler::lzcntl(Register dst, Register src) { 1463 assert(VM_Version::supports_lzcnt(), "encoding 
is treated as BSR"); 1464 emit_byte(0xF3); 1465 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 1466 emit_byte(0x0F); 1467 emit_byte(0xBD); 1468 emit_byte(0xC0 | encode); 1469 } 1470 1471 // Emit mfence instruction 1472 void Assembler::mfence() { 1473 NOT_LP64(assert(VM_Version::supports_sse2(), "unsupported");) 1474 emit_byte( 0x0F ); 1475 emit_byte( 0xAE ); 1476 emit_byte( 0xF0 ); 1477 } 1478 1479 void Assembler::mov(Register dst, Register src) { 1480 LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src)); 1481 } 1482 1483 void Assembler::movapd(XMMRegister dst, XMMRegister src) { 1484 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1485 int dstenc = dst->encoding(); 1486 int srcenc = src->encoding(); 1487 emit_byte(0x66); 1488 if (dstenc < 8) { 1489 if (srcenc >= 8) { 1490 prefix(REX_B); 1491 srcenc -= 8; 1492 } 1493 } else { 1494 if (srcenc < 8) { 1495 prefix(REX_R); 1496 } else { 1497 prefix(REX_RB); 1498 srcenc -= 8; 1499 } 1500 dstenc -= 8; 1501 } 1502 emit_byte(0x0F); 1503 emit_byte(0x28); 1504 emit_byte(0xC0 | dstenc << 3 | srcenc); 1505 } 1506 1507 void Assembler::movaps(XMMRegister dst, XMMRegister src) { 1508 NOT_LP64(assert(VM_Version::supports_sse(), "")); 1509 int dstenc = dst->encoding(); 1510 int srcenc = src->encoding(); 1511 if (dstenc < 8) { 1512 if (srcenc >= 8) { 1513 prefix(REX_B); 1514 srcenc -= 8; 1515 } 1516 } else { 1517 if (srcenc < 8) { 1518 prefix(REX_R); 1519 } else { 1520 prefix(REX_RB); 1521 srcenc -= 8; 1522 } 1523 dstenc -= 8; 1524 } 1525 emit_byte(0x0F); 1526 emit_byte(0x28); 1527 emit_byte(0xC0 | dstenc << 3 | srcenc); 1528 } 1529 1530 void Assembler::movb(Register dst, Address src) { 1531 NOT_LP64(assert(dst->has_byte_register(), "must have byte register")); 1532 InstructionMark im(this); 1533 prefix(src, dst, true); 1534 emit_byte(0x8A); 1535 emit_operand(dst, src); 1536 } 1537 1538 1539 void Assembler::movb(Address dst, int imm8) { 1540 InstructionMark im(this); 1541 prefix(dst); 1542 emit_byte(0xC6); 1543 
emit_operand(rax, dst, 1); 1544 emit_byte(imm8); 1545 } 1546 1547 1548 void Assembler::movb(Address dst, Register src) { 1549 assert(src->has_byte_register(), "must have byte register"); 1550 InstructionMark im(this); 1551 prefix(dst, src, true); 1552 emit_byte(0x88); 1553 emit_operand(src, dst); 1554 } 1555 1556 void Assembler::movdl(XMMRegister dst, Register src) { 1557 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1558 emit_byte(0x66); 1559 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 1560 emit_byte(0x0F); 1561 emit_byte(0x6E); 1562 emit_byte(0xC0 | encode); 1563 } 1564 1565 void Assembler::movdl(Register dst, XMMRegister src) { 1566 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1567 emit_byte(0x66); 1568 // swap src/dst to get correct prefix 1569 int encode = prefix_and_encode(src->encoding(), dst->encoding()); 1570 emit_byte(0x0F); 1571 emit_byte(0x7E); 1572 emit_byte(0xC0 | encode); 1573 } 1574 1575 void Assembler::movdqa(XMMRegister dst, Address src) { 1576 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1577 InstructionMark im(this); 1578 emit_byte(0x66); 1579 prefix(src, dst); 1580 emit_byte(0x0F); 1581 emit_byte(0x6F); 1582 emit_operand(dst, src); 1583 } 1584 1585 void Assembler::movdqa(XMMRegister dst, XMMRegister src) { 1586 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1587 emit_byte(0x66); 1588 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 1589 emit_byte(0x0F); 1590 emit_byte(0x6F); 1591 emit_byte(0xC0 | encode); 1592 } 1593 1594 void Assembler::movdqa(Address dst, XMMRegister src) { 1595 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1596 InstructionMark im(this); 1597 emit_byte(0x66); 1598 prefix(dst, src); 1599 emit_byte(0x0F); 1600 emit_byte(0x7F); 1601 emit_operand(src, dst); 1602 } 1603 1604 void Assembler::movdqu(XMMRegister dst, Address src) { 1605 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1606 InstructionMark im(this); 1607 emit_byte(0xF3); 1608 prefix(src, dst); 1609 
emit_byte(0x0F); 1610 emit_byte(0x6F); 1611 emit_operand(dst, src); 1612 } 1613 1614 void Assembler::movdqu(XMMRegister dst, XMMRegister src) { 1615 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1616 emit_byte(0xF3); 1617 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 1618 emit_byte(0x0F); 1619 emit_byte(0x6F); 1620 emit_byte(0xC0 | encode); 1621 } 1622 1623 void Assembler::movdqu(Address dst, XMMRegister src) { 1624 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1625 InstructionMark im(this); 1626 emit_byte(0xF3); 1627 prefix(dst, src); 1628 emit_byte(0x0F); 1629 emit_byte(0x7F); 1630 emit_operand(src, dst); 1631 } 1632 1633 // Uses zero extension on 64bit 1634 1635 void Assembler::movl(Register dst, int32_t imm32) { 1636 int encode = prefix_and_encode(dst->encoding()); 1637 emit_byte(0xB8 | encode); 1638 emit_long(imm32); 1639 } 1640 1641 void Assembler::movl(Register dst, Register src) { 1642 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 1643 emit_byte(0x8B); 1644 emit_byte(0xC0 | encode); 1645 } 1646 1647 void Assembler::movl(Register dst, Address src) { 1648 InstructionMark im(this); 1649 prefix(src, dst); 1650 emit_byte(0x8B); 1651 emit_operand(dst, src); 1652 } 1653 1654 void Assembler::movl(Address dst, int32_t imm32) { 1655 InstructionMark im(this); 1656 prefix(dst); 1657 emit_byte(0xC7); 1658 emit_operand(rax, dst, 4); 1659 emit_long(imm32); 1660 } 1661 1662 void Assembler::movl(Address dst, Register src) { 1663 InstructionMark im(this); 1664 prefix(dst, src); 1665 emit_byte(0x89); 1666 emit_operand(src, dst); 1667 } 1668 1669 // New cpus require to use movsd and movss to avoid partial register stall 1670 // when loading from memory. But for old Opteron use movlpd instead of movsd. 1671 // The selection is done in MacroAssembler::movdbl() and movflt(). 
1672 void Assembler::movlpd(XMMRegister dst, Address src) { 1673 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1674 InstructionMark im(this); 1675 emit_byte(0x66); 1676 prefix(src, dst); 1677 emit_byte(0x0F); 1678 emit_byte(0x12); 1679 emit_operand(dst, src); 1680 } 1681 1682 void Assembler::movq( MMXRegister dst, Address src ) { 1683 assert( VM_Version::supports_mmx(), "" ); 1684 emit_byte(0x0F); 1685 emit_byte(0x6F); 1686 emit_operand(dst, src); 1687 } 1688 1689 void Assembler::movq( Address dst, MMXRegister src ) { 1690 assert( VM_Version::supports_mmx(), "" ); 1691 emit_byte(0x0F); 1692 emit_byte(0x7F); 1693 // workaround gcc (3.2.1-7a) bug 1694 // In that version of gcc with only an emit_operand(MMX, Address) 1695 // gcc will tail jump and try and reverse the parameters completely 1696 // obliterating dst in the process. By having a version available 1697 // that doesn't need to swap the args at the tail jump the bug is 1698 // avoided. 1699 emit_operand(dst, src); 1700 } 1701 1702 void Assembler::movq(XMMRegister dst, Address src) { 1703 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1704 InstructionMark im(this); 1705 emit_byte(0xF3); 1706 prefix(src, dst); 1707 emit_byte(0x0F); 1708 emit_byte(0x7E); 1709 emit_operand(dst, src); 1710 } 1711 1712 void Assembler::movq(Address dst, XMMRegister src) { 1713 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1714 InstructionMark im(this); 1715 emit_byte(0x66); 1716 prefix(dst, src); 1717 emit_byte(0x0F); 1718 emit_byte(0xD6); 1719 emit_operand(src, dst); 1720 } 1721 1722 void Assembler::movsbl(Register dst, Address src) { // movsxb 1723 InstructionMark im(this); 1724 prefix(src, dst); 1725 emit_byte(0x0F); 1726 emit_byte(0xBE); 1727 emit_operand(dst, src); 1728 } 1729 1730 void Assembler::movsbl(Register dst, Register src) { // movsxb 1731 NOT_LP64(assert(src->has_byte_register(), "must have byte register")); 1732 int encode = prefix_and_encode(dst->encoding(), src->encoding(), true); 1733 
emit_byte(0x0F); 1734 emit_byte(0xBE); 1735 emit_byte(0xC0 | encode); 1736 } 1737 1738 void Assembler::movsd(XMMRegister dst, XMMRegister src) { 1739 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1740 emit_byte(0xF2); 1741 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 1742 emit_byte(0x0F); 1743 emit_byte(0x10); 1744 emit_byte(0xC0 | encode); 1745 } 1746 1747 void Assembler::movsd(XMMRegister dst, Address src) { 1748 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1749 InstructionMark im(this); 1750 emit_byte(0xF2); 1751 prefix(src, dst); 1752 emit_byte(0x0F); 1753 emit_byte(0x10); 1754 emit_operand(dst, src); 1755 } 1756 1757 void Assembler::movsd(Address dst, XMMRegister src) { 1758 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1759 InstructionMark im(this); 1760 emit_byte(0xF2); 1761 prefix(dst, src); 1762 emit_byte(0x0F); 1763 emit_byte(0x11); 1764 emit_operand(src, dst); 1765 } 1766 1767 void Assembler::movss(XMMRegister dst, XMMRegister src) { 1768 NOT_LP64(assert(VM_Version::supports_sse(), "")); 1769 emit_byte(0xF3); 1770 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 1771 emit_byte(0x0F); 1772 emit_byte(0x10); 1773 emit_byte(0xC0 | encode); 1774 } 1775 1776 void Assembler::movss(XMMRegister dst, Address src) { 1777 NOT_LP64(assert(VM_Version::supports_sse(), "")); 1778 InstructionMark im(this); 1779 emit_byte(0xF3); 1780 prefix(src, dst); 1781 emit_byte(0x0F); 1782 emit_byte(0x10); 1783 emit_operand(dst, src); 1784 } 1785 1786 void Assembler::movss(Address dst, XMMRegister src) { 1787 NOT_LP64(assert(VM_Version::supports_sse(), "")); 1788 InstructionMark im(this); 1789 emit_byte(0xF3); 1790 prefix(dst, src); 1791 emit_byte(0x0F); 1792 emit_byte(0x11); 1793 emit_operand(src, dst); 1794 } 1795 1796 void Assembler::movswl(Register dst, Address src) { // movsxw 1797 InstructionMark im(this); 1798 prefix(src, dst); 1799 emit_byte(0x0F); 1800 emit_byte(0xBF); 1801 emit_operand(dst, src); 1802 } 1803 1804 void 
Assembler::movswl(Register dst, Register src) { // movsxw 1805 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 1806 emit_byte(0x0F); 1807 emit_byte(0xBF); 1808 emit_byte(0xC0 | encode); 1809 } 1810 1811 void Assembler::movw(Address dst, int imm16) { 1812 InstructionMark im(this); 1813 1814 emit_byte(0x66); // switch to 16-bit mode 1815 prefix(dst); 1816 emit_byte(0xC7); 1817 emit_operand(rax, dst, 2); 1818 emit_word(imm16); 1819 } 1820 1821 void Assembler::movw(Register dst, Address src) { 1822 InstructionMark im(this); 1823 emit_byte(0x66); 1824 prefix(src, dst); 1825 emit_byte(0x8B); 1826 emit_operand(dst, src); 1827 } 1828 1829 void Assembler::movw(Address dst, Register src) { 1830 InstructionMark im(this); 1831 emit_byte(0x66); 1832 prefix(dst, src); 1833 emit_byte(0x89); 1834 emit_operand(src, dst); 1835 } 1836 1837 void Assembler::movzbl(Register dst, Address src) { // movzxb 1838 InstructionMark im(this); 1839 prefix(src, dst); 1840 emit_byte(0x0F); 1841 emit_byte(0xB6); 1842 emit_operand(dst, src); 1843 } 1844 1845 void Assembler::movzbl(Register dst, Register src) { // movzxb 1846 NOT_LP64(assert(src->has_byte_register(), "must have byte register")); 1847 int encode = prefix_and_encode(dst->encoding(), src->encoding(), true); 1848 emit_byte(0x0F); 1849 emit_byte(0xB6); 1850 emit_byte(0xC0 | encode); 1851 } 1852 1853 void Assembler::movzwl(Register dst, Address src) { // movzxw 1854 InstructionMark im(this); 1855 prefix(src, dst); 1856 emit_byte(0x0F); 1857 emit_byte(0xB7); 1858 emit_operand(dst, src); 1859 } 1860 1861 void Assembler::movzwl(Register dst, Register src) { // movzxw 1862 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 1863 emit_byte(0x0F); 1864 emit_byte(0xB7); 1865 emit_byte(0xC0 | encode); 1866 } 1867 1868 void Assembler::mull(Address src) { 1869 InstructionMark im(this); 1870 prefix(src); 1871 emit_byte(0xF7); 1872 emit_operand(rsp, src); 1873 } 1874 1875 void Assembler::mull(Register src) { 1876 int 
encode = prefix_and_encode(src->encoding()); 1877 emit_byte(0xF7); 1878 emit_byte(0xE0 | encode); 1879 } 1880 1881 void Assembler::mulsd(XMMRegister dst, Address src) { 1882 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1883 InstructionMark im(this); 1884 emit_byte(0xF2); 1885 prefix(src, dst); 1886 emit_byte(0x0F); 1887 emit_byte(0x59); 1888 emit_operand(dst, src); 1889 } 1890 1891 void Assembler::mulsd(XMMRegister dst, XMMRegister src) { 1892 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1893 emit_byte(0xF2); 1894 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 1895 emit_byte(0x0F); 1896 emit_byte(0x59); 1897 emit_byte(0xC0 | encode); 1898 } 1899 1900 void Assembler::mulss(XMMRegister dst, Address src) { 1901 NOT_LP64(assert(VM_Version::supports_sse(), "")); 1902 InstructionMark im(this); 1903 emit_byte(0xF3); 1904 prefix(src, dst); 1905 emit_byte(0x0F); 1906 emit_byte(0x59); 1907 emit_operand(dst, src); 1908 } 1909 1910 void Assembler::mulss(XMMRegister dst, XMMRegister src) { 1911 NOT_LP64(assert(VM_Version::supports_sse(), "")); 1912 emit_byte(0xF3); 1913 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 1914 emit_byte(0x0F); 1915 emit_byte(0x59); 1916 emit_byte(0xC0 | encode); 1917 } 1918 1919 void Assembler::negl(Register dst) { 1920 int encode = prefix_and_encode(dst->encoding()); 1921 emit_byte(0xF7); 1922 emit_byte(0xD8 | encode); 1923 } 1924 1925 void Assembler::nop(int i) { 1926 #ifdef ASSERT 1927 assert(i > 0, " "); 1928 // The fancy nops aren't currently recognized by debuggers making it a 1929 // pain to disassemble code while debugging. If asserts are on clearly 1930 // speed is not an issue so simply use the single byte traditional nop 1931 // to do alignment. 
1932 1933 for (; i > 0 ; i--) emit_byte(0x90); 1934 return; 1935 1936 #endif // ASSERT 1937 1938 if (UseAddressNop && VM_Version::is_intel()) { 1939 // 1940 // Using multi-bytes nops "0x0F 0x1F [address]" for Intel 1941 // 1: 0x90 1942 // 2: 0x66 0x90 1943 // 3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding) 1944 // 4: 0x0F 0x1F 0x40 0x00 1945 // 5: 0x0F 0x1F 0x44 0x00 0x00 1946 // 6: 0x66 0x0F 0x1F 0x44 0x00 0x00 1947 // 7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 1948 // 8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 1949 // 9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 1950 // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 1951 // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 1952 1953 // The rest coding is Intel specific - don't use consecutive address nops 1954 1955 // 12: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90 1956 // 13: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90 1957 // 14: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90 1958 // 15: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90 1959 1960 while(i >= 15) { 1961 // For Intel don't generate consecutive addess nops (mix with regular nops) 1962 i -= 15; 1963 emit_byte(0x66); // size prefix 1964 emit_byte(0x66); // size prefix 1965 emit_byte(0x66); // size prefix 1966 addr_nop_8(); 1967 emit_byte(0x66); // size prefix 1968 emit_byte(0x66); // size prefix 1969 emit_byte(0x66); // size prefix 1970 emit_byte(0x90); // nop 1971 } 1972 switch (i) { 1973 case 14: 1974 emit_byte(0x66); // size prefix 1975 case 13: 1976 emit_byte(0x66); // size prefix 1977 case 12: 1978 addr_nop_8(); 1979 emit_byte(0x66); // size prefix 1980 emit_byte(0x66); // size prefix 1981 emit_byte(0x66); // size prefix 1982 emit_byte(0x90); // nop 1983 break; 1984 case 11: 1985 emit_byte(0x66); // size prefix 1986 case 10: 1987 emit_byte(0x66); // size prefix 1988 case 9: 1989 emit_byte(0x66); // size prefix 1990 case 
8: 1991 addr_nop_8(); 1992 break; 1993 case 7: 1994 addr_nop_7(); 1995 break; 1996 case 6: 1997 emit_byte(0x66); // size prefix 1998 case 5: 1999 addr_nop_5(); 2000 break; 2001 case 4: 2002 addr_nop_4(); 2003 break; 2004 case 3: 2005 // Don't use "0x0F 0x1F 0x00" - need patching safe padding 2006 emit_byte(0x66); // size prefix 2007 case 2: 2008 emit_byte(0x66); // size prefix 2009 case 1: 2010 emit_byte(0x90); // nop 2011 break; 2012 default: 2013 assert(i == 0, " "); 2014 } 2015 return; 2016 } 2017 if (UseAddressNop && VM_Version::is_amd()) { 2018 // 2019 // Using multi-bytes nops "0x0F 0x1F [address]" for AMD. 2020 // 1: 0x90 2021 // 2: 0x66 0x90 2022 // 3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding) 2023 // 4: 0x0F 0x1F 0x40 0x00 2024 // 5: 0x0F 0x1F 0x44 0x00 0x00 2025 // 6: 0x66 0x0F 0x1F 0x44 0x00 0x00 2026 // 7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 2027 // 8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 2028 // 9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 2029 // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 2030 // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 2031 2032 // The rest coding is AMD specific - use consecutive address nops 2033 2034 // 12: 0x66 0x0F 0x1F 0x44 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00 2035 // 13: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00 2036 // 14: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 2037 // 15: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 2038 // 16: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 2039 // Size prefixes (0x66) are added for larger sizes 2040 2041 while(i >= 22) { 2042 i -= 11; 2043 emit_byte(0x66); // size prefix 2044 emit_byte(0x66); // size prefix 2045 emit_byte(0x66); // size prefix 2046 addr_nop_8(); 2047 } 2048 // Generate first nop for size between 21-12 2049 switch (i) { 2050 case 21: 2051 i -= 1; 2052 emit_byte(0x66); // size prefix 2053 case 
20: 2054 case 19: 2055 i -= 1; 2056 emit_byte(0x66); // size prefix 2057 case 18: 2058 case 17: 2059 i -= 1; 2060 emit_byte(0x66); // size prefix 2061 case 16: 2062 case 15: 2063 i -= 8; 2064 addr_nop_8(); 2065 break; 2066 case 14: 2067 case 13: 2068 i -= 7; 2069 addr_nop_7(); 2070 break; 2071 case 12: 2072 i -= 6; 2073 emit_byte(0x66); // size prefix 2074 addr_nop_5(); 2075 break; 2076 default: 2077 assert(i < 12, " "); 2078 } 2079 2080 // Generate second nop for size between 11-1 2081 switch (i) { 2082 case 11: 2083 emit_byte(0x66); // size prefix 2084 case 10: 2085 emit_byte(0x66); // size prefix 2086 case 9: 2087 emit_byte(0x66); // size prefix 2088 case 8: 2089 addr_nop_8(); 2090 break; 2091 case 7: 2092 addr_nop_7(); 2093 break; 2094 case 6: 2095 emit_byte(0x66); // size prefix 2096 case 5: 2097 addr_nop_5(); 2098 break; 2099 case 4: 2100 addr_nop_4(); 2101 break; 2102 case 3: 2103 // Don't use "0x0F 0x1F 0x00" - need patching safe padding 2104 emit_byte(0x66); // size prefix 2105 case 2: 2106 emit_byte(0x66); // size prefix 2107 case 1: 2108 emit_byte(0x90); // nop 2109 break; 2110 default: 2111 assert(i == 0, " "); 2112 } 2113 return; 2114 } 2115 2116 // Using nops with size prefixes "0x66 0x90". 
2117 // From AMD Optimization Guide: 2118 // 1: 0x90 2119 // 2: 0x66 0x90 2120 // 3: 0x66 0x66 0x90 2121 // 4: 0x66 0x66 0x66 0x90 2122 // 5: 0x66 0x66 0x90 0x66 0x90 2123 // 6: 0x66 0x66 0x90 0x66 0x66 0x90 2124 // 7: 0x66 0x66 0x66 0x90 0x66 0x66 0x90 2125 // 8: 0x66 0x66 0x66 0x90 0x66 0x66 0x66 0x90 2126 // 9: 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90 2127 // 10: 0x66 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90 2128 // 2129 while(i > 12) { 2130 i -= 4; 2131 emit_byte(0x66); // size prefix 2132 emit_byte(0x66); 2133 emit_byte(0x66); 2134 emit_byte(0x90); // nop 2135 } 2136 // 1 - 12 nops 2137 if(i > 8) { 2138 if(i > 9) { 2139 i -= 1; 2140 emit_byte(0x66); 2141 } 2142 i -= 3; 2143 emit_byte(0x66); 2144 emit_byte(0x66); 2145 emit_byte(0x90); 2146 } 2147 // 1 - 8 nops 2148 if(i > 4) { 2149 if(i > 6) { 2150 i -= 1; 2151 emit_byte(0x66); 2152 } 2153 i -= 3; 2154 emit_byte(0x66); 2155 emit_byte(0x66); 2156 emit_byte(0x90); 2157 } 2158 switch (i) { 2159 case 4: 2160 emit_byte(0x66); 2161 case 3: 2162 emit_byte(0x66); 2163 case 2: 2164 emit_byte(0x66); 2165 case 1: 2166 emit_byte(0x90); 2167 break; 2168 default: 2169 assert(i == 0, " "); 2170 } 2171 } 2172 2173 void Assembler::notl(Register dst) { 2174 int encode = prefix_and_encode(dst->encoding()); 2175 emit_byte(0xF7); 2176 emit_byte(0xD0 | encode ); 2177 } 2178 2179 void Assembler::orl(Address dst, int32_t imm32) { 2180 InstructionMark im(this); 2181 prefix(dst); 2182 emit_byte(0x81); 2183 emit_operand(rcx, dst, 4); 2184 emit_long(imm32); 2185 } 2186 2187 void Assembler::orl(Register dst, int32_t imm32) { 2188 prefix(dst); 2189 emit_arith(0x81, 0xC8, dst, imm32); 2190 } 2191 2192 2193 void Assembler::orl(Register dst, Address src) { 2194 InstructionMark im(this); 2195 prefix(src, dst); 2196 emit_byte(0x0B); 2197 emit_operand(dst, src); 2198 } 2199 2200 2201 void Assembler::orl(Register dst, Register src) { 2202 (void) prefix_and_encode(dst->encoding(), src->encoding()); 2203 emit_arith(0x0B, 0xC0, dst, src); 
2204 } 2205 2206 void Assembler::pcmpestri(XMMRegister dst, Address src, int imm8) { 2207 assert(VM_Version::supports_sse4_2(), ""); 2208 2209 InstructionMark im(this); 2210 emit_byte(0x66); 2211 prefix(src, dst); 2212 emit_byte(0x0F); 2213 emit_byte(0x3A); 2214 emit_byte(0x61); 2215 emit_operand(dst, src); 2216 emit_byte(imm8); 2217 } 2218 2219 void Assembler::pcmpestri(XMMRegister dst, XMMRegister src, int imm8) { 2220 assert(VM_Version::supports_sse4_2(), ""); 2221 2222 emit_byte(0x66); 2223 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 2224 emit_byte(0x0F); 2225 emit_byte(0x3A); 2226 emit_byte(0x61); 2227 emit_byte(0xC0 | encode); 2228 emit_byte(imm8); 2229 } 2230 2231 // generic 2232 void Assembler::pop(Register dst) { 2233 int encode = prefix_and_encode(dst->encoding()); 2234 emit_byte(0x58 | encode); 2235 } 2236 2237 void Assembler::popcntl(Register dst, Address src) { 2238 assert(VM_Version::supports_popcnt(), "must support"); 2239 InstructionMark im(this); 2240 emit_byte(0xF3); 2241 prefix(src, dst); 2242 emit_byte(0x0F); 2243 emit_byte(0xB8); 2244 emit_operand(dst, src); 2245 } 2246 2247 void Assembler::popcntl(Register dst, Register src) { 2248 assert(VM_Version::supports_popcnt(), "must support"); 2249 emit_byte(0xF3); 2250 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 2251 emit_byte(0x0F); 2252 emit_byte(0xB8); 2253 emit_byte(0xC0 | encode); 2254 } 2255 2256 void Assembler::popf() { 2257 emit_byte(0x9D); 2258 } 2259 2260 #ifndef _LP64 // no 32bit push/pop on amd64 2261 void Assembler::popl(Address dst) { 2262 // NOTE: this will adjust stack by 8byte on 64bits 2263 InstructionMark im(this); 2264 prefix(dst); 2265 emit_byte(0x8F); 2266 emit_operand(rax, dst); 2267 } 2268 #endif 2269 2270 void Assembler::prefetch_prefix(Address src) { 2271 prefix(src); 2272 emit_byte(0x0F); 2273 } 2274 2275 void Assembler::prefetchnta(Address src) { 2276 NOT_LP64(assert(VM_Version::supports_sse2(), "must support")); 2277 
InstructionMark im(this); 2278 prefetch_prefix(src); 2279 emit_byte(0x18); 2280 emit_operand(rax, src); // 0, src 2281 } 2282 2283 void Assembler::prefetchr(Address src) { 2284 NOT_LP64(assert(VM_Version::supports_3dnow(), "must support")); 2285 InstructionMark im(this); 2286 prefetch_prefix(src); 2287 emit_byte(0x0D); 2288 emit_operand(rax, src); // 0, src 2289 } 2290 2291 void Assembler::prefetcht0(Address src) { 2292 NOT_LP64(assert(VM_Version::supports_sse(), "must support")); 2293 InstructionMark im(this); 2294 prefetch_prefix(src); 2295 emit_byte(0x18); 2296 emit_operand(rcx, src); // 1, src 2297 } 2298 2299 void Assembler::prefetcht1(Address src) { 2300 NOT_LP64(assert(VM_Version::supports_sse(), "must support")); 2301 InstructionMark im(this); 2302 prefetch_prefix(src); 2303 emit_byte(0x18); 2304 emit_operand(rdx, src); // 2, src 2305 } 2306 2307 void Assembler::prefetcht2(Address src) { 2308 NOT_LP64(assert(VM_Version::supports_sse(), "must support")); 2309 InstructionMark im(this); 2310 prefetch_prefix(src); 2311 emit_byte(0x18); 2312 emit_operand(rbx, src); // 3, src 2313 } 2314 2315 void Assembler::prefetchw(Address src) { 2316 NOT_LP64(assert(VM_Version::supports_3dnow(), "must support")); 2317 InstructionMark im(this); 2318 prefetch_prefix(src); 2319 emit_byte(0x0D); 2320 emit_operand(rcx, src); // 1, src 2321 } 2322 2323 void Assembler::prefix(Prefix p) { 2324 a_byte(p); 2325 } 2326 2327 void Assembler::pshufd(XMMRegister dst, XMMRegister src, int mode) { 2328 assert(isByte(mode), "invalid value"); 2329 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2330 2331 emit_byte(0x66); 2332 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 2333 emit_byte(0x0F); 2334 emit_byte(0x70); 2335 emit_byte(0xC0 | encode); 2336 emit_byte(mode & 0xFF); 2337 2338 } 2339 2340 void Assembler::pshufd(XMMRegister dst, Address src, int mode) { 2341 assert(isByte(mode), "invalid value"); 2342 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2343 2344 
InstructionMark im(this); 2345 emit_byte(0x66); 2346 prefix(src, dst); 2347 emit_byte(0x0F); 2348 emit_byte(0x70); 2349 emit_operand(dst, src); 2350 emit_byte(mode & 0xFF); 2351 } 2352 2353 void Assembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) { 2354 assert(isByte(mode), "invalid value"); 2355 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2356 2357 emit_byte(0xF2); 2358 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 2359 emit_byte(0x0F); 2360 emit_byte(0x70); 2361 emit_byte(0xC0 | encode); 2362 emit_byte(mode & 0xFF); 2363 } 2364 2365 void Assembler::pshuflw(XMMRegister dst, Address src, int mode) { 2366 assert(isByte(mode), "invalid value"); 2367 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2368 2369 InstructionMark im(this); 2370 emit_byte(0xF2); 2371 prefix(src, dst); // QQ new 2372 emit_byte(0x0F); 2373 emit_byte(0x70); 2374 emit_operand(dst, src); 2375 emit_byte(mode & 0xFF); 2376 } 2377 2378 void Assembler::psrlq(XMMRegister dst, int shift) { 2379 // HMM Table D-1 says sse2 or mmx 2380 NOT_LP64(assert(VM_Version::supports_sse(), "")); 2381 2382 int encode = prefixq_and_encode(xmm2->encoding(), dst->encoding()); 2383 emit_byte(0x66); 2384 emit_byte(0x0F); 2385 emit_byte(0x73); 2386 emit_byte(0xC0 | encode); 2387 emit_byte(shift); 2388 } 2389 2390 void Assembler::ptest(XMMRegister dst, Address src) { 2391 assert(VM_Version::supports_sse4_1(), ""); 2392 2393 InstructionMark im(this); 2394 emit_byte(0x66); 2395 prefix(src, dst); 2396 emit_byte(0x0F); 2397 emit_byte(0x38); 2398 emit_byte(0x17); 2399 emit_operand(dst, src); 2400 } 2401 2402 void Assembler::ptest(XMMRegister dst, XMMRegister src) { 2403 assert(VM_Version::supports_sse4_1(), ""); 2404 2405 emit_byte(0x66); 2406 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 2407 emit_byte(0x0F); 2408 emit_byte(0x38); 2409 emit_byte(0x17); 2410 emit_byte(0xC0 | encode); 2411 } 2412 2413 void Assembler::punpcklbw(XMMRegister dst, XMMRegister src) { 2414 
NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2415 emit_byte(0x66); 2416 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 2417 emit_byte(0x0F); 2418 emit_byte(0x60); 2419 emit_byte(0xC0 | encode); 2420 } 2421 2422 void Assembler::push(int32_t imm32) { 2423 // in 64bits we push 64bits onto the stack but only 2424 // take a 32bit immediate 2425 emit_byte(0x68); 2426 emit_long(imm32); 2427 } 2428 2429 void Assembler::push(Register src) { 2430 int encode = prefix_and_encode(src->encoding()); 2431 2432 emit_byte(0x50 | encode); 2433 } 2434 2435 void Assembler::pushf() { 2436 emit_byte(0x9C); 2437 } 2438 2439 #ifndef _LP64 // no 32bit push/pop on amd64 2440 void Assembler::pushl(Address src) { 2441 // Note this will push 64bit on 64bit 2442 InstructionMark im(this); 2443 prefix(src); 2444 emit_byte(0xFF); 2445 emit_operand(rsi, src); 2446 } 2447 #endif 2448 2449 void Assembler::pxor(XMMRegister dst, Address src) { 2450 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2451 InstructionMark im(this); 2452 emit_byte(0x66); 2453 prefix(src, dst); 2454 emit_byte(0x0F); 2455 emit_byte(0xEF); 2456 emit_operand(dst, src); 2457 } 2458 2459 void Assembler::pxor(XMMRegister dst, XMMRegister src) { 2460 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2461 InstructionMark im(this); 2462 emit_byte(0x66); 2463 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 2464 emit_byte(0x0F); 2465 emit_byte(0xEF); 2466 emit_byte(0xC0 | encode); 2467 } 2468 2469 void Assembler::rcll(Register dst, int imm8) { 2470 assert(isShiftCount(imm8), "illegal shift count"); 2471 int encode = prefix_and_encode(dst->encoding()); 2472 if (imm8 == 1) { 2473 emit_byte(0xD1); 2474 emit_byte(0xD0 | encode); 2475 } else { 2476 emit_byte(0xC1); 2477 emit_byte(0xD0 | encode); 2478 emit_byte(imm8); 2479 } 2480 } 2481 2482 // copies data from [esi] to [edi] using rcx pointer sized words 2483 // generic 2484 void Assembler::rep_mov() { 2485 emit_byte(0xF3); 2486 // MOVSQ 2487 
LP64_ONLY(prefix(REX_W)); 2488 emit_byte(0xA5); 2489 } 2490 2491 // sets rcx pointer sized words with rax, value at [edi] 2492 // generic 2493 void Assembler::rep_set() { // rep_set 2494 emit_byte(0xF3); 2495 // STOSQ 2496 LP64_ONLY(prefix(REX_W)); 2497 emit_byte(0xAB); 2498 } 2499 2500 // scans rcx pointer sized words at [edi] for occurance of rax, 2501 // generic 2502 void Assembler::repne_scan() { // repne_scan 2503 emit_byte(0xF2); 2504 // SCASQ 2505 LP64_ONLY(prefix(REX_W)); 2506 emit_byte(0xAF); 2507 } 2508 2509 #ifdef _LP64 2510 // scans rcx 4 byte words at [edi] for occurance of rax, 2511 // generic 2512 void Assembler::repne_scanl() { // repne_scan 2513 emit_byte(0xF2); 2514 // SCASL 2515 emit_byte(0xAF); 2516 } 2517 #endif 2518 2519 void Assembler::ret(int imm16) { 2520 if (imm16 == 0) { 2521 emit_byte(0xC3); 2522 } else { 2523 emit_byte(0xC2); 2524 emit_word(imm16); 2525 } 2526 } 2527 2528 void Assembler::sahf() { 2529 #ifdef _LP64 2530 // Not supported in 64bit mode 2531 ShouldNotReachHere(); 2532 #endif 2533 emit_byte(0x9E); 2534 } 2535 2536 void Assembler::sarl(Register dst, int imm8) { 2537 int encode = prefix_and_encode(dst->encoding()); 2538 assert(isShiftCount(imm8), "illegal shift count"); 2539 if (imm8 == 1) { 2540 emit_byte(0xD1); 2541 emit_byte(0xF8 | encode); 2542 } else { 2543 emit_byte(0xC1); 2544 emit_byte(0xF8 | encode); 2545 emit_byte(imm8); 2546 } 2547 } 2548 2549 void Assembler::sarl(Register dst) { 2550 int encode = prefix_and_encode(dst->encoding()); 2551 emit_byte(0xD3); 2552 emit_byte(0xF8 | encode); 2553 } 2554 2555 void Assembler::sbbl(Address dst, int32_t imm32) { 2556 InstructionMark im(this); 2557 prefix(dst); 2558 emit_arith_operand(0x81, rbx, dst, imm32); 2559 } 2560 2561 void Assembler::sbbl(Register dst, int32_t imm32) { 2562 prefix(dst); 2563 emit_arith(0x81, 0xD8, dst, imm32); 2564 } 2565 2566 2567 void Assembler::sbbl(Register dst, Address src) { 2568 InstructionMark im(this); 2569 prefix(src, dst); 2570 
emit_byte(0x1B); 2571 emit_operand(dst, src); 2572 } 2573 2574 void Assembler::sbbl(Register dst, Register src) { 2575 (void) prefix_and_encode(dst->encoding(), src->encoding()); 2576 emit_arith(0x1B, 0xC0, dst, src); 2577 } 2578 2579 void Assembler::setb(Condition cc, Register dst) { 2580 assert(0 <= cc && cc < 16, "illegal cc"); 2581 int encode = prefix_and_encode(dst->encoding(), true); 2582 emit_byte(0x0F); 2583 emit_byte(0x90 | cc); 2584 emit_byte(0xC0 | encode); 2585 } 2586 2587 void Assembler::shll(Register dst, int imm8) { 2588 assert(isShiftCount(imm8), "illegal shift count"); 2589 int encode = prefix_and_encode(dst->encoding()); 2590 if (imm8 == 1 ) { 2591 emit_byte(0xD1); 2592 emit_byte(0xE0 | encode); 2593 } else { 2594 emit_byte(0xC1); 2595 emit_byte(0xE0 | encode); 2596 emit_byte(imm8); 2597 } 2598 } 2599 2600 void Assembler::shll(Register dst) { 2601 int encode = prefix_and_encode(dst->encoding()); 2602 emit_byte(0xD3); 2603 emit_byte(0xE0 | encode); 2604 } 2605 2606 void Assembler::shrl(Register dst, int imm8) { 2607 assert(isShiftCount(imm8), "illegal shift count"); 2608 int encode = prefix_and_encode(dst->encoding()); 2609 emit_byte(0xC1); 2610 emit_byte(0xE8 | encode); 2611 emit_byte(imm8); 2612 } 2613 2614 void Assembler::shrl(Register dst) { 2615 int encode = prefix_and_encode(dst->encoding()); 2616 emit_byte(0xD3); 2617 emit_byte(0xE8 | encode); 2618 } 2619 2620 // copies a single word from [esi] to [edi] 2621 void Assembler::smovl() { 2622 emit_byte(0xA5); 2623 } 2624 2625 void Assembler::sqrtsd(XMMRegister dst, XMMRegister src) { 2626 // HMM Table D-1 says sse2 2627 // NOT_LP64(assert(VM_Version::supports_sse(), "")); 2628 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2629 emit_byte(0xF2); 2630 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 2631 emit_byte(0x0F); 2632 emit_byte(0x51); 2633 emit_byte(0xC0 | encode); 2634 } 2635 2636 void Assembler::stmxcsr( Address dst) { 2637 NOT_LP64(assert(VM_Version::supports_sse(), 
"")); 2638 InstructionMark im(this); 2639 prefix(dst); 2640 emit_byte(0x0F); 2641 emit_byte(0xAE); 2642 emit_operand(as_Register(3), dst); 2643 } 2644 2645 void Assembler::subl(Address dst, int32_t imm32) { 2646 InstructionMark im(this); 2647 prefix(dst); 2648 if (is8bit(imm32)) { 2649 emit_byte(0x83); 2650 emit_operand(rbp, dst, 1); 2651 emit_byte(imm32 & 0xFF); 2652 } else { 2653 emit_byte(0x81); 2654 emit_operand(rbp, dst, 4); 2655 emit_long(imm32); 2656 } 2657 } 2658 2659 void Assembler::subl(Register dst, int32_t imm32) { 2660 prefix(dst); 2661 emit_arith(0x81, 0xE8, dst, imm32); 2662 } 2663 2664 void Assembler::subl(Address dst, Register src) { 2665 InstructionMark im(this); 2666 prefix(dst, src); 2667 emit_byte(0x29); 2668 emit_operand(src, dst); 2669 } 2670 2671 void Assembler::subl(Register dst, Address src) { 2672 InstructionMark im(this); 2673 prefix(src, dst); 2674 emit_byte(0x2B); 2675 emit_operand(dst, src); 2676 } 2677 2678 void Assembler::subl(Register dst, Register src) { 2679 (void) prefix_and_encode(dst->encoding(), src->encoding()); 2680 emit_arith(0x2B, 0xC0, dst, src); 2681 } 2682 2683 void Assembler::subsd(XMMRegister dst, XMMRegister src) { 2684 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2685 emit_byte(0xF2); 2686 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 2687 emit_byte(0x0F); 2688 emit_byte(0x5C); 2689 emit_byte(0xC0 | encode); 2690 } 2691 2692 void Assembler::subsd(XMMRegister dst, Address src) { 2693 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2694 InstructionMark im(this); 2695 emit_byte(0xF2); 2696 prefix(src, dst); 2697 emit_byte(0x0F); 2698 emit_byte(0x5C); 2699 emit_operand(dst, src); 2700 } 2701 2702 void Assembler::subss(XMMRegister dst, XMMRegister src) { 2703 NOT_LP64(assert(VM_Version::supports_sse(), "")); 2704 emit_byte(0xF3); 2705 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 2706 emit_byte(0x0F); 2707 emit_byte(0x5C); 2708 emit_byte(0xC0 | encode); 2709 } 2710 2711 
void Assembler::subss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  emit_byte(0xF3);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x5C);
  emit_operand(dst, src);
}

// TEST r8, imm8: F6 /0 ib (byte form, so request the byteinst prefix).
void Assembler::testb(Register dst, int imm8) {
  NOT_LP64(assert(dst->has_byte_register(), "must have byte register"));
  (void) prefix_and_encode(dst->encoding(), true);
  emit_arith_b(0xF6, 0xC0, dst, imm8);
}

void Assembler::testl(Register dst, int32_t imm32) {
  // not using emit_arith because test
  // doesn't support sign-extension of
  // 8bit operands
  int encode = dst->encoding();
  if (encode == 0) {
    // eax/rax gets the one-byte-shorter accumulator form: A9 id
    emit_byte(0xA9);
  } else {
    encode = prefix_and_encode(encode);
    emit_byte(0xF7); // F7 /0 id
    emit_byte(0xC0 | encode);
  }
  emit_long(imm32);
}

void Assembler::testl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x85, 0xC0, dst, src);
}

void Assembler::testl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x85);
  emit_operand(dst, src);
}

// UCOMISD is UCOMISS with a 0x66 operand-size prefix; delegate.
void Assembler::ucomisd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0x66);
  ucomiss(dst, src);
}

void Assembler::ucomisd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0x66);
  ucomiss(dst, src);
}

// UCOMISS: [REX] 0F 2E /r
void Assembler::ucomiss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));

  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x2E);
  emit_operand(dst, src);
}

void Assembler::ucomiss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x2E);
  emit_byte(0xC0 | encode);
}


// XADD m32, r32: 0F C1 /r (used with a LOCK prefix by callers for atomics
// -- presumably; confirm at call sites).
void Assembler::xaddl(Address dst, Register src) {
  InstructionMark im(this);
  prefix(dst, src);
  emit_byte(0x0F);
  emit_byte(0xC1);
  emit_operand(src, dst);
}

void Assembler::xchgl(Register dst, Address src) { // xchg
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x87);
  emit_operand(dst, src);
}

void Assembler::xchgl(Register dst, Register src) {
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x87);
  emit_byte(0xc0 | encode);
}

void Assembler::xorl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xF0, dst, imm32); // 81 /6 id
}

void Assembler::xorl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x33);
  emit_operand(dst, src);
}

void Assembler::xorl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x33, 0xC0, dst, src);
}

// XORPD is XORPS with a 0x66 operand-size prefix; delegate for the
// register form.
void Assembler::xorpd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_byte(0x66);
  xorps(dst, src);
}

void Assembler::xorpd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  emit_byte(0x66);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x57);
  emit_operand(dst, src);
}


// XORPS: [REX] 0F 57 /r
void Assembler::xorps(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x57);
  emit_byte(0xC0 | encode);
}

void Assembler::xorps(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x57);
  emit_operand(dst, src);
}

#ifndef _LP64
// 32bit only pieces of the assembler

// CMP r32, imm32 with relocation attached to the immediate (81 /7 id).
void Assembler::cmp_literal32(Register src1, int32_t imm32, RelocationHolder const& rspec) {
  // NO PREFIX AS NEVER 64BIT
  InstructionMark im(this);
  emit_byte(0x81);
  emit_byte(0xF8 | src1->encoding());
  emit_data(imm32, rspec, 0);
}

void Assembler::cmp_literal32(Address src1, int32_t imm32, RelocationHolder const& rspec) {
  // NO PREFIX AS NEVER 64BIT (not even 32bit versions of 64bit regs
  InstructionMark im(this);
  emit_byte(0x81);
  emit_operand(rdi, src1); // 81 /7
  emit_data(imm32, rspec, 0);
}

// The 64-bit (32bit platform) cmpxchg compares the value at adr with the contents of rdx:rax,
// and stores rcx:rbx into adr if so; otherwise, the value at adr is loaded
// into rdx:rax. The ZF is set if the compared values were equal, and cleared otherwise.
void Assembler::cmpxchg8(Address adr) {
  InstructionMark im(this);
  emit_byte(0x0F);
  emit_byte(0xc7);
  emit_operand(rcx, adr); // 0F C7 /1
}

// DEC r32 one-byte form (48+rd); flag behavior differs from SUB, hence:
void Assembler::decl(Register dst) {
  // Don't use it directly. Use MacroAssembler::decrementl() instead.
  emit_byte(0x48 | dst->encoding());
}

#endif // _LP64

// 64bit typically doesn't use the x87 but needs to for the trig funcs
//
// The memory forms below pick the opcode extension (/digit) through the
// register passed to emit_operand32 (rax=/0, rcx=/1, ... rdi=/7).

void Assembler::fabs() {
  emit_byte(0xD9);
  emit_byte(0xE1);
}

void Assembler::fadd(int i) {
  emit_farith(0xD8, 0xC0, i);
}

void Assembler::fadd_d(Address src) {
  InstructionMark im(this);
  emit_byte(0xDC);
  emit_operand32(rax, src); // DC /0
}

void Assembler::fadd_s(Address src) {
  InstructionMark im(this);
  emit_byte(0xD8);
  emit_operand32(rax, src); // D8 /0
}

void Assembler::fadda(int i) {
  emit_farith(0xDC, 0xC0, i);
}

void Assembler::faddp(int i) {
  emit_farith(0xDE, 0xC0, i);
}

void Assembler::fchs() {
  emit_byte(0xD9);
  emit_byte(0xE0);
}

void Assembler::fcom(int i) {
  emit_farith(0xD8, 0xD0, i);
}

void Assembler::fcomp(int i) {
  emit_farith(0xD8, 0xD8, i);
}

void Assembler::fcomp_d(Address src) {
  InstructionMark im(this);
  emit_byte(0xDC);
  emit_operand32(rbx, src); // DC /3
}

void Assembler::fcomp_s(Address src) {
  InstructionMark im(this);
  emit_byte(0xD8);
  emit_operand32(rbx, src); // D8 /3
}

void Assembler::fcompp() {
  emit_byte(0xDE);
  emit_byte(0xD9);
}

void Assembler::fcos() {
  emit_byte(0xD9);
  emit_byte(0xFF);
}

void Assembler::fdecstp() {
  emit_byte(0xD9);
  emit_byte(0xF6);
}

void Assembler::fdiv(int i) {
  emit_farith(0xD8, 0xF0, i);
}

void Assembler::fdiv_d(Address src) {
  InstructionMark im(this);
  emit_byte(0xDC);
  emit_operand32(rsi, src); // DC /6
}

void Assembler::fdiv_s(Address src) {
  InstructionMark im(this);
  emit_byte(0xD8);
  emit_operand32(rsi, src); // D8 /6
}

void Assembler::fdiva(int i) {
  emit_farith(0xDC, 0xF8, i);
}

// Note: The Intel manual (Pentium Processor User's Manual, Vol.3, 1994)
// is erroneous for some of the floating-point instructions below.

void Assembler::fdivp(int i) {
  emit_farith(0xDE, 0xF8, i);  // ST(0) <- ST(0) / ST(1) and pop (Intel manual wrong)
}

void Assembler::fdivr(int i) {
  emit_farith(0xD8, 0xF8, i);
}

void Assembler::fdivr_d(Address src) {
  InstructionMark im(this);
  emit_byte(0xDC);
  emit_operand32(rdi, src); // DC /7
}

void Assembler::fdivr_s(Address src) {
  InstructionMark im(this);
  emit_byte(0xD8);
  emit_operand32(rdi, src); // D8 /7
}

void Assembler::fdivra(int i) {
  emit_farith(0xDC, 0xF0, i);
}

void Assembler::fdivrp(int i) {
  emit_farith(0xDE, 0xF0, i);  // ST(0) <- ST(1) / ST(0) and pop (Intel manual wrong)
}

void Assembler::ffree(int i) {
  emit_farith(0xDD, 0xC0, i);
}

// Load 64-bit integer: DF /5
void Assembler::fild_d(Address adr) {
  InstructionMark im(this);
  emit_byte(0xDF);
  emit_operand32(rbp, adr);
}

// Load 32-bit integer: DB /0
void Assembler::fild_s(Address adr) {
  InstructionMark im(this);
  emit_byte(0xDB);
  emit_operand32(rax, adr);
}

void Assembler::fincstp() {
  emit_byte(0xD9);
  emit_byte(0xF7);
}

// FINIT = WAIT (9B) + FNINIT (DB E3)
void Assembler::finit() {
  emit_byte(0x9B);
  emit_byte(0xDB);
  emit_byte(0xE3);
}

void Assembler::fist_s(Address adr) {
  InstructionMark im(this);
  emit_byte(0xDB);
  emit_operand32(rdx, adr); // DB /2
}

void Assembler::fistp_d(Address adr) {
  InstructionMark im(this);
  emit_byte(0xDF);
  emit_operand32(rdi, adr); // DF /7
}

void Assembler::fistp_s(Address adr) {
  InstructionMark im(this);
  emit_byte(0xDB);
  emit_operand32(rbx, adr); // DB /3
}

void Assembler::fld1() {
  emit_byte(0xD9);
  emit_byte(0xE8);
}

void Assembler::fld_d(Address adr) {
  InstructionMark im(this);
  emit_byte(0xDD);
  emit_operand32(rax, adr); // DD /0
}

void Assembler::fld_s(Address adr) {
  InstructionMark im(this);
  emit_byte(0xD9);
  emit_operand32(rax, adr); // D9 /0
}


void Assembler::fld_s(int index) {
  emit_farith(0xD9, 0xC0, index); // FLD ST(index)
}

// Load 80-bit extended: DB /5
void Assembler::fld_x(Address adr) {
  InstructionMark im(this);
  emit_byte(0xDB);
  emit_operand32(rbp, adr);
}

void Assembler::fldcw(Address src) {
  InstructionMark im(this);
  emit_byte(0xd9);
  emit_operand32(rbp, src); // D9 /5
}

void Assembler::fldenv(Address src) {
  InstructionMark im(this);
  emit_byte(0xD9);
  emit_operand32(rsp, src); // D9 /4
}

void Assembler::fldlg2() {
  emit_byte(0xD9);
  emit_byte(0xEC);
}

void Assembler::fldln2() {
  emit_byte(0xD9);
  emit_byte(0xED);
}

void Assembler::fldz() {
  emit_byte(0xD9);
  emit_byte(0xEE);
}

// ln(x) = ln(2) * log2(x), via FYL2X with y = ln(2)
void Assembler::flog() {
  fldln2();
  fxch();
  fyl2x();
}

// log10(x) = log10(2) * log2(x)
void Assembler::flog10() {
  fldlg2();
  fxch();
  fyl2x();
}

void Assembler::fmul(int i) {
  emit_farith(0xD8, 0xC8, i);
}

void Assembler::fmul_d(Address src) {
  InstructionMark im(this);
  emit_byte(0xDC);
  emit_operand32(rcx, src); // DC /1
}

void Assembler::fmul_s(Address src) {
  InstructionMark im(this);
  emit_byte(0xD8);
  emit_operand32(rcx, src); // D8 /1
}

void Assembler::fmula(int i) {
  emit_farith(0xDC, 0xC8, i);
}

void Assembler::fmulp(int i) {
  emit_farith(0xDE, 0xC8, i);
}

void Assembler::fnsave(Address dst) {
  InstructionMark im(this);
  emit_byte(0xDD);
  emit_operand32(rsi, dst); // DD /6
}

// NOTE(review): despite the fnstcw name this emits WAIT (9B) before
// D9 /7, i.e. the checked FSTCW form -- presumably intentional; confirm.
void Assembler::fnstcw(Address src) {
  InstructionMark im(this);
  emit_byte(0x9B);
  emit_byte(0xD9);
  emit_operand32(rdi, src);
}

// FNSTSW AX: DF E0 (no wait prefix)
void Assembler::fnstsw_ax() {
  emit_byte(0xdF);
  emit_byte(0xE0);
}

void Assembler::fprem() {
  emit_byte(0xD9);
  emit_byte(0xF8);
}

void Assembler::fprem1() {
  emit_byte(0xD9);
  emit_byte(0xF5);
}

void Assembler::frstor(Address src) {
  InstructionMark im(this);
  emit_byte(0xDD);
  emit_operand32(rsp, src); // DD /4
}

void Assembler::fsin() {
  emit_byte(0xD9);
  emit_byte(0xFE);
}

void Assembler::fsqrt() {
  emit_byte(0xD9);
  emit_byte(0xFA);
}

void Assembler::fst_d(Address adr) {
  InstructionMark im(this);
  emit_byte(0xDD);
  emit_operand32(rdx, adr); // DD /2
}

void Assembler::fst_s(Address adr) {
  InstructionMark im(this);
  emit_byte(0xD9);
  emit_operand32(rdx, adr); // D9 /2
}

void Assembler::fstp_d(Address adr) {
  InstructionMark im(this);
  emit_byte(0xDD);
  emit_operand32(rbx, adr); // DD /3
}

void Assembler::fstp_d(int index) {
  emit_farith(0xDD, 0xD8, index); // FSTP ST(index)
}

void Assembler::fstp_s(Address adr) {
  InstructionMark im(this);
  emit_byte(0xD9);
  emit_operand32(rbx, adr); // D9 /3
}

// Store 80-bit extended and pop: DB /7
void Assembler::fstp_x(Address adr) {
  InstructionMark im(this);
  emit_byte(0xDB);
  emit_operand32(rdi, adr);
}

void Assembler::fsub(int i) {
  emit_farith(0xD8, 0xE0, i);
}

void Assembler::fsub_d(Address src) {
  InstructionMark im(this);
  emit_byte(0xDC);
  emit_operand32(rsp, src); // DC /4
}

void Assembler::fsub_s(Address src) {
  InstructionMark im(this);
  emit_byte(0xD8);
  emit_operand32(rsp, src); // D8 /4
}

void Assembler::fsuba(int i) {
  emit_farith(0xDC, 0xE8, i);
}

void Assembler::fsubp(int i) {
  emit_farith(0xDE, 0xE8, i);  // ST(0) <- ST(0) - ST(1) and pop (Intel manual wrong)
}

void Assembler::fsubr(int i) {
  emit_farith(0xD8, 0xE8, i);
}

void Assembler::fsubr_d(Address src) {
  InstructionMark im(this);
  emit_byte(0xDC);
  emit_operand32(rbp, src); // DC /5
}

void Assembler::fsubr_s(Address src) {
  InstructionMark im(this);
  emit_byte(0xD8);
  emit_operand32(rbp, src); // D8 /5
}

void Assembler::fsubra(int i) {
  emit_farith(0xDC, 0xE0, i);
}

void Assembler::fsubrp(int i) {
  emit_farith(0xDE, 0xE0, i);  // ST(0) <- ST(1) - ST(0) and pop (Intel manual wrong)
}

// FPTAN (D9 F2) pushes 1.0 after the result; FSTP ST(0) (DD D8) pops it.
void Assembler::ftan() {
  emit_byte(0xD9);
  emit_byte(0xF2);
  emit_byte(0xDD);
  emit_byte(0xD8);
}

void Assembler::ftst() {
  emit_byte(0xD9);
  emit_byte(0xE4);
}

void Assembler::fucomi(int i) {
  // make sure the instruction is supported (introduced for P6, together with cmov)
  guarantee(VM_Version::supports_cmov(), "illegal instruction");
  emit_farith(0xDB, 0xE8, i);
}

void Assembler::fucomip(int i) {
  // make sure the instruction is supported (introduced for P6, together with cmov)
  guarantee(VM_Version::supports_cmov(), "illegal instruction");
  emit_farith(0xDF, 0xE8, i);
}

void Assembler::fwait() {
  emit_byte(0x9B);
}

void Assembler::fxch(int i) {
  emit_farith(0xD9, 0xC8, i);
}

void Assembler::fyl2x() {
  emit_byte(0xD9);
  emit_byte(0xF1);
}


#ifndef _LP64

// INC r32 one-byte form (40+rd); flag behavior differs from ADD, hence:
void Assembler::incl(Register dst) {
  // Don't use it directly. Use MacroAssembler::incrementl() instead.
  emit_byte(0x40 | dst->encoding());
}

void Assembler::lea(Register dst, Address src) {
  leal(dst, src);
}

// MOV m32, imm32 with relocation attached to the immediate (C7 /0 id).
void Assembler::mov_literal32(Address dst, int32_t imm32, RelocationHolder const& rspec) {
  InstructionMark im(this);
  emit_byte(0xC7);
  emit_operand(rax, dst);
  emit_data((int)imm32, rspec, 0);
}

void Assembler::mov_literal32(Register dst, int32_t imm32, RelocationHolder const& rspec) {
  InstructionMark im(this);
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xB8 | encode); // MOV r32, imm32 short form
  emit_data((int)imm32, rspec, 0);
}

void Assembler::popa() { // 32bit
  emit_byte(0x61);
}

void Assembler::push_literal32(int32_t imm32, RelocationHolder const& rspec) {
  InstructionMark im(this);
  emit_byte(0x68);
  emit_data(imm32, rspec, 0);
}

void Assembler::pusha() { // 32bit
  emit_byte(0x60);
}

// SETNZ r8: 0F 95 /0 (32-bit form; no prefix handling needed)
void Assembler::set_byte_if_not_zero(Register dst) {
  emit_byte(0x0F);
  emit_byte(0x95);
  emit_byte(0xE0 | dst->encoding());
}

// SHLD r32, r32 (shift count in CL): 0F A5, modrm reg=src, rm=dst
void Assembler::shldl(Register dst, Register src) {
  emit_byte(0x0F);
  emit_byte(0xA5);
  emit_byte(0xC0 | src->encoding() << 3 | dst->encoding());
}

// SHRD r32, r32 (shift count in CL): 0F AD
void Assembler::shrdl(Register dst, Register src) {
  emit_byte(0x0F);
  emit_byte(0xAD);
  emit_byte(0xC0 | src->encoding() << 3 | dst->encoding());
}

#else // LP64

// 64-bit version must prefix for dst in r8-r15 (and byte access to
// spl/bpl/sil/dil, hence byteinst = true).
void Assembler::set_byte_if_not_zero(Register dst) {
  int enc = prefix_and_encode(dst->encoding(), true);
  emit_byte(0x0F);
  emit_byte(0x95);
  emit_byte(0xE0 | enc);
}

// 64bit only pieces of the assembler
// This should only be used by 64bit instructions that can use rip-relative
// it cannot be used by instructions that want an immediate value.
// Decide whether adr can be addressed rip-relatively (disp32) from anywhere
// this code might end up in the code cache; false means the caller must
// materialize a 64-bit literal instead.
bool Assembler::reachable(AddressLiteral adr) {
  int64_t disp;
  // None will force a 64bit literal to the code stream. Likely a placeholder
  // for something that will be patched later and we need to be certain it will
  // always be reachable.
  if (adr.reloc() == relocInfo::none) {
    return false;
  }
  if (adr.reloc() == relocInfo::internal_word_type) {
    // This should be rip relative and easily reachable.
    return true;
  }
  if (adr.reloc() == relocInfo::virtual_call_type ||
      adr.reloc() == relocInfo::opt_virtual_call_type ||
      adr.reloc() == relocInfo::static_call_type ||
      adr.reloc() == relocInfo::static_stub_type ) {
    // This should be rip relative within the code cache and easily
    // reachable until we get huge code caches. (At which point
    // ic code is going to have issues).
    return true;
  }
  if (adr.reloc() != relocInfo::external_word_type &&
      adr.reloc() != relocInfo::poll_return_type &&  // these are really external_word but need special
      adr.reloc() != relocInfo::poll_type &&         // relocs to identify them
      adr.reloc() != relocInfo::runtime_call_type ) {
    return false;
  }

  // Stress the correction code
  if (ForceUnreachable) {
    // Must be runtimecall reloc, see if it is in the codecache
    // Flipping stuff in the codecache to be unreachable causes issues
    // with things like inline caches where the additional instructions
    // are not handled.
    if (CodeCache::find_blob(adr._target) == NULL) {
      return false;
    }
  }
  // For external_word_type/runtime_call_type if it is reachable from where we
  // are now (possibly a temp buffer) and where we might end up
  // anywhere in the codeCache then we are always reachable.
  // This would have to change if we ever save/restore shared code
  // to be more pessimistic.

  // Check against both ends of the code cache: if the target is within
  // disp32 range of both bounds it is reachable from any final location.
  disp = (int64_t)adr._target - ((int64_t)CodeCache::low_bound() + sizeof(int));
  if (!is_simm32(disp)) return false;
  disp = (int64_t)adr._target - ((int64_t)CodeCache::high_bound() + sizeof(int));
  if (!is_simm32(disp)) return false;

  disp = (int64_t)adr._target - ((int64_t)_code_pos + sizeof(int));

  // Because rip relative is a disp + address_of_next_instruction and we
  // don't know the value of address_of_next_instruction we apply a fudge factor
  // to make sure we will be ok no matter the size of the instruction we get placed into.
  // We don't have to fudge the checks above here because they are already worst case.

  // 12 == override/rex byte, opcode byte, rm byte, sib byte, a 4-byte disp , 4-byte literal
  // + 4 because better safe than sorry.
  const int fudge = 12 + 4;
  if (disp < 0) {
    disp -= fudge;
  } else {
    disp += fudge;
  }
  return is_simm32(disp);
}

// Emit a 64-bit datum, wrapping a bare reloc type into a RelocationHolder.
void Assembler::emit_data64(jlong data,
                            relocInfo::relocType rtype,
                            int format) {
  if (rtype == relocInfo::none) {
    emit_long64(data);
  } else {
    emit_data64(data, Relocation::spec_simple(rtype), format);
  }
}

void Assembler::emit_data64(jlong data,
                            RelocationHolder const& rspec,
                            int format) {
  assert(imm_operand == 0, "default format must be immediate in this file");
  assert(imm_operand == format, "must be immediate");
  assert(inst_mark() != NULL, "must be inside InstructionMark");
  // Do not use AbstractAssembler::relocate, which is not intended for
  // embedded words. Instead, relocate to the enclosing instruction.
  code_section()->relocate(inst_mark(), rspec, format);
#ifdef ASSERT
  check_relocation(rspec, format);
#endif
  emit_long64(data);
}

// The prefix*_and_encode family emits whatever REX prefix a register
// operand needs and returns the low 3 bits of the encoding for the modrm
// byte.  byteinst forces a bare REX for encodings 4-7 so byte accesses hit
// spl/bpl/sil/dil rather than ah/ch/dh/bh.
int Assembler::prefix_and_encode(int reg_enc, bool byteinst) {
  if (reg_enc >= 8) {
    prefix(REX_B);
    reg_enc -= 8;
  } else if (byteinst && reg_enc >= 4) {
    prefix(REX);
  }
  return reg_enc;
}

// Like prefix_and_encode but always sets REX.W (64-bit operand size).
int Assembler::prefixq_and_encode(int reg_enc) {
  if (reg_enc < 8) {
    prefix(REX_W);
  } else {
    prefix(REX_WB);
    reg_enc -= 8;
  }
  return reg_enc;
}

// Two-register form: dst goes in modrm.reg (REX.R), src in modrm.rm (REX.B).
int Assembler::prefix_and_encode(int dst_enc, int src_enc, bool byteinst) {
  if (dst_enc < 8) {
    if (src_enc >= 8) {
      prefix(REX_B);
      src_enc -= 8;
    } else if (byteinst && src_enc >= 4) {
      prefix(REX);
    }
  } else {
    if (src_enc < 8) {
      prefix(REX_R);
    } else {
      prefix(REX_RB);
      src_enc -= 8;
    }
    dst_enc -= 8;
  }
  return dst_enc << 3 | src_enc; // packed modrm reg/rm bits
}

int Assembler::prefixq_and_encode(int dst_enc, int src_enc) {
  if (dst_enc < 8) {
    if (src_enc < 8) {
      prefix(REX_W);
    } else {
      prefix(REX_WB);
      src_enc -= 8;
    }
  } else {
    if (src_enc < 8) {
      prefix(REX_WR);
    } else {
      prefix(REX_WRB);
      src_enc -= 8;
    }
    dst_enc -= 8;
  }
  return dst_enc << 3 | src_enc;
}

void Assembler::prefix(Register reg) {
  if (reg->encoding() >= 8) {
    prefix(REX_B);
  }
}

// REX for a memory operand with no register operand: base -> REX.B,
// index -> REX.X.
void Assembler::prefix(Address adr) {
  if (adr.base_needs_rex()) {
    if (adr.index_needs_rex()) {
      prefix(REX_XB);
    } else {
      prefix(REX_B);
    }
  } else {
    if (adr.index_needs_rex()) {
      prefix(REX_X);
    }
  }
}

// Same as above but with REX.W set for 64-bit operand size.
void Assembler::prefixq(Address adr) {
  if (adr.base_needs_rex()) {
    if (adr.index_needs_rex()) {
      prefix(REX_WXB);
    } else {
      prefix(REX_WB);
    }
  } else {
    if (adr.index_needs_rex()) {
      prefix(REX_WX);
    } else {
      prefix(REX_W);
    }
  }
}


// Memory operand plus register operand: reg -> REX.R, base -> REX.B,
// index -> REX.X.
// NOTE(review): byteinst is never consulted here -- a bare REX is emitted
// for reg encodings 4-7 even for non-byte instructions.  That costs an
// extra (ignored) prefix byte; presumably the test was meant to be
// "byteinst && reg->encoding() >= 4" as in prefix_and_encode -- confirm.
void Assembler::prefix(Address adr, Register reg, bool byteinst) {
  if (reg->encoding() < 8) {
    if (adr.base_needs_rex()) {
      if (adr.index_needs_rex()) {
        prefix(REX_XB);
      } else {
        prefix(REX_B);
      }
    } else {
      if (adr.index_needs_rex()) {
        prefix(REX_X);
      } else if (reg->encoding() >= 4 ) {
        prefix(REX);
      }
    }
  } else {
    if (adr.base_needs_rex()) {
      if (adr.index_needs_rex()) {
        prefix(REX_RXB);
      } else {
        prefix(REX_RB);
      }
    } else {
      if (adr.index_needs_rex()) {
        prefix(REX_RX);
      } else {
        prefix(REX_R);
      }
    }
  }
}

void Assembler::prefixq(Address adr, Register src) {
  if (src->encoding() < 8) {
    if (adr.base_needs_rex()) {
      if (adr.index_needs_rex()) {
        prefix(REX_WXB);
      } else {
        prefix(REX_WB);
      }
    } else {
      if (adr.index_needs_rex()) {
        prefix(REX_WX);
      } else {
        prefix(REX_W);
      }
    }
  } else {
    if (adr.base_needs_rex()) {
      if (adr.index_needs_rex()) {
        prefix(REX_WRXB);
      } else {
        prefix(REX_WRB);
      }
    } else {
      if (adr.index_needs_rex()) {
        prefix(REX_WRX);
      } else {
        prefix(REX_WR);
      }
    }
  }
}

// XMM variant: no byte-register concern, otherwise same REX.R/X/B logic.
void Assembler::prefix(Address adr, XMMRegister reg) {
  if (reg->encoding() < 8) {
    if (adr.base_needs_rex()) {
      if (adr.index_needs_rex()) {
        prefix(REX_XB);
      } else {
        prefix(REX_B);
      }
    } else {
      if (adr.index_needs_rex()) {
        prefix(REX_X);
      }
    }
  } else {
    if (adr.base_needs_rex()) {
      if (adr.index_needs_rex()) {
        prefix(REX_RXB);
      } else {
        prefix(REX_RB);
      }
    } else {
      if (adr.index_needs_rex()) {
        prefix(REX_RX);
      } else {
        prefix(REX_R);
      }
    }
  }
}

void
Assembler::adcq(Register dst, int32_t imm32) { 3669 (void) prefixq_and_encode(dst->encoding()); 3670 emit_arith(0x81, 0xD0, dst, imm32); 3671 } 3672 3673 void Assembler::adcq(Register dst, Address src) { 3674 InstructionMark im(this); 3675 prefixq(src, dst); 3676 emit_byte(0x13); 3677 emit_operand(dst, src); 3678 } 3679 3680 void Assembler::adcq(Register dst, Register src) { 3681 (int) prefixq_and_encode(dst->encoding(), src->encoding()); 3682 emit_arith(0x13, 0xC0, dst, src); 3683 } 3684 3685 void Assembler::addq(Address dst, int32_t imm32) { 3686 InstructionMark im(this); 3687 prefixq(dst); 3688 emit_arith_operand(0x81, rax, dst,imm32); 3689 } 3690 3691 void Assembler::addq(Address dst, Register src) { 3692 InstructionMark im(this); 3693 prefixq(dst, src); 3694 emit_byte(0x01); 3695 emit_operand(src, dst); 3696 } 3697 3698 void Assembler::addq(Register dst, int32_t imm32) { 3699 (void) prefixq_and_encode(dst->encoding()); 3700 emit_arith(0x81, 0xC0, dst, imm32); 3701 } 3702 3703 void Assembler::addq(Register dst, Address src) { 3704 InstructionMark im(this); 3705 prefixq(src, dst); 3706 emit_byte(0x03); 3707 emit_operand(dst, src); 3708 } 3709 3710 void Assembler::addq(Register dst, Register src) { 3711 (void) prefixq_and_encode(dst->encoding(), src->encoding()); 3712 emit_arith(0x03, 0xC0, dst, src); 3713 } 3714 3715 void Assembler::andq(Register dst, int32_t imm32) { 3716 (void) prefixq_and_encode(dst->encoding()); 3717 emit_arith(0x81, 0xE0, dst, imm32); 3718 } 3719 3720 void Assembler::andq(Register dst, Address src) { 3721 InstructionMark im(this); 3722 prefixq(src, dst); 3723 emit_byte(0x23); 3724 emit_operand(dst, src); 3725 } 3726 3727 void Assembler::andq(Register dst, Register src) { 3728 (int) prefixq_and_encode(dst->encoding(), src->encoding()); 3729 emit_arith(0x23, 0xC0, dst, src); 3730 } 3731 3732 void Assembler::bsfq(Register dst, Register src) { 3733 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 3734 emit_byte(0x0F); 3735 
emit_byte(0xBC); 3736 emit_byte(0xC0 | encode); 3737 } 3738 3739 void Assembler::bsrq(Register dst, Register src) { 3740 assert(!VM_Version::supports_lzcnt(), "encoding is treated as LZCNT"); 3741 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 3742 emit_byte(0x0F); 3743 emit_byte(0xBD); 3744 emit_byte(0xC0 | encode); 3745 } 3746 3747 void Assembler::bswapq(Register reg) { 3748 int encode = prefixq_and_encode(reg->encoding()); 3749 emit_byte(0x0F); 3750 emit_byte(0xC8 | encode); 3751 } 3752 3753 void Assembler::cdqq() { 3754 prefix(REX_W); 3755 emit_byte(0x99); 3756 } 3757 3758 void Assembler::clflush(Address adr) { 3759 prefix(adr); 3760 emit_byte(0x0F); 3761 emit_byte(0xAE); 3762 emit_operand(rdi, adr); 3763 } 3764 3765 void Assembler::cmovq(Condition cc, Register dst, Register src) { 3766 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 3767 emit_byte(0x0F); 3768 emit_byte(0x40 | cc); 3769 emit_byte(0xC0 | encode); 3770 } 3771 3772 void Assembler::cmovq(Condition cc, Register dst, Address src) { 3773 InstructionMark im(this); 3774 prefixq(src, dst); 3775 emit_byte(0x0F); 3776 emit_byte(0x40 | cc); 3777 emit_operand(dst, src); 3778 } 3779 3780 void Assembler::cmpq(Address dst, int32_t imm32) { 3781 InstructionMark im(this); 3782 prefixq(dst); 3783 emit_byte(0x81); 3784 emit_operand(rdi, dst, 4); 3785 emit_long(imm32); 3786 } 3787 3788 void Assembler::cmpq(Register dst, int32_t imm32) { 3789 (void) prefixq_and_encode(dst->encoding()); 3790 emit_arith(0x81, 0xF8, dst, imm32); 3791 } 3792 3793 void Assembler::cmpq(Address dst, Register src) { 3794 InstructionMark im(this); 3795 prefixq(dst, src); 3796 emit_byte(0x3B); 3797 emit_operand(src, dst); 3798 } 3799 3800 void Assembler::cmpq(Register dst, Register src) { 3801 (void) prefixq_and_encode(dst->encoding(), src->encoding()); 3802 emit_arith(0x3B, 0xC0, dst, src); 3803 } 3804 3805 void Assembler::cmpq(Register dst, Address src) { 3806 InstructionMark im(this); 3807 prefixq(src, 
dst); 3808 emit_byte(0x3B); 3809 emit_operand(dst, src); 3810 } 3811 3812 void Assembler::cmpxchgq(Register reg, Address adr) { 3813 InstructionMark im(this); 3814 prefixq(adr, reg); 3815 emit_byte(0x0F); 3816 emit_byte(0xB1); 3817 emit_operand(reg, adr); 3818 } 3819 3820 void Assembler::cvtsi2sdq(XMMRegister dst, Register src) { 3821 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3822 emit_byte(0xF2); 3823 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 3824 emit_byte(0x0F); 3825 emit_byte(0x2A); 3826 emit_byte(0xC0 | encode); 3827 } 3828 3829 void Assembler::cvtsi2ssq(XMMRegister dst, Register src) { 3830 NOT_LP64(assert(VM_Version::supports_sse(), "")); 3831 emit_byte(0xF3); 3832 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 3833 emit_byte(0x0F); 3834 emit_byte(0x2A); 3835 emit_byte(0xC0 | encode); 3836 } 3837 3838 void Assembler::cvttsd2siq(Register dst, XMMRegister src) { 3839 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3840 emit_byte(0xF2); 3841 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 3842 emit_byte(0x0F); 3843 emit_byte(0x2C); 3844 emit_byte(0xC0 | encode); 3845 } 3846 3847 void Assembler::cvttss2siq(Register dst, XMMRegister src) { 3848 NOT_LP64(assert(VM_Version::supports_sse(), "")); 3849 emit_byte(0xF3); 3850 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 3851 emit_byte(0x0F); 3852 emit_byte(0x2C); 3853 emit_byte(0xC0 | encode); 3854 } 3855 3856 void Assembler::decl(Register dst) { 3857 // Don't use it directly. Use MacroAssembler::decrementl() instead. 3858 // Use two-byte form (one-byte form is a REX prefix in 64-bit mode) 3859 int encode = prefix_and_encode(dst->encoding()); 3860 emit_byte(0xFF); 3861 emit_byte(0xC8 | encode); 3862 } 3863 3864 void Assembler::decq(Register dst) { 3865 // Don't use it directly. Use MacroAssembler::decrementq() instead. 
3866 // Use two-byte form (one-byte from is a REX prefix in 64-bit mode) 3867 int encode = prefixq_and_encode(dst->encoding()); 3868 emit_byte(0xFF); 3869 emit_byte(0xC8 | encode); 3870 } 3871 3872 void Assembler::decq(Address dst) { 3873 // Don't use it directly. Use MacroAssembler::decrementq() instead. 3874 InstructionMark im(this); 3875 prefixq(dst); 3876 emit_byte(0xFF); 3877 emit_operand(rcx, dst); 3878 } 3879 3880 void Assembler::fxrstor(Address src) { 3881 prefixq(src); 3882 emit_byte(0x0F); 3883 emit_byte(0xAE); 3884 emit_operand(as_Register(1), src); 3885 } 3886 3887 void Assembler::fxsave(Address dst) { 3888 prefixq(dst); 3889 emit_byte(0x0F); 3890 emit_byte(0xAE); 3891 emit_operand(as_Register(0), dst); 3892 } 3893 3894 void Assembler::idivq(Register src) { 3895 int encode = prefixq_and_encode(src->encoding()); 3896 emit_byte(0xF7); 3897 emit_byte(0xF8 | encode); 3898 } 3899 3900 void Assembler::imulq(Register dst, Register src) { 3901 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 3902 emit_byte(0x0F); 3903 emit_byte(0xAF); 3904 emit_byte(0xC0 | encode); 3905 } 3906 3907 void Assembler::imulq(Register dst, Register src, int value) { 3908 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 3909 if (is8bit(value)) { 3910 emit_byte(0x6B); 3911 emit_byte(0xC0 | encode); 3912 emit_byte(value & 0xFF); 3913 } else { 3914 emit_byte(0x69); 3915 emit_byte(0xC0 | encode); 3916 emit_long(value); 3917 } 3918 } 3919 3920 void Assembler::incl(Register dst) { 3921 // Don't use it directly. Use MacroAssembler::incrementl() instead. 3922 // Use two-byte form (one-byte from is a REX prefix in 64-bit mode) 3923 int encode = prefix_and_encode(dst->encoding()); 3924 emit_byte(0xFF); 3925 emit_byte(0xC0 | encode); 3926 } 3927 3928 void Assembler::incq(Register dst) { 3929 // Don't use it directly. Use MacroAssembler::incrementq() instead. 
3930 // Use two-byte form (one-byte from is a REX prefix in 64-bit mode) 3931 int encode = prefixq_and_encode(dst->encoding()); 3932 emit_byte(0xFF); 3933 emit_byte(0xC0 | encode); 3934 } 3935 3936 void Assembler::incq(Address dst) { 3937 // Don't use it directly. Use MacroAssembler::incrementq() instead. 3938 InstructionMark im(this); 3939 prefixq(dst); 3940 emit_byte(0xFF); 3941 emit_operand(rax, dst); 3942 } 3943 3944 void Assembler::lea(Register dst, Address src) { 3945 leaq(dst, src); 3946 } 3947 3948 void Assembler::leaq(Register dst, Address src) { 3949 InstructionMark im(this); 3950 prefixq(src, dst); 3951 emit_byte(0x8D); 3952 emit_operand(dst, src); 3953 } 3954 3955 void Assembler::mov64(Register dst, int64_t imm64) { 3956 InstructionMark im(this); 3957 int encode = prefixq_and_encode(dst->encoding()); 3958 emit_byte(0xB8 | encode); 3959 emit_long64(imm64); 3960 } 3961 3962 void Assembler::mov_literal64(Register dst, intptr_t imm64, RelocationHolder const& rspec) { 3963 InstructionMark im(this); 3964 int encode = prefixq_and_encode(dst->encoding()); 3965 emit_byte(0xB8 | encode); 3966 emit_data64(imm64, rspec); 3967 } 3968 3969 void Assembler::mov_narrow_oop(Register dst, int32_t imm32, RelocationHolder const& rspec) { 3970 InstructionMark im(this); 3971 int encode = prefix_and_encode(dst->encoding()); 3972 emit_byte(0xB8 | encode); 3973 emit_data((int)imm32, rspec, narrow_oop_operand); 3974 } 3975 3976 void Assembler::mov_narrow_oop(Address dst, int32_t imm32, RelocationHolder const& rspec) { 3977 InstructionMark im(this); 3978 prefix(dst); 3979 emit_byte(0xC7); 3980 emit_operand(rax, dst, 4); 3981 emit_data((int)imm32, rspec, narrow_oop_operand); 3982 } 3983 3984 void Assembler::cmp_narrow_oop(Register src1, int32_t imm32, RelocationHolder const& rspec) { 3985 InstructionMark im(this); 3986 int encode = prefix_and_encode(src1->encoding()); 3987 emit_byte(0x81); 3988 emit_byte(0xF8 | encode); 3989 emit_data((int)imm32, rspec, narrow_oop_operand); 3990 } 
3991 3992 void Assembler::cmp_narrow_oop(Address src1, int32_t imm32, RelocationHolder const& rspec) { 3993 InstructionMark im(this); 3994 prefix(src1); 3995 emit_byte(0x81); 3996 emit_operand(rax, src1, 4); 3997 emit_data((int)imm32, rspec, narrow_oop_operand); 3998 } 3999 4000 void Assembler::lzcntq(Register dst, Register src) { 4001 assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR"); 4002 emit_byte(0xF3); 4003 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 4004 emit_byte(0x0F); 4005 emit_byte(0xBD); 4006 emit_byte(0xC0 | encode); 4007 } 4008 4009 void Assembler::movdq(XMMRegister dst, Register src) { 4010 // table D-1 says MMX/SSE2 4011 NOT_LP64(assert(VM_Version::supports_sse2() || VM_Version::supports_mmx(), "")); 4012 emit_byte(0x66); 4013 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 4014 emit_byte(0x0F); 4015 emit_byte(0x6E); 4016 emit_byte(0xC0 | encode); 4017 } 4018 4019 void Assembler::movdq(Register dst, XMMRegister src) { 4020 // table D-1 says MMX/SSE2 4021 NOT_LP64(assert(VM_Version::supports_sse2() || VM_Version::supports_mmx(), "")); 4022 emit_byte(0x66); 4023 // swap src/dst to get correct prefix 4024 int encode = prefixq_and_encode(src->encoding(), dst->encoding()); 4025 emit_byte(0x0F); 4026 emit_byte(0x7E); 4027 emit_byte(0xC0 | encode); 4028 } 4029 4030 void Assembler::movq(Register dst, Register src) { 4031 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 4032 emit_byte(0x8B); 4033 emit_byte(0xC0 | encode); 4034 } 4035 4036 void Assembler::movq(Register dst, Address src) { 4037 InstructionMark im(this); 4038 prefixq(src, dst); 4039 emit_byte(0x8B); 4040 emit_operand(dst, src); 4041 } 4042 4043 void Assembler::movq(Address dst, Register src) { 4044 InstructionMark im(this); 4045 prefixq(dst, src); 4046 emit_byte(0x89); 4047 emit_operand(src, dst); 4048 } 4049 4050 void Assembler::movsbq(Register dst, Address src) { 4051 InstructionMark im(this); 4052 prefixq(src, dst); 
4053 emit_byte(0x0F); 4054 emit_byte(0xBE); 4055 emit_operand(dst, src); 4056 } 4057 4058 void Assembler::movsbq(Register dst, Register src) { 4059 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 4060 emit_byte(0x0F); 4061 emit_byte(0xBE); 4062 emit_byte(0xC0 | encode); 4063 } 4064 4065 void Assembler::movslq(Register dst, int32_t imm32) { 4066 // dbx shows movslq(rcx, 3) as movq $0x0000000049000000,(%rbx) 4067 // and movslq(r8, 3); as movl $0x0000000048000000,(%rbx) 4068 // as a result we shouldn't use until tested at runtime... 4069 ShouldNotReachHere(); 4070 InstructionMark im(this); 4071 int encode = prefixq_and_encode(dst->encoding()); 4072 emit_byte(0xC7 | encode); 4073 emit_long(imm32); 4074 } 4075 4076 void Assembler::movslq(Address dst, int32_t imm32) { 4077 assert(is_simm32(imm32), "lost bits"); 4078 InstructionMark im(this); 4079 prefixq(dst); 4080 emit_byte(0xC7); 4081 emit_operand(rax, dst, 4); 4082 emit_long(imm32); 4083 } 4084 4085 void Assembler::movslq(Register dst, Address src) { 4086 InstructionMark im(this); 4087 prefixq(src, dst); 4088 emit_byte(0x63); 4089 emit_operand(dst, src); 4090 } 4091 4092 void Assembler::movslq(Register dst, Register src) { 4093 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 4094 emit_byte(0x63); 4095 emit_byte(0xC0 | encode); 4096 } 4097 4098 void Assembler::movswq(Register dst, Address src) { 4099 InstructionMark im(this); 4100 prefixq(src, dst); 4101 emit_byte(0x0F); 4102 emit_byte(0xBF); 4103 emit_operand(dst, src); 4104 } 4105 4106 void Assembler::movswq(Register dst, Register src) { 4107 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 4108 emit_byte(0x0F); 4109 emit_byte(0xBF); 4110 emit_byte(0xC0 | encode); 4111 } 4112 4113 void Assembler::movzbq(Register dst, Address src) { 4114 InstructionMark im(this); 4115 prefixq(src, dst); 4116 emit_byte(0x0F); 4117 emit_byte(0xB6); 4118 emit_operand(dst, src); 4119 } 4120 4121 void Assembler::movzbq(Register dst, 
Register src) { 4122 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 4123 emit_byte(0x0F); 4124 emit_byte(0xB6); 4125 emit_byte(0xC0 | encode); 4126 } 4127 4128 void Assembler::movzwq(Register dst, Address src) { 4129 InstructionMark im(this); 4130 prefixq(src, dst); 4131 emit_byte(0x0F); 4132 emit_byte(0xB7); 4133 emit_operand(dst, src); 4134 } 4135 4136 void Assembler::movzwq(Register dst, Register src) { 4137 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 4138 emit_byte(0x0F); 4139 emit_byte(0xB7); 4140 emit_byte(0xC0 | encode); 4141 } 4142 4143 void Assembler::negq(Register dst) { 4144 int encode = prefixq_and_encode(dst->encoding()); 4145 emit_byte(0xF7); 4146 emit_byte(0xD8 | encode); 4147 } 4148 4149 void Assembler::notq(Register dst) { 4150 int encode = prefixq_and_encode(dst->encoding()); 4151 emit_byte(0xF7); 4152 emit_byte(0xD0 | encode); 4153 } 4154 4155 void Assembler::orq(Address dst, int32_t imm32) { 4156 InstructionMark im(this); 4157 prefixq(dst); 4158 emit_byte(0x81); 4159 emit_operand(rcx, dst, 4); 4160 emit_long(imm32); 4161 } 4162 4163 void Assembler::orq(Register dst, int32_t imm32) { 4164 (void) prefixq_and_encode(dst->encoding()); 4165 emit_arith(0x81, 0xC8, dst, imm32); 4166 } 4167 4168 void Assembler::orq(Register dst, Address src) { 4169 InstructionMark im(this); 4170 prefixq(src, dst); 4171 emit_byte(0x0B); 4172 emit_operand(dst, src); 4173 } 4174 4175 void Assembler::orq(Register dst, Register src) { 4176 (void) prefixq_and_encode(dst->encoding(), src->encoding()); 4177 emit_arith(0x0B, 0xC0, dst, src); 4178 } 4179 4180 void Assembler::popa() { // 64bit 4181 movq(r15, Address(rsp, 0)); 4182 movq(r14, Address(rsp, wordSize)); 4183 movq(r13, Address(rsp, 2 * wordSize)); 4184 movq(r12, Address(rsp, 3 * wordSize)); 4185 movq(r11, Address(rsp, 4 * wordSize)); 4186 movq(r10, Address(rsp, 5 * wordSize)); 4187 movq(r9, Address(rsp, 6 * wordSize)); 4188 movq(r8, Address(rsp, 7 * wordSize)); 4189 movq(rdi, 
Address(rsp, 8 * wordSize)); 4190 movq(rsi, Address(rsp, 9 * wordSize)); 4191 movq(rbp, Address(rsp, 10 * wordSize)); 4192 // skip rsp 4193 movq(rbx, Address(rsp, 12 * wordSize)); 4194 movq(rdx, Address(rsp, 13 * wordSize)); 4195 movq(rcx, Address(rsp, 14 * wordSize)); 4196 movq(rax, Address(rsp, 15 * wordSize)); 4197 4198 addq(rsp, 16 * wordSize); 4199 } 4200 4201 void Assembler::popcntq(Register dst, Address src) { 4202 assert(VM_Version::supports_popcnt(), "must support"); 4203 InstructionMark im(this); 4204 emit_byte(0xF3); 4205 prefixq(src, dst); 4206 emit_byte(0x0F); 4207 emit_byte(0xB8); 4208 emit_operand(dst, src); 4209 } 4210 4211 void Assembler::popcntq(Register dst, Register src) { 4212 assert(VM_Version::supports_popcnt(), "must support"); 4213 emit_byte(0xF3); 4214 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 4215 emit_byte(0x0F); 4216 emit_byte(0xB8); 4217 emit_byte(0xC0 | encode); 4218 } 4219 4220 void Assembler::popq(Address dst) { 4221 InstructionMark im(this); 4222 prefixq(dst); 4223 emit_byte(0x8F); 4224 emit_operand(rax, dst); 4225 } 4226 4227 void Assembler::pusha() { // 64bit 4228 // we have to store original rsp. ABI says that 128 bytes 4229 // below rsp are local scratch. 
4230 movq(Address(rsp, -5 * wordSize), rsp); 4231 4232 subq(rsp, 16 * wordSize); 4233 4234 movq(Address(rsp, 15 * wordSize), rax); 4235 movq(Address(rsp, 14 * wordSize), rcx); 4236 movq(Address(rsp, 13 * wordSize), rdx); 4237 movq(Address(rsp, 12 * wordSize), rbx); 4238 // skip rsp 4239 movq(Address(rsp, 10 * wordSize), rbp); 4240 movq(Address(rsp, 9 * wordSize), rsi); 4241 movq(Address(rsp, 8 * wordSize), rdi); 4242 movq(Address(rsp, 7 * wordSize), r8); 4243 movq(Address(rsp, 6 * wordSize), r9); 4244 movq(Address(rsp, 5 * wordSize), r10); 4245 movq(Address(rsp, 4 * wordSize), r11); 4246 movq(Address(rsp, 3 * wordSize), r12); 4247 movq(Address(rsp, 2 * wordSize), r13); 4248 movq(Address(rsp, wordSize), r14); 4249 movq(Address(rsp, 0), r15); 4250 } 4251 4252 void Assembler::pushq(Address src) { 4253 InstructionMark im(this); 4254 prefixq(src); 4255 emit_byte(0xFF); 4256 emit_operand(rsi, src); 4257 } 4258 4259 void Assembler::rclq(Register dst, int imm8) { 4260 assert(isShiftCount(imm8 >> 1), "illegal shift count"); 4261 int encode = prefixq_and_encode(dst->encoding()); 4262 if (imm8 == 1) { 4263 emit_byte(0xD1); 4264 emit_byte(0xD0 | encode); 4265 } else { 4266 emit_byte(0xC1); 4267 emit_byte(0xD0 | encode); 4268 emit_byte(imm8); 4269 } 4270 } 4271 void Assembler::sarq(Register dst, int imm8) { 4272 assert(isShiftCount(imm8 >> 1), "illegal shift count"); 4273 int encode = prefixq_and_encode(dst->encoding()); 4274 if (imm8 == 1) { 4275 emit_byte(0xD1); 4276 emit_byte(0xF8 | encode); 4277 } else { 4278 emit_byte(0xC1); 4279 emit_byte(0xF8 | encode); 4280 emit_byte(imm8); 4281 } 4282 } 4283 4284 void Assembler::sarq(Register dst) { 4285 int encode = prefixq_and_encode(dst->encoding()); 4286 emit_byte(0xD3); 4287 emit_byte(0xF8 | encode); 4288 } 4289 void Assembler::sbbq(Address dst, int32_t imm32) { 4290 InstructionMark im(this); 4291 prefixq(dst); 4292 emit_arith_operand(0x81, rbx, dst, imm32); 4293 } 4294 4295 void Assembler::sbbq(Register dst, int32_t imm32) { 4296 
(void) prefixq_and_encode(dst->encoding()); 4297 emit_arith(0x81, 0xD8, dst, imm32); 4298 } 4299 4300 void Assembler::sbbq(Register dst, Address src) { 4301 InstructionMark im(this); 4302 prefixq(src, dst); 4303 emit_byte(0x1B); 4304 emit_operand(dst, src); 4305 } 4306 4307 void Assembler::sbbq(Register dst, Register src) { 4308 (void) prefixq_and_encode(dst->encoding(), src->encoding()); 4309 emit_arith(0x1B, 0xC0, dst, src); 4310 } 4311 4312 void Assembler::shlq(Register dst, int imm8) { 4313 assert(isShiftCount(imm8 >> 1), "illegal shift count"); 4314 int encode = prefixq_and_encode(dst->encoding()); 4315 if (imm8 == 1) { 4316 emit_byte(0xD1); 4317 emit_byte(0xE0 | encode); 4318 } else { 4319 emit_byte(0xC1); 4320 emit_byte(0xE0 | encode); 4321 emit_byte(imm8); 4322 } 4323 } 4324 4325 void Assembler::shlq(Register dst) { 4326 int encode = prefixq_and_encode(dst->encoding()); 4327 emit_byte(0xD3); 4328 emit_byte(0xE0 | encode); 4329 } 4330 4331 void Assembler::shrq(Register dst, int imm8) { 4332 assert(isShiftCount(imm8 >> 1), "illegal shift count"); 4333 int encode = prefixq_and_encode(dst->encoding()); 4334 emit_byte(0xC1); 4335 emit_byte(0xE8 | encode); 4336 emit_byte(imm8); 4337 } 4338 4339 void Assembler::shrq(Register dst) { 4340 int encode = prefixq_and_encode(dst->encoding()); 4341 emit_byte(0xD3); 4342 emit_byte(0xE8 | encode); 4343 } 4344 4345 void Assembler::sqrtsd(XMMRegister dst, Address src) { 4346 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 4347 InstructionMark im(this); 4348 emit_byte(0xF2); 4349 prefix(src, dst); 4350 emit_byte(0x0F); 4351 emit_byte(0x51); 4352 emit_operand(dst, src); 4353 } 4354 4355 void Assembler::subq(Address dst, int32_t imm32) { 4356 InstructionMark im(this); 4357 prefixq(dst); 4358 if (is8bit(imm32)) { 4359 emit_byte(0x83); 4360 emit_operand(rbp, dst, 1); 4361 emit_byte(imm32 & 0xFF); 4362 } else { 4363 emit_byte(0x81); 4364 emit_operand(rbp, dst, 4); 4365 emit_long(imm32); 4366 } 4367 } 4368 4369 void 
Assembler::subq(Register dst, int32_t imm32) { 4370 (void) prefixq_and_encode(dst->encoding()); 4371 emit_arith(0x81, 0xE8, dst, imm32); 4372 } 4373 4374 void Assembler::subq(Address dst, Register src) { 4375 InstructionMark im(this); 4376 prefixq(dst, src); 4377 emit_byte(0x29); 4378 emit_operand(src, dst); 4379 } 4380 4381 void Assembler::subq(Register dst, Address src) { 4382 InstructionMark im(this); 4383 prefixq(src, dst); 4384 emit_byte(0x2B); 4385 emit_operand(dst, src); 4386 } 4387 4388 void Assembler::subq(Register dst, Register src) { 4389 (void) prefixq_and_encode(dst->encoding(), src->encoding()); 4390 emit_arith(0x2B, 0xC0, dst, src); 4391 } 4392 4393 void Assembler::testq(Register dst, int32_t imm32) { 4394 // not using emit_arith because test 4395 // doesn't support sign-extension of 4396 // 8bit operands 4397 int encode = dst->encoding(); 4398 if (encode == 0) { 4399 prefix(REX_W); 4400 emit_byte(0xA9); 4401 } else { 4402 encode = prefixq_and_encode(encode); 4403 emit_byte(0xF7); 4404 emit_byte(0xC0 | encode); 4405 } 4406 emit_long(imm32); 4407 } 4408 4409 void Assembler::testq(Register dst, Register src) { 4410 (void) prefixq_and_encode(dst->encoding(), src->encoding()); 4411 emit_arith(0x85, 0xC0, dst, src); 4412 } 4413 4414 void Assembler::xaddq(Address dst, Register src) { 4415 InstructionMark im(this); 4416 prefixq(dst, src); 4417 emit_byte(0x0F); 4418 emit_byte(0xC1); 4419 emit_operand(src, dst); 4420 } 4421 4422 void Assembler::xchgq(Register dst, Address src) { 4423 InstructionMark im(this); 4424 prefixq(src, dst); 4425 emit_byte(0x87); 4426 emit_operand(dst, src); 4427 } 4428 4429 void Assembler::xchgq(Register dst, Register src) { 4430 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 4431 emit_byte(0x87); 4432 emit_byte(0xc0 | encode); 4433 } 4434 4435 void Assembler::xorq(Register dst, Register src) { 4436 (void) prefixq_and_encode(dst->encoding(), src->encoding()); 4437 emit_arith(0x33, 0xC0, dst, src); 4438 } 4439 4440 
void Assembler::xorq(Register dst, Address src) {
  // XOR r64, m64.
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x33);
  emit_operand(dst, src);
}

#endif // _LP64

// Maps each Assembler::Condition (used as the index) to its logical
// negation, e.g. reverse[Assembler::zero] == Assembler::notZero.
static Assembler::Condition reverse[] = {
    Assembler::noOverflow     /* overflow      = 0x0 */ ,
    Assembler::overflow       /* noOverflow    = 0x1 */ ,
    Assembler::aboveEqual     /* carrySet      = 0x2, below         = 0x2 */ ,
    Assembler::below          /* aboveEqual    = 0x3, carryClear    = 0x3 */ ,
    Assembler::notZero        /* zero          = 0x4, equal         = 0x4 */ ,
    Assembler::zero           /* notZero      = 0x5, notEqual      = 0x5 */ ,
    Assembler::above          /* belowEqual    = 0x6 */ ,
    Assembler::belowEqual     /* above         = 0x7 */ ,
    Assembler::positive       /* negative      = 0x8 */ ,
    Assembler::negative       /* positive      = 0x9 */ ,
    Assembler::noParity       /* parity        = 0xa */ ,
    Assembler::parity         /* noParity      = 0xb */ ,
    Assembler::greaterEqual   /* less          = 0xc */ ,
    Assembler::less           /* greaterEqual  = 0xd */ ,
    Assembler::greater        /* lessEqual     = 0xe */ ,
    Assembler::lessEqual      /* greater       = 0xf, */

};


// Implementation of MacroAssembler

// First all the versions that have distinct versions depending on 32/64 bit
// Unless the difference is trivial (1 line or so).

#ifndef _LP64

// 32bit versions

Address MacroAssembler::as_Address(AddressLiteral adr) {
  // On 32 bit a literal address is usable directly as an absolute Address.
  return Address(adr.target(), adr.rspec());
}

Address MacroAssembler::as_Address(ArrayAddress adr) {
  return Address::make_array(adr);
}

// Emits the fast path of biased locking: try to observe (and, if needed,
// acquire or re-acquire) a bias on obj_reg's header for the current thread.
// Falls through to cas_label for the normal CAS-based lock; jumps to 'done'
// on successful bias acquisition, and to *slow_case (if supplied) when the
// runtime must revoke/rebias. Returns the code offset of the instruction
// whose implicit null check covers the mark-word load.
int MacroAssembler::biased_locking_enter(Register lock_reg,
                                         Register obj_reg,
                                         Register swap_reg,
                                         Register tmp_reg,
                                         bool swap_reg_contains_mark,
                                         Label& done,
                                         Label* slow_case,
                                         BiasedLockingCounters* counters) {
  assert(UseBiasedLocking, "why call this otherwise?");
  assert(swap_reg == rax, "swap_reg must be rax, for cmpxchg");
  assert_different_registers(lock_reg, obj_reg, swap_reg);

  if (PrintBiasedLockingStatistics && counters == NULL)
    counters = BiasedLocking::counters();

  // With no dedicated tmp_reg, borrow lock_reg and save/restore it around
  // each use (need_tmp_reg tracks the push/pop obligation).
  bool need_tmp_reg = false;
  if (tmp_reg == noreg) {
    need_tmp_reg = true;
    tmp_reg = lock_reg;
  } else {
    assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg);
  }
  assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
  Address mark_addr      (obj_reg, oopDesc::mark_offset_in_bytes());
  Address klass_addr     (obj_reg, oopDesc::klass_offset_in_bytes());
  Address saved_mark_addr(lock_reg, 0);

  // Biased locking
  // See whether the lock is currently biased toward our thread and
  // whether the epoch is still valid
  // Note that the runtime guarantees sufficient alignment of JavaThread
  // pointers to allow age to be placed into low bits
  // First check to see whether biasing is even enabled for this object
  Label cas_label;
  int null_check_offset = -1;
  if (!swap_reg_contains_mark) {
    null_check_offset = offset();
    movl(swap_reg, mark_addr);
  }
  if (need_tmp_reg) {
    push(tmp_reg);
  }
  movl(tmp_reg, swap_reg);
  andl(tmp_reg, markOopDesc::biased_lock_mask_in_place);
  cmpl(tmp_reg, markOopDesc::biased_lock_pattern);
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  jcc(Assembler::notEqual, cas_label);
  // The bias pattern is present in the object's header. Need to check
  // whether the bias owner and the epoch are both still current.
  // Note that because there is no current thread register on x86 we
  // need to store off the mark word we read out of the object to
  // avoid reloading it and needing to recheck invariants below. This
  // store is unfortunate but it makes the overall code shorter and
  // simpler.
  movl(saved_mark_addr, swap_reg);
  if (need_tmp_reg) {
    push(tmp_reg);
  }
  get_thread(tmp_reg);
  xorl(swap_reg, tmp_reg);
  if (swap_reg_contains_mark) {
    null_check_offset = offset();
  }
  movl(tmp_reg, klass_addr);
  xorl(swap_reg, Address(tmp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
  andl(swap_reg, ~((int) markOopDesc::age_mask_in_place));
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  if (counters != NULL) {
    cond_inc32(Assembler::zero,
               ExternalAddress((address)counters->biased_lock_entry_count_addr()));
  }
  jcc(Assembler::equal, done);

  Label try_revoke_bias;
  Label try_rebias;

  // At this point we know that the header has the bias pattern and
  // that we are not the bias owner in the current epoch. We need to
  // figure out more details about the state of the header in order to
  // know what operations can be legally performed on the object's
  // header.

  // If the low three bits in the xor result aren't clear, that means
  // the prototype header is no longer biased and we have to revoke
  // the bias on this object.
  testl(swap_reg, markOopDesc::biased_lock_mask_in_place);
  jcc(Assembler::notZero, try_revoke_bias);

  // Biasing is still enabled for this data type. See whether the
  // epoch of the current bias is still valid, meaning that the epoch
  // bits of the mark word are equal to the epoch bits of the
  // prototype header. (Note that the prototype header's epoch bits
  // only change at a safepoint.) If not, attempt to rebias the object
  // toward the current thread. Note that we must be absolutely sure
  // that the current epoch is invalid in order to do this because
  // otherwise the manipulations it performs on the mark word are
  // illegal.
  testl(swap_reg, markOopDesc::epoch_mask_in_place);
  jcc(Assembler::notZero, try_rebias);

  // The epoch of the current bias is still valid but we know nothing
  // about the owner; it might be set or it might be clear. Try to
  // acquire the bias of the object using an atomic operation. If this
  // fails we will go in to the runtime to revoke the object's bias.
  // Note that we first construct the presumed unbiased header so we
  // don't accidentally blow away another thread's valid bias.
  movl(swap_reg, saved_mark_addr);
  andl(swap_reg,
       markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);
  if (need_tmp_reg) {
    push(tmp_reg);
  }
  get_thread(tmp_reg);
  orl(tmp_reg, swap_reg);
  if (os::is_MP()) {
    lock();
  }
  cmpxchgptr(tmp_reg, Address(obj_reg, 0));
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  // If the biasing toward our thread failed, this means that
  // another thread succeeded in biasing it toward itself and we
  // need to revoke that bias. The revocation will occur in the
  // interpreter runtime in the slow case.
  if (counters != NULL) {
    cond_inc32(Assembler::zero,
               ExternalAddress((address)counters->anonymously_biased_lock_entry_count_addr()));
  }
  if (slow_case != NULL) {
    jcc(Assembler::notZero, *slow_case);
  }
  jmp(done);

  bind(try_rebias);
  // At this point we know the epoch has expired, meaning that the
  // current "bias owner", if any, is actually invalid. Under these
  // circumstances _only_, we are allowed to use the current header's
  // value as the comparison value when doing the cas to acquire the
  // bias in the current epoch. In other words, we allow transfer of
  // the bias from one thread to another directly in this situation.
  //
  // FIXME: due to a lack of registers we currently blow away the age
  // bits in this situation. Should attempt to preserve them.
  if (need_tmp_reg) {
    push(tmp_reg);
  }
  get_thread(tmp_reg);
  movl(swap_reg, klass_addr);
  orl(tmp_reg, Address(swap_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
  movl(swap_reg, saved_mark_addr);
  if (os::is_MP()) {
    lock();
  }
  cmpxchgptr(tmp_reg, Address(obj_reg, 0));
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  // If the biasing toward our thread failed, then another thread
  // succeeded in biasing it toward itself and we need to revoke that
  // bias. The revocation will occur in the runtime in the slow case.
  if (counters != NULL) {
    cond_inc32(Assembler::zero,
               ExternalAddress((address)counters->rebiased_lock_entry_count_addr()));
  }
  if (slow_case != NULL) {
    jcc(Assembler::notZero, *slow_case);
  }
  jmp(done);

  bind(try_revoke_bias);
  // The prototype mark in the klass doesn't have the bias bit set any
  // more, indicating that objects of this data type are not supposed
  // to be biased any more. We are going to try to reset the mark of
  // this object to the prototype value and fall through to the
  // CAS-based locking scheme. Note that if our CAS fails, it means
  // that another thread raced us for the privilege of revoking the
  // bias of this particular object, so it's okay to continue in the
  // normal locking code.
  //
  // FIXME: due to a lack of registers we currently blow away the age
  // bits in this situation. Should attempt to preserve them.
  movl(swap_reg, saved_mark_addr);
  if (need_tmp_reg) {
    push(tmp_reg);
  }
  movl(tmp_reg, klass_addr);
  movl(tmp_reg, Address(tmp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
  if (os::is_MP()) {
    lock();
  }
  cmpxchgptr(tmp_reg, Address(obj_reg, 0));
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  // Fall through to the normal CAS-based lock, because no matter what
  // the result of the above CAS, some thread must have succeeded in
  // removing the bias bit from the object's header.
  if (counters != NULL) {
    cond_inc32(Assembler::zero,
               ExternalAddress((address)counters->revoked_lock_entry_count_addr()));
  }

  bind(cas_label);

  return null_check_offset;
}

// Calls a leaf runtime routine; the caller has already pushed the
// arguments, which are popped here after the call returns.
void MacroAssembler::call_VM_leaf_base(address entry_point,
                                       int number_of_arguments) {
  call(RuntimeAddress(entry_point));
  increment(rsp, number_of_arguments * wordSize);
}

void MacroAssembler::cmpoop(Address src1, jobject obj) {
  cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate());
}

void MacroAssembler::cmpoop(Register src1, jobject obj) {
  cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate());
}

// Sign-extends lo into hi (e.g. for setting up edx:eax before a divide).
void MacroAssembler::extend_sign(Register hi, Register lo) {
  // According to Intel Doc. AP-526, "Integer Divide", p.18.
  if (VM_Version::is_P6() && hi == rdx && lo == rax) {
    cdql();
  } else {
    movl(hi, lo);
    sarl(hi, 31);
  }
}

void MacroAssembler::fat_nop() {
  // A 5 byte nop that is safe for patching (see patch_verified_entry)
  emit_byte(0x26); // es:
  emit_byte(0x2e); // cs:
  emit_byte(0x64); // fs:
  emit_byte(0x65); // gs:
  emit_byte(0x90);
}

void MacroAssembler::jC2(Register tmp, Label& L) {
  // set parity bit if FPU flag C2 is set (via rax)
  save_rax(tmp);
  fwait(); fnstsw_ax();
  sahf();
  restore_rax(tmp);
  // branch
  jcc(Assembler::parity, L);
}

void MacroAssembler::jnC2(Register tmp, Label& L) {
  // set parity bit if FPU flag C2 is set (via rax)
  save_rax(tmp);
  fwait(); fnstsw_ax();
  sahf();
  restore_rax(tmp);
  // branch
  jcc(Assembler::noParity, L);
}

// 32bit can do a case table jump in one instruction but we no longer allow the base
// to be installed in the Address class
void MacroAssembler::jump(ArrayAddress entry) {
  jmp(as_Address(entry));
}

// Note: y_lo will be destroyed
void MacroAssembler::lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo) {
  // Long compare for Java (semantics as described in JVM spec.)
  // Result in x_hi: -1, 0 or 1.
  Label high, low, done;

  cmpl(x_hi, y_hi);
  jcc(Assembler::less, low);
  jcc(Assembler::greater, high);
  // x_hi is the return register
  xorl(x_hi, x_hi);
  cmpl(x_lo, y_lo);
  jcc(Assembler::below, low);
  jcc(Assembler::equal, done);

  bind(high);
  xorl(x_hi, x_hi);
  increment(x_hi);
  jmp(done);

  bind(low);
  xorl(x_hi, x_hi);
  decrementl(x_hi);

  bind(done);
}

void MacroAssembler::lea(Register dst, AddressLiteral src) {
  mov_literal32(dst, (int32_t)src.target(), src.rspec());
}

void MacroAssembler::lea(Address dst, AddressLiteral adr) {
  // leal(dst, as_Address(adr));
  // see note in movl as to why we must use a move
  mov_literal32(dst, (int32_t) adr.target(), adr.rspec());
}

void MacroAssembler::leave() {
  mov(rsp, rbp);
  pop(rbp);
}

void MacroAssembler::lmul(int x_rsp_offset, int y_rsp_offset) {
  // Multiplication of two Java long values stored on the stack
  // as illustrated below. Result is in rdx:rax.
  //
  // rsp ---> [  ??  ] \               \
  //            ....    | y_rsp_offset  |
  //          [ y_lo ] /  (in bytes)    | x_rsp_offset
  //          [ y_hi ]                  | (in bytes)
  //            ....                    |
  //          [ x_lo ]                 /
  //          [ x_hi ]
  //            ....
  //
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
  Address x_hi(rsp, x_rsp_offset + wordSize); Address x_lo(rsp, x_rsp_offset);
  Address y_hi(rsp, y_rsp_offset + wordSize); Address y_lo(rsp, y_rsp_offset);
  Label quick;
  // load x_hi, y_hi and check if quick
  // multiplication is possible
  movl(rbx, x_hi);
  movl(rcx, y_hi);
  movl(rax, rbx);
  orl(rbx, rcx);                                 // rbx, = 0 <=> x_hi = 0 and y_hi = 0
  jcc(Assembler::zero, quick);                   // if rbx, = 0 do quick multiply
  // do full multiplication
  // 1st step
  mull(y_lo);                                    // x_hi * y_lo
  movl(rbx, rax);                                // save lo(x_hi * y_lo) in rbx,
  // 2nd step
  movl(rax, x_lo);
  mull(rcx);                                     // x_lo * y_hi
  addl(rbx, rax);                                // add lo(x_lo * y_hi) to rbx,
  // 3rd step
  bind(quick);                                   // note: rbx, = 0 if quick multiply!
  movl(rax, x_lo);
  mull(y_lo);                                    // x_lo * y_lo
  addl(rdx, rbx);                                // correct hi(x_lo * y_lo)
}

// Two's-complement negation of the 64-bit value in hi:lo.
void MacroAssembler::lneg(Register hi, Register lo) {
  negl(lo);
  adcl(hi, 0);
  negl(hi);
}

void MacroAssembler::lshl(Register hi, Register lo) {
  // Java shift left long support (semantics as described in JVM spec., p.305)
  // (basic idea for shift counts s >= n: x << s == (x << n) << (s - n))
  // shift value is in rcx !
  assert(hi != rcx, "must not use rcx");
  assert(lo != rcx, "must not use rcx");
  const Register s = rcx;                        // shift count
  const int      n = BitsPerWord;
  Label L;
  andl(s, 0x3f);                                 // s := s & 0x3f (s < 0x40)
  cmpl(s, n);                                    // if (s < n)
  jcc(Assembler::less, L);                       // else (s >= n)
  movl(hi, lo);                                  // x := x << n
  xorl(lo, lo);
  // Note: subl(s, n) is not needed since the Intel shift instructions work rcx mod n!
  bind(L);                                       // s (mod n) < n
  shldl(hi, lo);                                 // x := x << s
  shll(lo);
}


void MacroAssembler::lshr(Register hi, Register lo, bool sign_extension) {
  // Java shift right long support (semantics as described in JVM spec., p.306 & p.310)
  // (basic idea for shift counts s >= n: x >> s == (x >> n) >> (s - n))
  assert(hi != rcx, "must not use rcx");
  assert(lo != rcx, "must not use rcx");
  const Register s = rcx;                        // shift count
  const int      n = BitsPerWord;
  Label L;
  andl(s, 0x3f);                                 // s := s & 0x3f (s < 0x40)
  cmpl(s, n);                                    // if (s < n)
  jcc(Assembler::less, L);                       // else (s >= n)
  movl(lo, hi);                                  // x := x >> n
  if (sign_extension) sarl(hi, 31);
  else                xorl(hi, hi);
  // Note: subl(s, n) is not needed since the Intel shift instructions work rcx mod n!
  bind(L);                                       // s (mod n) < n
  shrdl(lo, hi);                                 // x := x >> s
  if (sign_extension) sarl(hi);
  else                shrl(hi);
}

void MacroAssembler::movoop(Register dst, jobject obj) {
  mov_literal32(dst, (int32_t)obj, oop_Relocation::spec_for_immediate());
}

void MacroAssembler::movoop(Address dst, jobject obj) {
  mov_literal32(dst, (int32_t)obj, oop_Relocation::spec_for_immediate());
}

void MacroAssembler::movptr(Register dst, AddressLiteral src) {
  if (src.is_lval()) {
    // The literal's address itself is wanted, not the value at it.
    mov_literal32(dst, (intptr_t)src.target(), src.rspec());
  } else {
    movl(dst, as_Address(src));
  }
}

void MacroAssembler::movptr(ArrayAddress dst, Register src) {
  movl(as_Address(dst), src);
}

void MacroAssembler::movptr(Register dst, ArrayAddress src) {
  movl(dst, as_Address(src));
}

// src should NEVER be a real pointer. Use AddressLiteral for true pointers
void MacroAssembler::movptr(Address dst, intptr_t src) {
  movl(dst, src);
}


void MacroAssembler::movsd(XMMRegister dst, AddressLiteral src) {
  movsd(dst, as_Address(src));
}

void MacroAssembler::pop_callee_saved_registers() {
  pop(rcx);
  pop(rdx);
  pop(rdi);
  pop(rsi);
}

// Pops the FPU top-of-stack value saved by push_fTOS back onto the FPU stack.
void MacroAssembler::pop_fTOS() {
  fld_d(Address(rsp, 0));
  addl(rsp, 2 * wordSize);
}

void MacroAssembler::push_callee_saved_registers() {
  push(rsi);
  push(rdi);
  push(rdx);
  push(rcx);
}

// Spills the FPU top-of-stack value to the stack (double, 2 words).
void MacroAssembler::push_fTOS() {
  subl(rsp, 2 * wordSize);
  fstp_d(Address(rsp, 0));
}


void MacroAssembler::pushoop(jobject obj) {
  push_literal32((int32_t)obj, oop_Relocation::spec_for_immediate());
}


void MacroAssembler::pushptr(AddressLiteral src) {
  if (src.is_lval()) {
    push_literal32((int32_t)src.target(), src.rspec());
  } else {
    pushl(as_Address(src));
  }
}

// dst := 1 if ZF is clear, else 0.
void MacroAssembler::set_word_if_not_zero(Register dst) {
  xorl(dst, dst);
  set_byte_if_not_zero(dst);
}

// On 32 bit, C calling convention passes arguments on the stack.
static void pass_arg0(MacroAssembler* masm, Register arg) {
  masm->push(arg);
}

static void pass_arg1(MacroAssembler* masm, Register arg) {
  masm->push(arg);
}

static void pass_arg2(MacroAssembler* masm, Register arg) {
  masm->push(arg);
}

static void pass_arg3(MacroAssembler* masm, Register arg) {
  masm->push(arg);
}

#ifndef PRODUCT
extern "C" void findpc(intptr_t x);
#endif

// Runtime target of MacroAssembler::stop() on 32 bit: prints the saved
// register state and the stop message, optionally via a message box.
void MacroAssembler::debug32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip, char* msg) {
  // In order to get locks to work, we need to fake a in_VM state
  JavaThread* thread = JavaThread::current();
  JavaThreadState saved_state = thread->thread_state();
  thread->set_thread_state(_thread_in_vm);
  if (ShowMessageBoxOnError) {
    // NOTE(review): these three lines shadow the identically-named outer
    // locals and repeat the state transition; looks redundant — confirm
    // before cleaning up.
    JavaThread* thread = JavaThread::current();
    JavaThreadState saved_state = thread->thread_state();
    thread->set_thread_state(_thread_in_vm);
    if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
      ttyLocker ttyl;
      BytecodeCounter::print();
    }
    // To see where a verify_oop failed, get $ebx+40/X for this frame.
    // This is the value of eip which points to where verify_oop will return.
    if (os::message_box(msg, "Execution stopped, print registers?")) {
      ttyLocker ttyl;
      tty->print_cr("eip = 0x%08x", eip);
#ifndef PRODUCT
      if ((WizardMode || Verbose) && PrintMiscellaneous) {
        tty->cr();
        findpc(eip);
        tty->cr();
      }
#endif
      tty->print_cr("rax = 0x%08x", rax);
      tty->print_cr("rbx = 0x%08x", rbx);
      tty->print_cr("rcx = 0x%08x", rcx);
      tty->print_cr("rdx = 0x%08x", rdx);
      tty->print_cr("rdi = 0x%08x", rdi);
      tty->print_cr("rsi = 0x%08x", rsi);
      tty->print_cr("rbp = 0x%08x", rbp);
      tty->print_cr("rsp = 0x%08x", rsp);
      BREAKPOINT;
      assert(false, "start up GDB");
    }
  } else {
    ttyLocker ttyl;
    ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg);
    assert(false, "DEBUG MESSAGE");
  }
  ThreadStateTransition::transition(thread, _thread_in_vm, saved_state);
}

// Emits code that halts the VM with the given message (see debug32 above).
void MacroAssembler::stop(const char* msg) {
  ExternalAddress message((address)msg);
  // push address of message
  pushptr(message.addr());
  { Label L; call(L, relocInfo::none); bind(L); }     // push eip
  pusha();                                            // push registers
  call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug32)));
  hlt();
}

// Emits code that prints a warning and continues (full CPU state preserved).
void MacroAssembler::warn(const char* msg) {
  push_CPU_state();

  ExternalAddress message((address) msg);
  // push address of message
  pushptr(message.addr());

  call(RuntimeAddress(CAST_FROM_FN_PTR(address, warning)));
  addl(rsp, wordSize);       // discard argument
  pop_CPU_state();
}

#else // _LP64

// 64 bit versions

Address MacroAssembler::as_Address(AddressLiteral adr) {
  // amd64 always does this as a pc-rel
  // we can be absolute or disp based on the instruction type
  // jmp/call are displacements others are absolute
  assert(!adr.is_lval(), "must be rval");
  assert(reachable(adr), "must be");
  return Address((int32_t)(intptr_t)(adr.target() - pc()), adr.target(), adr.reloc());

}

Address MacroAssembler::as_Address(ArrayAddress adr) {
  // Materialize the base in rscratch1 and index off it.
  AddressLiteral base = adr.base();
  lea(rscratch1, base);
  Address index = adr.index();
  assert(index._disp == 0, "must not have disp"); // maybe it can?
  Address array(rscratch1, index._index, index._scale, index._disp);
  return array;
}

// 64-bit variant of biased_locking_enter (see the 32-bit version above for
// the overall protocol). Differences: the mark word is handled with 64-bit
// ops, the current thread is available in r15_thread (no get_thread /
// tmp_reg borrowing needed), and the prototype header is loaded via
// load_prototype_header.
int MacroAssembler::biased_locking_enter(Register lock_reg,
                                         Register obj_reg,
                                         Register swap_reg,
                                         Register tmp_reg,
                                         bool swap_reg_contains_mark,
                                         Label& done,
                                         Label* slow_case,
                                         BiasedLockingCounters* counters) {
  assert(UseBiasedLocking, "why call this otherwise?");
  assert(swap_reg == rax, "swap_reg must be rax for cmpxchgq");
  assert(tmp_reg != noreg, "tmp_reg must be supplied");
  assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg);
  assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
  Address mark_addr      (obj_reg, oopDesc::mark_offset_in_bytes());
  Address saved_mark_addr(lock_reg, 0);

  if (PrintBiasedLockingStatistics && counters == NULL)
    counters = BiasedLocking::counters();

  // Biased locking
  // See whether the lock is currently biased toward our thread and
  // whether the epoch is still valid
  // Note that the runtime guarantees sufficient alignment of JavaThread
  // pointers to allow age to be placed into low bits
  // First check to see whether biasing is even enabled for this object
  Label cas_label;
  int null_check_offset = -1;
  if (!swap_reg_contains_mark) {
    null_check_offset = offset();
    movq(swap_reg, mark_addr);
  }
  movq(tmp_reg, swap_reg);
  andq(tmp_reg, markOopDesc::biased_lock_mask_in_place);
  cmpq(tmp_reg, markOopDesc::biased_lock_pattern);
  jcc(Assembler::notEqual, cas_label);
  // The bias pattern is present in the object's header. Need to check
  // whether the bias owner and the epoch are both still current.
  load_prototype_header(tmp_reg, obj_reg);
  orq(tmp_reg, r15_thread);
  xorq(tmp_reg, swap_reg);
  andq(tmp_reg, ~((int) markOopDesc::age_mask_in_place));
  if (counters != NULL) {
    // NOTE(review): the 32-bit version increments biased_lock_entry_count
    // at the corresponding point; here the anonymously-biased counter is
    // used — confirm which counter is intended.
    cond_inc32(Assembler::zero,
               ExternalAddress((address) counters->anonymously_biased_lock_entry_count_addr()));
  }
  jcc(Assembler::equal, done);

  Label try_revoke_bias;
  Label try_rebias;

  // At this point we know that the header has the bias pattern and
  // that we are not the bias owner in the current epoch. We need to
  // figure out more details about the state of the header in order to
  // know what operations can be legally performed on the object's
  // header.

  // If the low three bits in the xor result aren't clear, that means
  // the prototype header is no longer biased and we have to revoke
  // the bias on this object.
  testq(tmp_reg, markOopDesc::biased_lock_mask_in_place);
  jcc(Assembler::notZero, try_revoke_bias);

  // Biasing is still enabled for this data type. See whether the
  // epoch of the current bias is still valid, meaning that the epoch
  // bits of the mark word are equal to the epoch bits of the
  // prototype header. (Note that the prototype header's epoch bits
  // only change at a safepoint.) If not, attempt to rebias the object
  // toward the current thread. Note that we must be absolutely sure
  // that the current epoch is invalid in order to do this because
  // otherwise the manipulations it performs on the mark word are
  // illegal.
  testq(tmp_reg, markOopDesc::epoch_mask_in_place);
  jcc(Assembler::notZero, try_rebias);

  // The epoch of the current bias is still valid but we know nothing
  // about the owner; it might be set or it might be clear. Try to
  // acquire the bias of the object using an atomic operation. If this
  // fails we will go in to the runtime to revoke the object's bias.
  // Note that we first construct the presumed unbiased header so we
  // don't accidentally blow away another thread's valid bias.
  andq(swap_reg,
       markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);
  movq(tmp_reg, swap_reg);
  orq(tmp_reg, r15_thread);
  if (os::is_MP()) {
    lock();
  }
  cmpxchgq(tmp_reg, Address(obj_reg, 0));
  // If the biasing toward our thread failed, this means that
  // another thread succeeded in biasing it toward itself and we
  // need to revoke that bias. The revocation will occur in the
  // interpreter runtime in the slow case.
  if (counters != NULL) {
    cond_inc32(Assembler::zero,
               ExternalAddress((address) counters->anonymously_biased_lock_entry_count_addr()));
  }
  if (slow_case != NULL) {
    jcc(Assembler::notZero, *slow_case);
  }
  jmp(done);

  bind(try_rebias);
  // At this point we know the epoch has expired, meaning that the
  // current "bias owner", if any, is actually invalid. Under these
  // circumstances _only_, we are allowed to use the current header's
  // value as the comparison value when doing the cas to acquire the
  // bias in the current epoch. In other words, we allow transfer of
  // the bias from one thread to another directly in this situation.
  //
  // FIXME: due to a lack of registers we currently blow away the age
  // bits in this situation. Should attempt to preserve them.
  load_prototype_header(tmp_reg, obj_reg);
  orq(tmp_reg, r15_thread);
  if (os::is_MP()) {
    lock();
  }
  cmpxchgq(tmp_reg, Address(obj_reg, 0));
  // If the biasing toward our thread failed, then another thread
  // succeeded in biasing it toward itself and we need to revoke that
  // bias. The revocation will occur in the runtime in the slow case.
  if (counters != NULL) {
    cond_inc32(Assembler::zero,
               ExternalAddress((address) counters->rebiased_lock_entry_count_addr()));
  }
  if (slow_case != NULL) {
    jcc(Assembler::notZero, *slow_case);
  }
  jmp(done);

  bind(try_revoke_bias);
  // The prototype mark in the klass doesn't have the bias bit set any
  // more, indicating that objects of this data type are not supposed
  // to be biased any more. We are going to try to reset the mark of
  // this object to the prototype value and fall through to the
  // CAS-based locking scheme. Note that if our CAS fails, it means
  // that another thread raced us for the privilege of revoking the
  // bias of this particular object, so it's okay to continue in the
  // normal locking code.
  //
  // FIXME: due to a lack of registers we currently blow away the age
  // bits in this situation. Should attempt to preserve them.
  load_prototype_header(tmp_reg, obj_reg);
  if (os::is_MP()) {
    lock();
  }
  cmpxchgq(tmp_reg, Address(obj_reg, 0));
  // Fall through to the normal CAS-based lock, because no matter what
  // the result of the above CAS, some thread must have succeeded in
  // removing the bias bit from the object's header.
  if (counters != NULL) {
    cond_inc32(Assembler::zero,
               ExternalAddress((address) counters->revoked_lock_entry_count_addr()));
  }

  bind(cas_label);

  return null_check_offset;
}

// Calls a leaf runtime routine with register arguments, keeping rsp
// 16-byte aligned across the call (assumes any misalignment is by 8).
void MacroAssembler::call_VM_leaf_base(address entry_point, int num_args) {
  Label L, E;

#ifdef _WIN64
  // Windows always allocates space for it's register args
  assert(num_args <= 4, "only register arguments supported");
  subq(rsp,  frame::arg_reg_save_area_bytes);
#endif

  // Align stack if necessary
  testl(rsp, 15);
  jcc(Assembler::zero, L);

  subq(rsp, 8);
  {
    call(RuntimeAddress(entry_point));
  }
  addq(rsp, 8);
  jmp(E);

  bind(L);
  {
    call(RuntimeAddress(entry_point));
  }

  bind(E);

#ifdef _WIN64
  // restore stack pointer
  addq(rsp, frame::arg_reg_save_area_bytes);
#endif

}

void MacroAssembler::cmp64(Register src1, AddressLiteral src2) {
  assert(!src2.is_lval(), "should use cmpptr");

  if (reachable(src2)) {
    // Target is within 32-bit pc-relative reach; compare directly.
    cmpq(src1, as_Address(src2));
  } else {
    lea(rscratch1, src2);
    Assembler::cmpq(src1, Address(rscratch1, 0));
  }
}

int MacroAssembler::corrected_idivq(Register reg) {
  // Full implementation of Java ldiv and lrem; checks for special
  // case as described in JVM spec., p.243 & p.271. The function
  // returns the (pc) offset of the idivl instruction - may be needed
  // for implicit exceptions.
  //
  //         normal case                           special case
  //
  // input : rax: dividend                         min_long
  //         reg: divisor   (may not be eax/edx)   -1
  //
  // output: rax: quotient  (= rax idiv reg)       min_long
  //         rdx: remainder (= rax irem reg)       0
  assert(reg != rax && reg != rdx, "reg cannot be rax or rdx register");
  static const int64_t min_long = 0x8000000000000000;
  Label normal_case, special_case;

  // check for special case
  cmp64(rax, ExternalAddress((address) &min_long));
  jcc(Assembler::notEqual, normal_case);
  xorl(rdx, rdx); // prepare rdx for possible special case (where
                  // remainder = 0)
  cmpq(reg, -1);
  jcc(Assembler::equal, special_case);

  // handle normal case
  bind(normal_case);
  cdqq();
  int idivq_offset = offset();
  idivq(reg);

  // normal and special case exit
  bind(special_case);

  return idivq_offset;
}

void MacroAssembler::decrementq(Register reg, int value) {
  if (value == min_jint) { subq(reg, value); return; }
  if (value <  0) { incrementq(reg, -value); return; }
  if (value == 0) {                        ; return; }
  if (value == 1 && UseIncDec) { decq(reg) ; return; }
  /* else */      { subq(reg, value)       ; return; }
}

void MacroAssembler::decrementq(Address dst, int value) {
  if (value == min_jint) { subq(dst, value); return; }
  if (value <  0) { incrementq(dst, -value); return; }
  if (value == 0) {                        ; return; }
  if (value == 1 && UseIncDec) { decq(dst) ; return; }
  /* else */      { subq(dst, value)       ; return; }
}

void MacroAssembler::fat_nop() {
  // A 5 byte nop that is safe for patching (see patch_verified_entry)
  // Recommened sequence from 'Software Optimization Guide for the AMD
  // Hammer Processor'
  emit_byte(0x66);
  emit_byte(0x66);
  emit_byte(0x90);
  emit_byte(0x66);
  emit_byte(0x90);
}

void MacroAssembler::incrementq(Register reg, int value) {
  if (value == min_jint) { addq(reg, value); return; }
  if (value <  0) { decrementq(reg, -value); return; }
  if (value == 0) {                        ; return; }
  if (value == 1 && UseIncDec) { incq(reg) ; return; }
  /* else */      { addq(reg, value)       ; return; }
}

void MacroAssembler::incrementq(Address dst, int value) {
  if (value == min_jint) { addq(dst, value); return; }
  if (value <  0) { decrementq(dst, -value); return; }
  if (value == 0) {                        ; return; }
  if (value == 1 && UseIncDec) { incq(dst) ; return; }
  /* else */      { addq(dst, value)       ; return; }
}

// 32bit can do a case table jump in one instruction but we no longer allow the base
// to be installed in the Address class
void MacroAssembler::jump(ArrayAddress entry) {
  lea(rscratch1, entry.base());
  Address dispatch = entry.index();
  assert(dispatch._base == noreg, "must be");
  dispatch._base = rscratch1;
  jmp(dispatch);
}

void MacroAssembler::lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo) {
  ShouldNotReachHere(); // 64bit doesn't use two regs
  cmpq(x_lo, y_lo);
}

void MacroAssembler::lea(Register dst, AddressLiteral src) {
  mov_literal64(dst, (intptr_t)src.target(), src.rspec());
}

void MacroAssembler::lea(Address dst, AddressLiteral adr) {
  // No direct mem64 <- imm64 move; stage the literal through rscratch1.
  mov_literal64(rscratch1, (intptr_t)adr.target(), adr.rspec());
  movptr(dst, rscratch1);
}

void MacroAssembler::leave() {
  // %%% is this really better? Why not on 32bit too?
  emit_byte(0xC9); // LEAVE
}

void MacroAssembler::lneg(Register hi, Register lo) {
  ShouldNotReachHere(); // 64bit doesn't use two regs
  negq(lo);
}

void MacroAssembler::movoop(Register dst, jobject obj) {
  mov_literal64(dst, (intptr_t)obj, oop_Relocation::spec_for_immediate());
}

void MacroAssembler::movoop(Address dst, jobject obj) {
  // No direct mem64 <- imm64 move; stage through rscratch1.
  mov_literal64(rscratch1, (intptr_t)obj, oop_Relocation::spec_for_immediate());
  movq(dst, rscratch1);
}

void MacroAssembler::movptr(Register dst, AddressLiteral src) {
  if (src.is_lval()) {
    // The literal's address itself is wanted, not the value at it.
    mov_literal64(dst, (intptr_t)src.target(), src.rspec());
  } else {
    if (reachable(src)) {
      movq(dst, as_Address(src));
    } else {
      lea(rscratch1, src);
      movq(dst, Address(rscratch1,0));
    }
  }
}

void MacroAssembler::movptr(ArrayAddress dst, Register src) {
  movq(as_Address(dst), src);
}

void MacroAssembler::movptr(Register dst, ArrayAddress src) {
  movq(dst, as_Address(src));
}

// src should NEVER be a real pointer.
Use AddressLiteral for true pointers 5421 void MacroAssembler::movptr(Address dst, intptr_t src) { 5422 mov64(rscratch1, src); 5423 movq(dst, rscratch1); 5424 } 5425 5426 // These are mostly for initializing NULL 5427 void MacroAssembler::movptr(Address dst, int32_t src) { 5428 movslq(dst, src); 5429 } 5430 5431 void MacroAssembler::movptr(Register dst, int32_t src) { 5432 mov64(dst, (intptr_t)src); 5433 } 5434 5435 void MacroAssembler::pushoop(jobject obj) { 5436 movoop(rscratch1, obj); 5437 push(rscratch1); 5438 } 5439 5440 void MacroAssembler::pushptr(AddressLiteral src) { 5441 lea(rscratch1, src); 5442 if (src.is_lval()) { 5443 push(rscratch1); 5444 } else { 5445 pushq(Address(rscratch1, 0)); 5446 } 5447 } 5448 5449 void MacroAssembler::reset_last_Java_frame(bool clear_fp, 5450 bool clear_pc) { 5451 // we must set sp to zero to clear frame 5452 movptr(Address(r15_thread, JavaThread::last_Java_sp_offset()), NULL_WORD); 5453 // must clear fp, so that compiled frames are not confused; it is 5454 // possible that we need it only for debugging 5455 if (clear_fp) { 5456 movptr(Address(r15_thread, JavaThread::last_Java_fp_offset()), NULL_WORD); 5457 } 5458 5459 if (clear_pc) { 5460 movptr(Address(r15_thread, JavaThread::last_Java_pc_offset()), NULL_WORD); 5461 } 5462 } 5463 5464 void MacroAssembler::set_last_Java_frame(Register last_java_sp, 5465 Register last_java_fp, 5466 address last_java_pc) { 5467 // determine last_java_sp register 5468 if (!last_java_sp->is_valid()) { 5469 last_java_sp = rsp; 5470 } 5471 5472 // last_java_fp is optional 5473 if (last_java_fp->is_valid()) { 5474 movptr(Address(r15_thread, JavaThread::last_Java_fp_offset()), 5475 last_java_fp); 5476 } 5477 5478 // last_java_pc is optional 5479 if (last_java_pc != NULL) { 5480 Address java_pc(r15_thread, 5481 JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()); 5482 lea(rscratch1, InternalAddress(last_java_pc)); 5483 movptr(java_pc, rscratch1); 5484 } 5485 5486 
  // Store sp last: once it is set the frame anchor is complete.
  movptr(Address(r15_thread, JavaThread::last_Java_sp_offset()), last_java_sp);
}

// Move arg into the ABI's first C argument register, unless already there.
static void pass_arg0(MacroAssembler* masm, Register arg) {
  if (c_rarg0 != arg ) {
    masm->mov(c_rarg0, arg);
  }
}

static void pass_arg1(MacroAssembler* masm, Register arg) {
  if (c_rarg1 != arg ) {
    masm->mov(c_rarg1, arg);
  }
}

static void pass_arg2(MacroAssembler* masm, Register arg) {
  if (c_rarg2 != arg ) {
    masm->mov(c_rarg2, arg);
  }
}

static void pass_arg3(MacroAssembler* masm, Register arg) {
  if (c_rarg3 != arg ) {
    masm->mov(c_rarg3, arg);
  }
}

// Emit code that halts the VM with a message: saves all registers, then
// calls debug64(msg, rip, regs) and executes hlt.
void MacroAssembler::stop(const char* msg) {
  address rip = pc();
  pusha(); // get regs on stack
  lea(c_rarg0, ExternalAddress((address) msg));
  lea(c_rarg1, InternalAddress(rip));
  movq(c_rarg2, rsp); // pass pointer to regs array
  andq(rsp, -16); // align stack as required by ABI
  call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug64)));
  hlt();
}

// Emit code that prints a warning via the VM's warning() and continues.
// Preserves all CPU state; r12 is used to remember the unaligned rsp.
void MacroAssembler::warn(const char* msg) {
  push(r12);
  movq(r12, rsp);
  andq(rsp, -16);     // align stack as required by push_CPU_state and call

  push_CPU_state();   // keeps alignment at 16 bytes
  lea(c_rarg0, ExternalAddress((address) msg));
  call_VM_leaf(CAST_FROM_FN_PTR(address, warning), c_rarg0);
  pop_CPU_state();

  movq(rsp, r12);
  pop(r12);
}

#ifndef PRODUCT
extern "C" void findpc(intptr_t x);
#endif

// Runtime (C++) half of stop(): optionally shows a message box and dumps
// the register array that stop()'s pusha() built (regs[] is in pusha order,
// hence the reversed indices below).
void MacroAssembler::debug64(char* msg, int64_t pc, int64_t regs[]) {
  // In order to get locks to work, we need to fake an in_VM state
  if (ShowMessageBoxOnError ) {
    JavaThread* thread = JavaThread::current();
    JavaThreadState saved_state = thread->thread_state();
    thread->set_thread_state(_thread_in_vm);
#ifndef PRODUCT
    if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
      ttyLocker ttyl;
      BytecodeCounter::print();
    }
#endif
    // To see where a verify_oop failed, get $ebx+40/X for this frame.
    // XXX correct this offset for amd64
    // This is the value of eip which points to where verify_oop will return.
    if (os::message_box(msg, "Execution stopped, print registers?")) {
      ttyLocker ttyl;
      // NOTE(review): "%016lx" assumes a 64-bit long; not correct on LLP64
      // (Win64) — confirm whether this path is built there.
      tty->print_cr("rip = 0x%016lx", pc);
#ifndef PRODUCT
      tty->cr();
      findpc(pc);
      tty->cr();
#endif
      tty->print_cr("rax = 0x%016lx", regs[15]);
      tty->print_cr("rbx = 0x%016lx", regs[12]);
      tty->print_cr("rcx = 0x%016lx", regs[14]);
      tty->print_cr("rdx = 0x%016lx", regs[13]);
      tty->print_cr("rdi = 0x%016lx", regs[8]);
      tty->print_cr("rsi = 0x%016lx", regs[9]);
      tty->print_cr("rbp = 0x%016lx", regs[10]);
      tty->print_cr("rsp = 0x%016lx", regs[11]);
      tty->print_cr("r8  = 0x%016lx", regs[7]);
      tty->print_cr("r9  = 0x%016lx", regs[6]);
      tty->print_cr("r10 = 0x%016lx", regs[5]);
      tty->print_cr("r11 = 0x%016lx", regs[4]);
      tty->print_cr("r12 = 0x%016lx", regs[3]);
      tty->print_cr("r13 = 0x%016lx", regs[2]);
      tty->print_cr("r14 = 0x%016lx", regs[1]);
      tty->print_cr("r15 = 0x%016lx", regs[0]);
      BREAKPOINT;
    }
    ThreadStateTransition::transition(thread, _thread_in_vm, saved_state);
  } else {
    ttyLocker ttyl;
    ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n",
                    msg);
  }
}

#endif // _LP64

// Now versions that are common to 32/64 bit

void MacroAssembler::addptr(Register dst, int32_t imm32) {
  LP64_ONLY(addq(dst, imm32)) NOT_LP64(addl(dst, imm32));
}

void MacroAssembler::addptr(Register dst, Register src) {
  LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src));
}

void MacroAssembler::addptr(Address dst, Register src) {
  LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src));
}

// Pad with nops until the code offset is a multiple of modulus.
void MacroAssembler::align(int modulus) {
  if (offset() % modulus != 0) {
    nop(modulus - (offset() % modulus));
  }
}

// andpd with a literal operand; uses rscratch1 when not RIP-reachable.
void MacroAssembler::andpd(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    andpd(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    andpd(dst, Address(rscratch1, 0));
  }
}

void MacroAssembler::andptr(Register dst, int32_t imm32) {
  LP64_ONLY(andq(dst, imm32)) NOT_LP64(andl(dst, imm32));
}

// Atomically increment the 32-bit counter at counter_addr. Flags are saved
// and restored around the locked increment.
void MacroAssembler::atomic_incl(AddressLiteral counter_addr) {
  pushf();
  if (os::is_MP())
    lock();
  incrementl(counter_addr);
  popf();
}

// Writes to stack successive pages until offset reached to check for
// stack overflow + shadow pages.  This clobbers tmp.
void MacroAssembler::bang_stack_size(Register size, Register tmp) {
  movptr(tmp, rsp);
  // Bang stack for total size given plus shadow page size.
  // Bang one page at a time because large size can bang beyond yellow and
  // red zones.
  Label loop;
  bind(loop);
  movl(Address(tmp, (-os::vm_page_size())), size );
  subptr(tmp, os::vm_page_size());
  subl(size, os::vm_page_size());
  jcc(Assembler::greater, loop);

  // Bang down shadow pages too.
  // The -1 because we already subtracted 1 page.
  for (int i = 0; i< StackShadowPages-1; i++) {
    // this could be any sized move but this can be a debugging crumb
    // so the bigger the better.
    movptr(Address(tmp, (-i*os::vm_page_size())), size );
  }
}

void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) {
  assert(UseBiasedLocking, "why call this otherwise?");

  // Check for biased locking unlock case, which is a no-op
  // Note: we do not have to check the thread ID for two reasons.
  // First, the interpreter checks for IllegalMonitorStateException at
  // a higher level. Second, if the bias was revoked while we held the
  // lock, the object could not be rebiased toward another thread, so
  // the bias bit would be clear.
  movptr(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
  andptr(temp_reg, markOopDesc::biased_lock_mask_in_place);
  cmpptr(temp_reg, markOopDesc::biased_lock_pattern);
  jcc(Assembler::equal, done);
}

void MacroAssembler::c2bool(Register x) {
  // implements x == 0 ? 0 : 1
  // note: must only look at least-significant byte of x
  //       since C-style booleans are stored in one byte
  //       only! (was bug)
  andl(x, 0xFF);
  setb(Assembler::notZero, x);
}

// Wouldn't need if AddressLiteral version had new name
void MacroAssembler::call(Label& L, relocInfo::relocType rtype) {
  Assembler::call(L, rtype);
}

void MacroAssembler::call(Register entry) {
  Assembler::call(entry);
}

// Call through a literal target; falls back to an indirect call via
// rscratch1 when the target is out of call-displacement range.
void MacroAssembler::call(AddressLiteral entry) {
  if (reachable(entry)) {
    Assembler::call_literal(entry.target(), entry.rspec());
  } else {
    lea(rscratch1, entry);
    Assembler::call(rscratch1);
  }
}

// Implementation of call_VM versions
//
// Each no-last_java_sp variant emits a local "call C; jmp E" trampoline so
// that a return address sits on the stack for call_VM_helper to derive
// last_Java_sp/pc from.

void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             bool check_exceptions) {
  Label C, E;
  call(C, relocInfo::none);
  jmp(E);

  bind(C);
  call_VM_helper(oop_result, entry_point, 0, check_exceptions);
  ret(0);

  bind(E);
}

void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             Register arg_1,
                             bool check_exceptions) {
  Label C, E;
  call(C, relocInfo::none);
  jmp(E);

  bind(C);
  pass_arg1(this, arg_1);
  call_VM_helper(oop_result, entry_point, 1, check_exceptions);
  ret(0);

  bind(E);
}

void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             bool check_exceptions) {
  Label C, E;
  call(C, relocInfo::none);
  jmp(E);

  bind(C);

  // args are shuffled into c_rarg registers last-to-first so an earlier
  // arg cannot be clobbered before it is read
  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));

  pass_arg2(this, arg_2);
  pass_arg1(this, arg_1);
  call_VM_helper(oop_result, entry_point, 2, check_exceptions);
  ret(0);

  bind(E);
}

void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             Register arg_3,
                             bool check_exceptions) {
  Label C, E;
  call(C, relocInfo::none);
  jmp(E);

  bind(C);

  LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"));
  LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"));
  pass_arg3(this, arg_3);

  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);

  pass_arg1(this, arg_1);
  call_VM_helper(oop_result, entry_point, 3, check_exceptions);
  ret(0);

  bind(E);
}

// Variants below take an explicit last_java_sp and go straight to
// call_VM_base (no trampoline needed).

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             int number_of_arguments,
                             bool check_exceptions) {
  Register thread = LP64_ONLY(r15_thread) NOT_LP64(noreg);
  call_VM_base(oop_result, thread, last_java_sp, entry_point, number_of_arguments, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             bool check_exceptions) {
  pass_arg1(this, arg_1);
  call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             bool check_exceptions) {

  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);
  pass_arg1(this, arg_1);
  call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             Register arg_3,
                             bool check_exceptions) {
  // shuffle args last-to-first so an earlier arg is never clobbered
  LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"));
  LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"));
  pass_arg3(this, arg_3);
  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);
  pass_arg1(this, arg_1);
  call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions);
}

// The workhorse behind all call_VM variants: sets the last-Java-frame
// anchor, calls the VM entry point with the thread as first argument,
// restores the thread register, clears the anchor, handles popframe/
// earlyret, pending exceptions and the oop result.
void MacroAssembler::call_VM_base(Register oop_result,
                                  Register java_thread,
                                  Register last_java_sp,
                                  address entry_point,
                                  int number_of_arguments,
                                  bool check_exceptions) {
  // determine java_thread register
  if (!java_thread->is_valid()) {
#ifdef _LP64
    java_thread = r15_thread;
#else
    java_thread = rdi;
    get_thread(java_thread);
#endif // LP64
  }
  // determine last_java_sp register
  if (!last_java_sp->is_valid()) {
    last_java_sp = rsp;
  }
  // debugging support
  assert(number_of_arguments >= 0   , "cannot have negative number of arguments");
  LP64_ONLY(assert(java_thread == r15_thread, "unexpected register"));
  assert(java_thread != oop_result  , "cannot use the same register for java_thread & oop_result");
  assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp");

  // push java thread (becomes first argument of C function)

  NOT_LP64(push(java_thread); number_of_arguments++);
  LP64_ONLY(mov(c_rarg0, r15_thread));

  // set last Java frame before call
  assert(last_java_sp != rbp, "can't use ebp/rbp");

  // Only interpreter should have to set fp
  set_last_Java_frame(java_thread, last_java_sp, rbp, NULL);

  // do the call, remove parameters
  MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments);

  // restore the thread (cannot use the pushed argument since arguments
  // may be overwritten by C code generated by an optimizing compiler);
  // however can use the register value directly if it is callee saved.
  if (LP64_ONLY(true ||) java_thread == rdi || java_thread == rsi) {
    // rdi & rsi (also r15) are callee saved -> nothing to do
#ifdef ASSERT
    guarantee(java_thread != rax, "change this code");
    push(rax);
    { Label L;
      get_thread(rax);
      cmpptr(java_thread, rax);
      jcc(Assembler::equal, L);
      stop("MacroAssembler::call_VM_base: rdi not callee saved?");
      bind(L);
    }
    pop(rax);
#endif
  } else {
    get_thread(java_thread);
  }
  // reset last Java frame
  // Only interpreter should have to clear fp
  reset_last_Java_frame(java_thread, true, false);

#ifndef CC_INTERP
  // C++ interp handles this in the interpreter
  check_and_handle_popframe(java_thread);
  check_and_handle_earlyret(java_thread);
#endif /* CC_INTERP */

  if (check_exceptions) {
    // check for pending exceptions (java_thread is set upon return)
    cmpptr(Address(java_thread, Thread::pending_exception_offset()), (int32_t) NULL_WORD);
#ifndef _LP64
    jump_cc(Assembler::notEqual,
            RuntimeAddress(StubRoutines::forward_exception_entry()));
#else
    // This used to conditionally jump to forward_exception however it is
    // possible if we relocate that the branch will not reach. So we must jump
    // around so we can always reach

    Label ok;
    jcc(Assembler::equal, ok);
    jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
    bind(ok);
#endif // LP64
  }

  // get oop result if there is one and reset the value in the thread
  if (oop_result->is_valid()) {
    movptr(oop_result, Address(java_thread, JavaThread::vm_result_offset()));
    movptr(Address(java_thread, JavaThread::vm_result_offset()), NULL_WORD);
    verify_oop(oop_result, "broken oop in call_VM_base");
  }
}

void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) {

  // Calculate the value for last_Java_sp
  // somewhat subtle. call_VM does an intermediate call
  // which places a return address on the stack just under the
  // stack pointer as the user finished with it. This allows
  // us to retrieve last_Java_pc from last_Java_sp[-1].
  // On 32bit we then have to push additional args on the stack to accomplish
  // the actual requested call. On 64bit call_VM only can use register args
  // so the only extra space is the return address that call_VM created.
  // This hopefully explains the calculations here.

#ifdef _LP64
  // We've pushed one address, correct last_Java_sp
  lea(rax, Address(rsp, wordSize));
#else
  lea(rax, Address(rsp, (1 + number_of_arguments) * wordSize));
#endif // LP64

  call_VM_base(oop_result, noreg, rax, entry_point, number_of_arguments, check_exceptions);

}

void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) {
  call_VM_leaf_base(entry_point, number_of_arguments);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) {
  pass_arg0(this, arg_0);
  call_VM_leaf(entry_point, 1);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {

  // shuffle args last-to-first so an earlier arg is never clobbered
  LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
  pass_arg1(this, arg_1);
  pass_arg0(this, arg_0);
  call_VM_leaf(entry_point, 2);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) {
  LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg"));
  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);
  LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
  pass_arg1(this, arg_1);
  pass_arg0(this, arg_0);
  call_VM_leaf(entry_point, 3);
}

// Intentionally empty here; overridden/implemented where the interpreter
// needs earlyret handling.
void MacroAssembler::check_and_handle_earlyret(Register java_thread) {
}

// Intentionally empty here; overridden/implemented where the interpreter
// needs popframe handling.
void MacroAssembler::check_and_handle_popframe(Register java_thread) {
}

// Compare the 32-bit value at a literal address against an immediate.
void MacroAssembler::cmp32(AddressLiteral src1, int32_t imm) {
  if (reachable(src1)) {
    cmpl(as_Address(src1), imm);
  } else {
    lea(rscratch1, src1);
    cmpl(Address(rscratch1, 0), imm);
  }
}

// Compare a register against the 32-bit value at a literal address.
void MacroAssembler::cmp32(Register src1, AddressLiteral src2) {
  assert(!src2.is_lval(), "use cmpptr");
  if (reachable(src2)) {
    cmpl(src1, as_Address(src2));
  } else {
    lea(rscratch1, src2);
    cmpl(src1, Address(rscratch1, 0));
  }
}
void MacroAssembler::cmp32(Register src1, int32_t imm) {
  Assembler::cmpl(src1, imm);
}

void MacroAssembler::cmp32(Register src1, Address src2) {
  Assembler::cmpl(src1, src2);
}

// Materialize a double compare as an int in dst: -1 / 0 / +1 for
// below / equal / above, with the unordered result chosen by the flag.
void MacroAssembler::cmpsd2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less) {
  ucomisd(opr1, opr2);

  Label L;
  if (unordered_is_less) {
    movl(dst, -1);
    jcc(Assembler::parity, L);
    jcc(Assembler::below , L);
    movl(dst, 0);
    jcc(Assembler::equal , L);
    increment(dst);
  } else { // unordered is greater
    movl(dst, 1);
    jcc(Assembler::parity, L);
    jcc(Assembler::above , L);
    movl(dst, 0);
    jcc(Assembler::equal , L);
    decrementl(dst);
  }
  bind(L);
}

// Float version of cmpsd2int (same -1/0/+1 protocol).
void MacroAssembler::cmpss2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less) {
  ucomiss(opr1, opr2);

  Label L;
  if (unordered_is_less) {
    movl(dst, -1);
    jcc(Assembler::parity, L);
    jcc(Assembler::below , L);
    movl(dst, 0);
    jcc(Assembler::equal , L);
    increment(dst);
  } else { // unordered is greater
    movl(dst, 1);
    jcc(Assembler::parity, L);
    jcc(Assembler::above , L);
    movl(dst, 0);
    jcc(Assembler::equal , L);
    decrementl(dst);
  }
  bind(L);
}


// Compare the byte at a literal address against an immediate.
void MacroAssembler::cmp8(AddressLiteral src1, int imm) {
  if (reachable(src1)) {
    cmpb(as_Address(src1), imm);
  } else {
    lea(rscratch1, src1);
    cmpb(Address(rscratch1, 0), imm);
  }
}

// Pointer-width compare of a register against a literal: either against the
// literal's address itself (lval) or the value stored at it.
void MacroAssembler::cmpptr(Register src1, AddressLiteral src2) {
#ifdef _LP64
  if (src2.is_lval()) {
    movptr(rscratch1, src2);
    Assembler::cmpq(src1, rscratch1);
  } else if (reachable(src2)) {
    cmpq(src1, as_Address(src2));
  } else {
    lea(rscratch1, src2);
    Assembler::cmpq(src1, Address(rscratch1, 0));
  }
#else
  if (src2.is_lval()) {
    cmp_literal32(src1, (int32_t) src2.target(), src2.rspec());
  } else {
    cmpl(src1, as_Address(src2));
  }
#endif // _LP64
}

// Compare memory against a literal address (lval only; there is no
// mem-mem compare instruction).
void MacroAssembler::cmpptr(Address src1, AddressLiteral src2) {
  assert(src2.is_lval(), "not a mem-mem compare");
#ifdef _LP64
  // moves src2's literal address
  movptr(rscratch1, src2);
  Assembler::cmpq(src1, rscratch1);
#else
  cmp_literal32(src1, (int32_t) src2.target(), src2.rspec());
#endif // _LP64
}

// lock; cmpxchg of reg against the pointer-sized value at a literal address.
void MacroAssembler::locked_cmpxchgptr(Register reg, AddressLiteral adr) {
  if (reachable(adr)) {
    if (os::is_MP())
      lock();
    cmpxchgptr(reg, as_Address(adr));
  } else {
    lea(rscratch1, adr);
    if (os::is_MP())
      lock();
    cmpxchgptr(reg, Address(rscratch1, 0));
  }
}

void MacroAssembler::cmpxchgptr(Register reg, Address adr) {
  LP64_ONLY(cmpxchgq(reg, adr)) NOT_LP64(cmpxchgl(reg, adr));
}

// comisd with a literal operand; uses rscratch1 when not RIP-reachable.
void MacroAssembler::comisd(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    comisd(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    comisd(dst, Address(rscratch1, 0));
  }
}

// comiss with a literal operand; uses rscratch1 when not RIP-reachable.
void MacroAssembler::comiss(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    comiss(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    comiss(dst, Address(rscratch1, 0));
  }
}


// Atomically increment the counter at counter_addr iff condition cond holds
// (branches around the increment on the negated condition).
void MacroAssembler::cond_inc32(Condition cond, AddressLiteral counter_addr) {
  Condition negated_cond = negate_condition(cond);
  Label L;
  jcc(negated_cond, L);
  atomic_incl(counter_addr);
  bind(L);
}

int MacroAssembler::corrected_idivl(Register reg) {
  // Full implementation of Java idiv and irem; checks for
  // special case as described in JVM spec., p.243 & p.271.
  // The function returns the (pc) offset of the idivl
  // instruction - may be needed for implicit exceptions.
  //
  //         normal case                           special case
  //
  // input : rax,: dividend                         min_int
  //         reg: divisor   (may not be rax,/rdx)   -1
  //
  // output: rax,: quotient  (= rax, idiv reg)       min_int
  //         rdx: remainder (= rax, irem reg)       0
  assert(reg != rax && reg != rdx, "reg cannot be rax, or rdx register");
  const int min_int = 0x80000000;
  Label normal_case, special_case;

  // check for special case (dividend == min_int, which overflows on idiv by -1)
  cmpl(rax, min_int);
  jcc(Assembler::notEqual, normal_case);
  xorl(rdx, rdx); // prepare rdx for possible special case (where remainder = 0)
  cmpl(reg, -1);
  jcc(Assembler::equal, special_case);

  // handle normal case
  bind(normal_case);
  cdql();
  int idivl_offset = offset();
  idivl(reg);

  // normal and special case exit
  bind(special_case);

  return idivl_offset;
}



// Subtract a 32-bit immediate, picking the shortest encoding.
void MacroAssembler::decrementl(Register reg, int value) {
  if (value == min_jint) {subl(reg, value) ; return; } // -value would overflow
  if (value <  0) { incrementl(reg, -value); return; }
  if (value == 0) {                        ; return; }
  if (value == 1 && UseIncDec) { decl(reg) ; return; }
  /* else */      { subl(reg, value)       ; return; }
}

// Memory-destination form of the above.
void MacroAssembler::decrementl(Address dst, int value) {
  if (value == min_jint) {subl(dst, value) ; return; }
  if (value <  0) { incrementl(dst, -value); return; }
  if (value == 0) {                        ; return; }
  if (value == 1 && UseIncDec) { decl(dst) ; return; }
  /* else */      { subl(dst, value)       ; return; }
}

// Signed division by a power of two via arithmetic shift: negative values
// are biased by (2^shift - 1) first so the shift rounds toward zero.
void MacroAssembler::division_with_shift (Register reg, int shift_value) {
  assert (shift_value > 0, "illegal shift value");
  Label _is_positive;
  testl (reg, reg);
  jcc (Assembler::positive, _is_positive);
  int offset = (1 << shift_value) - 1 ;

  if (offset == 1) {
    incrementl(reg);
  } else {
    addl(reg, offset);
  }

  bind (_is_positive);
  sarl(reg, shift_value);
}

// !defined(COMPILER2) is because of stupid core builds
#if !defined(_LP64) || defined(COMPILER1) || !defined(COMPILER2)
void MacroAssembler::empty_FPU_stack() {
  if (VM_Version::supports_mmx()) {
    emms();
  } else {
    for (int i = 8; i-- > 0; ) ffree(i);
  }
}
#endif // !LP64 || C1 || !C2


// Defines obj, preserves var_size_in_bytes
void MacroAssembler::eden_allocate(Register obj,
                                   Register var_size_in_bytes,
                                   int con_size_in_bytes,
                                   Register t1,
                                   Label& slow_case) {
  assert(obj == rax, "obj must be in rax, for cmpxchg");
  assert_different_registers(obj, var_size_in_bytes, t1);
  if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) {
    jmp(slow_case);
  } else {
    Register end = t1;
    Label retry;
    bind(retry);
    ExternalAddress heap_top((address) Universe::heap()->top_addr());
    movptr(obj, heap_top);
    if (var_size_in_bytes == noreg) {
      lea(end, Address(obj, con_size_in_bytes));
    } else {
      lea(end, Address(obj, var_size_in_bytes, Address::times_1));
    }
    // if end < obj then we wrapped around => object too long => slow case
    cmpptr(end, obj);
    jcc(Assembler::below, slow_case);
    cmpptr(end, ExternalAddress((address) Universe::heap()->end_addr()));
    jcc(Assembler::above, slow_case);
    // Compare obj with the top addr, and if still equal, store the new top addr in
    // end at the address of the top addr pointer. Sets ZF if was equal, and clears
    // it otherwise. Use lock prefix for atomicity on MPs.
    locked_cmpxchgptr(end, heap_top);
    jcc(Assembler::notEqual, retry); // another thread raced us: try again
  }
}

// Standard frame prologue: push rbp; mov rbp, rsp.
void MacroAssembler::enter() {
  push(rbp);
  mov(rbp, rsp);
}

void MacroAssembler::fcmp(Register tmp) {
  fcmp(tmp, 1, true, true);
}

// Compare ST(0) with ST(index) and leave the result in EFLAGS. On CPUs with
// cmov/fucomi support the comparison sets EFLAGS directly; otherwise the FPU
// status word is transferred via rax/sahf (tmp preserves rax).
void MacroAssembler::fcmp(Register tmp, int index, bool pop_left, bool pop_right) {
  assert(!pop_right || pop_left, "usage error");
  if (VM_Version::supports_cmov()) {
    assert(tmp == noreg, "unneeded temp");
    if (pop_left) {
      fucomip(index);
    } else {
      fucomi(index);
    }
    if (pop_right) {
      fpop();
    }
  } else {
    assert(tmp != noreg, "need temp");
    if (pop_left) {
      if (pop_right) {
        fcompp();
      } else {
        fcomp(index);
      }
    } else {
      fcom(index);
    }
    // convert FPU condition into eflags condition via rax,
    save_rax(tmp);
    fwait(); fnstsw_ax();
    sahf();
    restore_rax(tmp);
  }
  // condition codes set as follows:
  //
  // CF (corresponds to C0) if x < y
  // PF (corresponds to C2) if unordered
  // ZF (corresponds to C3) if x = y
}

void MacroAssembler::fcmp2int(Register dst, bool unordered_is_less) {
  fcmp2int(dst, unordered_is_less, 1, true, true);
}

// Materialize an x87 compare as -1/0/+1 in dst (same protocol as cmpsd2int).
void MacroAssembler::fcmp2int(Register dst, bool unordered_is_less, int index, bool pop_left, bool pop_right) {
  fcmp(VM_Version::supports_cmov() ? noreg : dst, index, pop_left, pop_right);
  Label L;
  if (unordered_is_less) {
    movl(dst, -1);
    jcc(Assembler::parity, L);
    jcc(Assembler::below , L);
    movl(dst, 0);
    jcc(Assembler::equal , L);
    increment(dst);
  } else { // unordered is greater
    movl(dst, 1);
    jcc(Assembler::parity, L);
    jcc(Assembler::above , L);
    movl(dst, 0);
    jcc(Assembler::equal , L);
    decrementl(dst);
  }
  bind(L);
}

void MacroAssembler::fld_d(AddressLiteral src) {
  fld_d(as_Address(src));
}

void MacroAssembler::fld_s(AddressLiteral src) {
  fld_s(as_Address(src));
}

void MacroAssembler::fld_x(AddressLiteral src) {
  Assembler::fld_x(as_Address(src));
}

void MacroAssembler::fldcw(AddressLiteral src) {
  Assembler::fldcw(as_Address(src));
}

// Pop the x87 stack: free ST(0) and rotate the stack top.
void MacroAssembler::fpop() {
  ffree();
  fincstp();
}

// IEEE remainder: loop fprem until the FPU reports the reduction is
// complete (C2 flag clear), then drop ST(1).
void MacroAssembler::fremr(Register tmp) {
  save_rax(tmp);
  { Label L;
    bind(L);
    fprem();
    fwait(); fnstsw_ax();
#ifdef _LP64
    testl(rax, 0x400); // test C2 directly in the status word
    jcc(Assembler::notEqual, L);
#else
    sahf();
    jcc(Assembler::parity, L);
#endif // _LP64
  }
  restore_rax(tmp);
  // Result is in ST0.
  // Note: fxch & fpop to get rid of ST1
  // (otherwise FPU stack could overflow eventually)
  fxch(1);
  fpop();
}


// Increment the 32-bit value at a literal address.
void MacroAssembler::incrementl(AddressLiteral dst) {
  if (reachable(dst)) {
    incrementl(as_Address(dst));
  } else {
    lea(rscratch1, dst);
    incrementl(Address(rscratch1, 0));
  }
}

void MacroAssembler::incrementl(ArrayAddress dst) {
  incrementl(as_Address(dst));
}

// Add a 32-bit immediate, picking the shortest encoding.
void MacroAssembler::incrementl(Register reg, int value) {
  if (value == min_jint) {addl(reg, value) ; return; } // -value would overflow
  if (value <  0) { decrementl(reg, -value); return; }
  if (value == 0) {                        ; return; }
  if (value == 1 && UseIncDec) { incl(reg) ; return; }
  /* else */      { addl(reg, value)       ; return; }
}

// Memory-destination form of the above.
void MacroAssembler::incrementl(Address dst, int value) {
  if (value == min_jint) {addl(dst, value) ; return; }
  if (value <  0) { decrementl(dst, -value); return; }
  if (value == 0) {                        ; return; }
  if (value == 1 && UseIncDec) { incl(dst) ; return; }
  /* else */      { addl(dst, value)       ; return; }
}

// Unconditional jump to a literal target; indirect via rscratch1 when the
// target is out of branch-displacement range.
void MacroAssembler::jump(AddressLiteral dst) {
  if (reachable(dst)) {
    jmp_literal(dst.target(), dst.rspec());
  } else {
    lea(rscratch1, dst);
    jmp(rscratch1);
  }
}

// Conditional jump to a literal target. Emits a short or long Jcc when the
// target is reachable; otherwise reverses the condition and branches around
// an indirect jump through rscratch1.
void MacroAssembler::jump_cc(Condition cc, AddressLiteral dst) {
  if (reachable(dst)) {
    InstructionMark im(this);
    relocate(dst.reloc());
    const int short_size = 2;
    const int long_size = 6;
    int offs = (intptr_t)dst.target() - ((intptr_t)_code_pos);
    if (dst.reloc() == relocInfo::none && is8bit(offs - short_size)) {
      // 0111 tttn #8-bit disp
      emit_byte(0x70 | cc);
      emit_byte((offs - short_size) & 0xFF);
    } else {
      // 0000 1111 1000 tttn #32-bit disp
      emit_byte(0x0F);
      emit_byte(0x80 | cc);
      emit_long(offs - long_size);
    }
  } else {
#ifdef ASSERT
    warning("reversing conditional branch");
#endif /* ASSERT */
    Label skip;
    jccb(reverse[cc], skip);
    lea(rscratch1, dst);
    Assembler::jmp(rscratch1);
    bind(skip);
  }
}

// ldmxcsr with a literal operand; uses rscratch1 when not RIP-reachable.
void MacroAssembler::ldmxcsr(AddressLiteral src) {
  if (reachable(src)) {
    Assembler::ldmxcsr(as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::ldmxcsr(Address(rscratch1, 0));
  }
}

// Sign-extending byte load; returns the code offset of the load instruction
// (for implicit null check bookkeeping).
int MacroAssembler::load_signed_byte(Register dst, Address src) {
  int off;
  if (LP64_ONLY(true ||) VM_Version::is_P6()) {
    off = offset();
    movsbl(dst, src); // movsxb
  } else {
    // pre-P6: synthesize movsx via zero-extend + shift pair
    off = load_unsigned_byte(dst, src);
    shll(dst, 24);
    sarl(dst, 24);
  }
  return off;
}

// Note: load_signed_short used to be called load_signed_word.
// Although the 'w' in x86 opcodes refers to the term "word" in the assembler
// manual, which means 16 bits, that usage is found nowhere in HotSpot code.
// The term "word" in HotSpot means a 32- or 64-bit machine word.
int MacroAssembler::load_signed_short(Register dst, Address src) {
  int off;
  if (LP64_ONLY(true ||) VM_Version::is_P6()) {
    // This is dubious to me since it seems safe to do a signed 16 => 64 bit
    // version but this is what 64bit has always done. This seems to imply
    // that users are only using 32bits worth.
    off = offset();
    movswl(dst, src); // movsxw
  } else {
    off = load_unsigned_short(dst, src);
    shll(dst, 16);
    sarl(dst, 16);
  }
  return off;
}

// Zero-extending byte load; returns the code offset of the load instruction.
int MacroAssembler::load_unsigned_byte(Register dst, Address src) {
  // According to Intel Doc. AP-526, "Zero-Extension of Short", p.16,
  // and "3.9 Partial Register Penalties", p. 22).
  int off;
  if (LP64_ONLY(true || ) VM_Version::is_P6() || src.uses(dst)) {
    off = offset();
    movzbl(dst, src); // movzxb
  } else {
    xorl(dst, dst);
    off = offset();
    movb(dst, src);
  }
  return off;
}

// Note: load_unsigned_short used to be called load_unsigned_word.
int MacroAssembler::load_unsigned_short(Register dst, Address src) {
  // According to Intel Doc. AP-526, "Zero-Extension of Short", p.16,
  // and "3.9 Partial Register Penalties", p. 22).
  int off;
  if (LP64_ONLY(true ||) VM_Version::is_P6() || src.uses(dst)) {
    off = offset();
    movzwl(dst, src); // movzxw
  } else {
    xorl(dst, dst);
    off = offset();
    movw(dst, src);
  }
  return off;
}

// Load a value of the given byte size, sign- or zero-extending as requested.
// (Continues past the end of this chunk.)
void MacroAssembler::load_sized_value(Register dst, Address src,
                                      size_t size_in_bytes, bool is_signed) {
  switch (size_in_bytes) {
#ifndef _LP64
  // For case 8, caller is responsible for manually loading
  // the second word into another register.
  case  8: movl(dst, src); break;
#else
  case  8: movq(dst, src); break;
#endif
  case  4: movl(dst, src); break;
  case  2: is_signed ? load_signed_short(dst, src) : load_unsigned_short(dst, src); break;
  case  1: is_signed ?
load_signed_byte( dst, src) : load_unsigned_byte( dst, src); break; 6516 default: ShouldNotReachHere(); 6517 } 6518 } 6519 6520 void MacroAssembler::mov32(AddressLiteral dst, Register src) { 6521 if (reachable(dst)) { 6522 movl(as_Address(dst), src); 6523 } else { 6524 lea(rscratch1, dst); 6525 movl(Address(rscratch1, 0), src); 6526 } 6527 } 6528 6529 void MacroAssembler::mov32(Register dst, AddressLiteral src) { 6530 if (reachable(src)) { 6531 movl(dst, as_Address(src)); 6532 } else { 6533 lea(rscratch1, src); 6534 movl(dst, Address(rscratch1, 0)); 6535 } 6536 } 6537 6538 // C++ bool manipulation 6539 6540 void MacroAssembler::movbool(Register dst, Address src) { 6541 if(sizeof(bool) == 1) 6542 movb(dst, src); 6543 else if(sizeof(bool) == 2) 6544 movw(dst, src); 6545 else if(sizeof(bool) == 4) 6546 movl(dst, src); 6547 else 6548 // unsupported 6549 ShouldNotReachHere(); 6550 } 6551 6552 void MacroAssembler::movbool(Address dst, bool boolconst) { 6553 if(sizeof(bool) == 1) 6554 movb(dst, (int) boolconst); 6555 else if(sizeof(bool) == 2) 6556 movw(dst, (int) boolconst); 6557 else if(sizeof(bool) == 4) 6558 movl(dst, (int) boolconst); 6559 else 6560 // unsupported 6561 ShouldNotReachHere(); 6562 } 6563 6564 void MacroAssembler::movbool(Address dst, Register src) { 6565 if(sizeof(bool) == 1) 6566 movb(dst, src); 6567 else if(sizeof(bool) == 2) 6568 movw(dst, src); 6569 else if(sizeof(bool) == 4) 6570 movl(dst, src); 6571 else 6572 // unsupported 6573 ShouldNotReachHere(); 6574 } 6575 6576 void MacroAssembler::movbyte(ArrayAddress dst, int src) { 6577 movb(as_Address(dst), src); 6578 } 6579 6580 void MacroAssembler::movdbl(XMMRegister dst, AddressLiteral src) { 6581 if (reachable(src)) { 6582 if (UseXmmLoadAndClearUpper) { 6583 movsd (dst, as_Address(src)); 6584 } else { 6585 movlpd(dst, as_Address(src)); 6586 } 6587 } else { 6588 lea(rscratch1, src); 6589 if (UseXmmLoadAndClearUpper) { 6590 movsd (dst, Address(rscratch1, 0)); 6591 } else { 6592 movlpd(dst, 
Address(rscratch1, 0)); 6593 } 6594 } 6595 } 6596 6597 void MacroAssembler::movflt(XMMRegister dst, AddressLiteral src) { 6598 if (reachable(src)) { 6599 movss(dst, as_Address(src)); 6600 } else { 6601 lea(rscratch1, src); 6602 movss(dst, Address(rscratch1, 0)); 6603 } 6604 } 6605 6606 void MacroAssembler::movptr(Register dst, Register src) { 6607 LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src)); 6608 } 6609 6610 void MacroAssembler::movptr(Register dst, Address src) { 6611 LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src)); 6612 } 6613 6614 // src should NEVER be a real pointer. Use AddressLiteral for true pointers 6615 void MacroAssembler::movptr(Register dst, intptr_t src) { 6616 LP64_ONLY(mov64(dst, src)) NOT_LP64(movl(dst, src)); 6617 } 6618 6619 void MacroAssembler::movptr(Address dst, Register src) { 6620 LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src)); 6621 } 6622 6623 void MacroAssembler::movss(XMMRegister dst, AddressLiteral src) { 6624 if (reachable(src)) { 6625 movss(dst, as_Address(src)); 6626 } else { 6627 lea(rscratch1, src); 6628 movss(dst, Address(rscratch1, 0)); 6629 } 6630 } 6631 6632 void MacroAssembler::null_check(Register reg, int offset) { 6633 if (needs_explicit_null_check(offset)) { 6634 // provoke OS NULL exception if reg = NULL by 6635 // accessing M[reg] w/o changing any (non-CC) registers 6636 // NOTE: cmpl is plenty here to provoke a segv 6637 cmpptr(rax, Address(reg, 0)); 6638 // Note: should probably use testl(rax, Address(reg, 0)); 6639 // may be shorter code (however, this version of 6640 // testl needs to be implemented first) 6641 } else { 6642 // nothing to do, (later) access of M[reg + offset] 6643 // will provoke OS NULL exception if reg = NULL 6644 } 6645 } 6646 6647 void MacroAssembler::os_breakpoint() { 6648 // instead of directly emitting a breakpoint, call os:breakpoint for better debugability 6649 // (e.g., MSVC can't call ps() otherwise) 6650 call(RuntimeAddress(CAST_FROM_FN_PTR(address, os::breakpoint))); 6651 } 

// Restore full CPU state saved by push_CPU_state (FPU first, then integer).
void MacroAssembler::pop_CPU_state() {
  pop_FPU_state();
  pop_IU_state();
}

void MacroAssembler::pop_FPU_state() {
  NOT_LP64(frstor(Address(rsp, 0));)
  LP64_ONLY(fxrstor(Address(rsp, 0));)
  addptr(rsp, FPUStateSizeInWords * wordSize);
}

void MacroAssembler::pop_IU_state() {
  popa();
  // Undo the 8-byte alignment pad added in push_IU_state.
  LP64_ONLY(addq(rsp, 8));
  popf();
}

// Save Integer and Float state
// Warning: Stack must be 16 byte aligned (64bit)
void MacroAssembler::push_CPU_state() {
  push_IU_state();
  push_FPU_state();
}

void MacroAssembler::push_FPU_state() {
  subptr(rsp, FPUStateSizeInWords * wordSize);
#ifndef _LP64
  fnsave(Address(rsp, 0));
  fwait();
#else
  fxsave(Address(rsp, 0));
#endif // LP64
}

void MacroAssembler::push_IU_state() {
  // Push flags first because pusha kills them
  pushf();
  // Make sure rsp stays 16-byte aligned
  LP64_ONLY(subq(rsp, 8));
  pusha();
}

// Clear the JavaThread's last-Java-frame anchor (sp always; fp/pc on request).
void MacroAssembler::reset_last_Java_frame(Register java_thread, bool clear_fp, bool clear_pc) {
  // determine java_thread register
  if (!java_thread->is_valid()) {
    java_thread = rdi;
    get_thread(java_thread);
  }
  // we must set sp to zero to clear frame
  movptr(Address(java_thread, JavaThread::last_Java_sp_offset()), NULL_WORD);
  if (clear_fp) {
    movptr(Address(java_thread, JavaThread::last_Java_fp_offset()), NULL_WORD);
  }

  if (clear_pc)
    movptr(Address(java_thread, JavaThread::last_Java_pc_offset()), NULL_WORD);

}

// Restore rax from tmp (or from the stack if no tmp was supplied to save_rax).
void MacroAssembler::restore_rax(Register tmp) {
  if (tmp == noreg) pop(rax);
  else if (tmp != rax) mov(rax, tmp);
}

// Round reg up to a multiple of 'modulus' (add-then-mask; requires a
// power-of-two modulus for the and-mask to be a valid alignment mask).
void MacroAssembler::round_to(Register reg, int modulus) {
  addptr(reg, modulus - 1);
  andptr(reg, -modulus);
}

void MacroAssembler::save_rax(Register tmp) {
  if (tmp == noreg) push(rax);
  else if (tmp != rax) mov(tmp, rax);
}

// Write serialization page so VM thread can do a pseudo remote membar.
// We use the current thread pointer to calculate a thread specific
// offset to write to within the page. This minimizes bus traffic
// due to cache line collision.
void MacroAssembler::serialize_memory(Register thread, Register tmp) {
  movl(tmp, thread);
  shrl(tmp, os::get_serialize_page_shift_count());
  andl(tmp, (os::vm_page_size() - sizeof(int)));

  Address index(noreg, tmp, Address::times_1);
  ExternalAddress page(os::get_memory_serialize_page());

  // Size of store must match masking code above
  movl(as_Address(ArrayAddress(page, index)), tmp);
}

// Calls to C land
//
// When entering C land, the rbp, & rsp of the last Java frame have to be recorded
// in the (thread-local) JavaThread object. When leaving C land, the last Java fp
// has to be reset to 0. This is required to allow proper stack traversal.
// Record the last Java frame (sp, optional fp, optional pc) in the
// JavaThread anchor so native code can walk the Java stack.
void MacroAssembler::set_last_Java_frame(Register java_thread,
                                         Register last_java_sp,
                                         Register last_java_fp,
                                         address last_java_pc) {
  // determine java_thread register
  if (!java_thread->is_valid()) {
    java_thread = rdi;
    get_thread(java_thread);
  }
  // determine last_java_sp register
  if (!last_java_sp->is_valid()) {
    last_java_sp = rsp;
  }

  // last_java_fp is optional

  if (last_java_fp->is_valid()) {
    movptr(Address(java_thread, JavaThread::last_Java_fp_offset()), last_java_fp);
  }

  // last_java_pc is optional

  if (last_java_pc != NULL) {
    lea(Address(java_thread,
                JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()),
        InternalAddress(last_java_pc));

  }
  // NOTE(review): sp is stored last, after fp/pc — presumably so a non-zero
  // sp publishes a fully-initialized anchor (reset_last_Java_frame clears sp
  // first); confirm against the anchor's reader.
  movptr(Address(java_thread, JavaThread::last_Java_sp_offset()), last_java_sp);
}

// Pointer-width shift left (shlq on LP64, shll on 32-bit).
void MacroAssembler::shlptr(Register dst, int imm8) {
  LP64_ONLY(shlq(dst, imm8)) NOT_LP64(shll(dst, imm8));
}

void MacroAssembler::shrptr(Register dst, int imm8) {
  LP64_ONLY(shrq(dst, imm8)) NOT_LP64(shrl(dst, imm8));
}

// In-place sign extension of the low byte of reg to 32 bits.
void MacroAssembler::sign_extend_byte(Register reg) {
  if (LP64_ONLY(true ||) (VM_Version::is_P6() && reg->has_byte_register())) {
    movsbl(reg, reg); // movsxb
  } else {
    // No byte form available: shift up and arithmetic-shift back down.
    shll(reg, 24);
    sarl(reg, 24);
  }
}

// In-place sign extension of the low 16 bits of reg to 32 bits.
void MacroAssembler::sign_extend_short(Register reg) {
  if (LP64_ONLY(true ||) VM_Version::is_P6()) {
    movswl(reg, reg); // movsxw
  } else {
    shll(reg, 16);
    sarl(reg, 16);
  }
}

//////////////////////////////////////////////////////////////////////////////////
#ifndef SERIALGC

// G1 pre-barrier: if concurrent marking is active, record the field's old
// value in the thread's SATB mark queue (calling into the runtime when the
// queue buffer is full).  Clobbers tmp and tmp2; preserves rax if tosca_live.
void MacroAssembler::g1_write_barrier_pre(Register obj,
#ifndef _LP64
                                          Register thread,
#endif
                                          Register tmp,
                                          Register tmp2,
                                          bool tosca_live) {
  LP64_ONLY(Register thread = r15_thread;)
  Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
                                       PtrQueue::byte_offset_of_active()));

  Address index(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
                                 PtrQueue::byte_offset_of_index()));
  Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
                                  PtrQueue::byte_offset_of_buf()));


  Label done;
  Label runtime;

  // if (!marking_in_progress) goto done;
  if (in_bytes(PtrQueue::byte_width_of_active()) == 4) {
    cmpl(in_progress, 0);
  } else {
    assert(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption");
    cmpb(in_progress, 0);
  }
  jcc(Assembler::equal, done);

  // if (x.f == NULL) goto done;
#ifdef _LP64
  load_heap_oop(tmp2, Address(obj, 0));
#else
  movptr(tmp2, Address(obj, 0));
#endif
  cmpptr(tmp2, (int32_t) NULL_WORD);
  jcc(Assembler::equal, done);

  // Can we store original value in the thread's buffer?

#ifdef _LP64
  movslq(tmp, index);
  cmpq(tmp, 0);
#else
  cmpl(index, 0);
#endif
  jcc(Assembler::equal, runtime);
  // Queue index counts down; decrement and store the old value at buf+index.
#ifdef _LP64
  subq(tmp, wordSize);
  movl(index, tmp);
  addq(tmp, buffer);
#else
  subl(index, wordSize);
  movl(tmp, buffer);
  addl(tmp, index);
#endif
  movptr(Address(tmp, 0), tmp2);
  jmp(done);
  bind(runtime);
  // save the live input values
  if(tosca_live) push(rax);
  push(obj);
#ifdef _LP64
  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), tmp2, r15_thread);
#else
  push(thread);
  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), tmp2, thread);
  pop(thread);
#endif
  pop(obj);
  if(tosca_live) pop(rax);
  bind(done);

}

// G1 post-barrier: for a region-crossing, non-NULL store, dirty the card for
// store_addr and log it in the thread's dirty-card queue (runtime call when
// the queue buffer is full).  Clobbers tmp and tmp2.
void MacroAssembler::g1_write_barrier_post(Register store_addr,
                                           Register new_val,
#ifndef _LP64
                                           Register thread,
#endif
                                           Register tmp,
                                           Register tmp2) {

  LP64_ONLY(Register thread = r15_thread;)
  Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
                                       PtrQueue::byte_offset_of_index()));
  Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
                                  PtrQueue::byte_offset_of_buf()));
  BarrierSet* bs = Universe::heap()->barrier_set();
  CardTableModRefBS* ct = (CardTableModRefBS*)bs;
  Label done;
  Label runtime;

  // Does store cross heap regions?

  movptr(tmp, store_addr);
  xorptr(tmp, new_val);
  shrptr(tmp, HeapRegion::LogOfHRGrainBytes);
  jcc(Assembler::equal, done);

  // crosses regions, storing NULL?

  cmpptr(new_val, (int32_t) NULL_WORD);
  jcc(Assembler::equal, done);

  // storing region crossing non-NULL, is card already dirty?

  ExternalAddress cardtable((address) ct->byte_map_base);
  assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
#ifdef _LP64
  const Register card_addr = tmp;

  movq(card_addr, store_addr);
  shrq(card_addr, CardTableModRefBS::card_shift);

  lea(tmp2, cardtable);

  // get the address of the card
  addq(card_addr, tmp2);
#else
  const Register card_index = tmp;

  movl(card_index, store_addr);
  shrl(card_index, CardTableModRefBS::card_shift);

  Address index(noreg, card_index, Address::times_1);
  const Register card_addr = tmp;
  lea(card_addr, as_Address(ArrayAddress(cardtable, index)));
#endif
  cmpb(Address(card_addr, 0), 0);
  jcc(Assembler::equal, done);

  // storing a region crossing, non-NULL oop, card is clean.
  // dirty card and log.

  movb(Address(card_addr, 0), 0);

  cmpl(queue_index, 0);
  jcc(Assembler::equal, runtime);
  subl(queue_index, wordSize);
  movptr(tmp2, buffer);
#ifdef _LP64
  movslq(rscratch1, queue_index);
  addq(tmp2, rscratch1);
  movq(Address(tmp2, 0), card_addr);
#else
  addl(tmp2, queue_index);
  movl(Address(tmp2, 0), card_index);
#endif
  jmp(done);

  bind(runtime);
  // save the live input values
  push(store_addr);
  push(new_val);
#ifdef _LP64
  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, r15_thread);
#else
  push(thread);
  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, thread);
  pop(thread);
#endif
  pop(new_val);
  pop(store_addr);

  bind(done);

}

#endif // SERIALGC
//////////////////////////////////////////////////////////////////////////////////


void MacroAssembler::store_check(Register obj) {
  // Does a store check for the oop in register obj. The content of
  // register obj is destroyed afterwards.
  store_check_part_1(obj);
  store_check_part_2(obj);
}

void MacroAssembler::store_check(Register obj, Address dst) {
  store_check(obj);
}


// split the store check operation so that other instructions can be scheduled inbetween
void MacroAssembler::store_check_part_1(Register obj) {
  BarrierSet* bs = Universe::heap()->barrier_set();
  assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
  shrptr(obj, CardTableModRefBS::card_shift);
}

// Part 2: dirty the card whose index part 1 left in obj.
void MacroAssembler::store_check_part_2(Register obj) {
  BarrierSet* bs = Universe::heap()->barrier_set();
  assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
  CardTableModRefBS* ct = (CardTableModRefBS*)bs;
  assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");

  // The calculation for byte_map_base is as follows:
  // byte_map_base = _byte_map - (uintptr_t(low_bound) >> card_shift);
  // So this essentially converts an address to a displacement and
  // it will never need to be relocated. On 64bit however the value may be too
  // large for a 32bit displacement

  intptr_t disp = (intptr_t) ct->byte_map_base;
  if (is_simm32(disp)) {
    Address cardtable(noreg, obj, Address::times_1, disp);
    movb(cardtable, 0);
  } else {
    // By doing it as an ExternalAddress disp could be converted to a rip-relative
    // displacement and done in a single instruction given favorable mapping and
    // a smarter version of as_Address. Worst case it is two instructions which
    // is no worse off then loading disp into a register and doing as a simple
    // Address() as above.
    // We can't do as ExternalAddress as the only style since if disp == 0 we'll
    // assert since NULL isn't acceptable in a reloci (see 6644928). In any case
    // in some cases we'll get a single instruction version.

    ExternalAddress cardtable((address)disp);
    Address index(noreg, obj, Address::times_1);
    movb(as_Address(ArrayAddress(cardtable, index)), 0);
  }
}

// Pointer-width subtract (subq on LP64, subl on 32-bit).
void MacroAssembler::subptr(Register dst, int32_t imm32) {
  LP64_ONLY(subq(dst, imm32)) NOT_LP64(subl(dst, imm32));
}

void MacroAssembler::subptr(Register dst, Register src) {
  LP64_ONLY(subq(dst, src)) NOT_LP64(subl(dst, src));
}

// 32-bit test of src1 against a literal memory operand.
void MacroAssembler::test32(Register src1, AddressLiteral src2) {
  // src2 must be rval

  if (reachable(src2)) {
    testl(src1, as_Address(src2));
  } else {
    lea(rscratch1, src2);
    testl(src1, Address(rscratch1, 0));
  }
}

// C++ bool manipulation
// Set flags from a C++ bool held in dst, matching the platform's sizeof(bool).
void MacroAssembler::testbool(Register dst) {
  if(sizeof(bool) == 1)
    testb(dst, 0xff);
  else if(sizeof(bool) == 2) {
    // testw implementation needed for two byte bools
    ShouldNotReachHere();
  } else if(sizeof(bool) == 4)
    testl(dst, dst);
  else
    // unsupported
    ShouldNotReachHere();
}

void MacroAssembler::testptr(Register dst, Register src) {
  LP64_ONLY(testq(dst, src)) NOT_LP64(testl(dst, src));
}

// Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes.
// Bump-pointer allocation from the thread-local allocation buffer; jumps to
// slow_case when the TLAB cannot satisfy the request.
void MacroAssembler::tlab_allocate(Register obj,
                                   Register var_size_in_bytes,
                                   int con_size_in_bytes,
                                   Register t1,
                                   Register t2,
                                   Label& slow_case) {
  assert_different_registers(obj, t1, t2);
  assert_different_registers(obj, var_size_in_bytes, t1);
  Register end = t2;
  Register thread = NOT_LP64(t1) LP64_ONLY(r15_thread);

  verify_tlab();

  NOT_LP64(get_thread(thread));

  movptr(obj, Address(thread, JavaThread::tlab_top_offset()));
  // Size is either the constant con_size_in_bytes or the register
  // var_size_in_bytes (when that register is valid).
  if (var_size_in_bytes == noreg) {
    lea(end, Address(obj, con_size_in_bytes));
  } else {
    lea(end, Address(obj, var_size_in_bytes, Address::times_1));
  }
  cmpptr(end, Address(thread, JavaThread::tlab_end_offset()));
  jcc(Assembler::above, slow_case);

  // update the tlab top pointer
  movptr(Address(thread, JavaThread::tlab_top_offset()), end);

  // recover var_size_in_bytes if necessary
  if (var_size_in_bytes == end) {
    subptr(var_size_in_bytes, obj);
  }
  verify_tlab();
}

// Preserves rbx, and rdx.
void MacroAssembler::tlab_refill(Label& retry,
                                 Label& try_eden,
                                 Label& slow_case) {
  Register top = rax;
  Register t1 = rcx;
  Register t2 = rsi;
  Register thread_reg = NOT_LP64(rdi) LP64_ONLY(r15_thread);
  assert_different_registers(top, thread_reg, t1, t2, /* preserve: */ rbx, rdx);
  Label do_refill, discard_tlab;

  if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) {
    // No allocation in the shared eden.
    jmp(slow_case);
  }

  NOT_LP64(get_thread(thread_reg));

  movptr(top, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())));
  movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())));

  // calculate amount of free space
  subptr(t1, top);
  shrptr(t1, LogHeapWordSize);

  // Retain tlab and allocate object in shared space if
  // the amount free in the tlab is too large to discard.
  cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset())));
  jcc(Assembler::lessEqual, discard_tlab);

  // Retain
  // %%% yuck as movptr...
  movptr(t2, (int32_t) ThreadLocalAllocBuffer::refill_waste_limit_increment());
  addptr(Address(thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset())), t2);
  if (TLABStats) {
    // increment number of slow_allocations
    addl(Address(thread_reg, in_bytes(JavaThread::tlab_slow_allocations_offset())), 1);
  }
  jmp(try_eden);

  bind(discard_tlab);
  if (TLABStats) {
    // increment number of refills
    addl(Address(thread_reg, in_bytes(JavaThread::tlab_number_of_refills_offset())), 1);
    // accumulate wastage -- t1 is amount free in tlab
    addl(Address(thread_reg, in_bytes(JavaThread::tlab_fast_refill_waste_offset())), t1);
  }

  // if tlab is currently allocated (top or end != null) then
  // fill [top, end + alignment_reserve) with array object
  testptr (top, top);
  jcc(Assembler::zero, do_refill);

  // set up the mark word
  movptr(Address(top, oopDesc::mark_offset_in_bytes()), (intptr_t)markOopDesc::prototype()->copy_set_hash(0x2));
  // set the length to the remaining space
  subptr(t1, typeArrayOopDesc::header_size(T_INT));
  addptr(t1, (int32_t)ThreadLocalAllocBuffer::alignment_reserve());
  shlptr(t1, log2_intptr(HeapWordSize/sizeof(jint)));
  movl(Address(top, arrayOopDesc::length_offset_in_bytes()), t1);
  // set klass to intArrayKlass
  // dubious reloc why not an oop reloc?
  movptr(t1, ExternalAddress((address) Universe::intArrayKlassObj_addr()));
  // store klass last.  concurrent gcs assumes klass length is valid if
  // klass field is not null.
  store_klass(top, t1);

  // refill the tlab with an eden allocation
  bind(do_refill);
  movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_size_offset())));
  shlptr(t1, LogHeapWordSize);
  // add object_size ??
  eden_allocate(top, t1, 0, t2, slow_case);

  // Check that t1 was preserved in eden_allocate.
#ifdef ASSERT
  if (UseTLAB) {
    Label ok;
    Register tsize = rsi;
    assert_different_registers(tsize, thread_reg, t1);
    push(tsize);
    movptr(tsize, Address(thread_reg, in_bytes(JavaThread::tlab_size_offset())));
    shlptr(tsize, LogHeapWordSize);
    cmpptr(t1, tsize);
    jcc(Assembler::equal, ok);
    stop("assert(t1 != tlab size)");
    should_not_reach_here();

    bind(ok);
    pop(tsize);
  }
#endif
  // Install the new TLAB [top, top+t1) minus the alignment reserve.
  movptr(Address(thread_reg, in_bytes(JavaThread::tlab_start_offset())), top);
  movptr(Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())), top);
  addptr(top, t1);
  subptr(top, (int32_t)ThreadLocalAllocBuffer::alignment_reserve_in_bytes());
  movptr(Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())), top);
  verify_tlab();
  jmp(retry);
}

static const double pi_4 = 0.7853981633974483;

// Emit sin/cos/tan ('s'/'c'/'t') of the value on the FPU stack top: fast
// in-line instruction when |x| <= pi/4, otherwise a runtime call that
// preserves all registers and the caller's FPU stack.
void MacroAssembler::trigfunc(char trig, int num_fpu_regs_in_use) {
  // A hand-coded argument reduction for values in fabs(pi/4, pi/2)
  // was attempted in this code; unfortunately it appears that the
  // switch to 80-bit precision and back causes this to be
  // unprofitable compared with simply performing a runtime call if
  // the argument is out of the (-pi/4, pi/4) range.

  Register tmp = noreg;
  if (!VM_Version::supports_cmov()) {
    // fcmp needs a temporary so preserve rbx,
    tmp = rbx;
    push(tmp);
  }

  Label slow_case, done;

  ExternalAddress pi4_adr = (address)&pi_4;
  if (reachable(pi4_adr)) {
    // x ?<= pi/4
    fld_d(pi4_adr);
    fld_s(1);                // Stack:  X  PI/4  X
    fabs();                  // Stack: |X| PI/4  X
    fcmp(tmp);
    jcc(Assembler::above, slow_case);

    // fastest case: -pi/4 <= x <= pi/4
    switch(trig) {
    case 's':
      fsin();
      break;
    case 'c':
      fcos();
      break;
    case 't':
      ftan();
      break;
    default:
      assert(false, "bad intrinsic");
      break;
    }
    jmp(done);
  }

  // slow case: runtime call
  bind(slow_case);
  // Preserve registers across runtime call
  pusha();
  int incoming_argument_and_return_value_offset = -1;
  if (num_fpu_regs_in_use > 1) {
    // Must preserve all other FPU regs (could alternatively convert
    // SharedRuntime::dsin and dcos into assembly routines known not to trash
    // FPU state, but can not trust C compiler)
    NEEDS_CLEANUP;
    // NOTE that in this case we also push the incoming argument to
    // the stack and restore it later; we also use this stack slot to
    // hold the return value from dsin or dcos.
    for (int i = 0; i < num_fpu_regs_in_use; i++) {
      subptr(rsp, sizeof(jdouble));
      fstp_d(Address(rsp, 0));
    }
    incoming_argument_and_return_value_offset = sizeof(jdouble)*(num_fpu_regs_in_use-1);
    fld_d(Address(rsp, incoming_argument_and_return_value_offset));
  }
  subptr(rsp, sizeof(jdouble));
  fstp_d(Address(rsp, 0));
#ifdef _LP64
  movdbl(xmm0, Address(rsp, 0));
#endif // _LP64

  // NOTE: we must not use call_VM_leaf here because that requires a
  // complete interpreter frame in debug mode -- same bug as 4387334
  // MacroAssembler::call_VM_leaf_base is perfectly safe and will
  // do proper 64bit abi

  NEEDS_CLEANUP;
  // Need to add stack banging before this runtime call if it needs to
  // be taken; however, there is no generic stack banging routine at
  // the MacroAssembler level
  switch(trig) {
  case 's':
    {
      MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::dsin), 0);
    }
    break;
  case 'c':
    {
      MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::dcos), 0);
    }
    break;
  case 't':
    {
      MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::dtan), 0);
    }
    break;
  default:
    assert(false, "bad intrinsic");
    break;
  }
#ifdef _LP64
  movsd(Address(rsp, 0), xmm0);
  fld_d(Address(rsp, 0));
#endif // _LP64
  addptr(rsp, sizeof(jdouble));
  if (num_fpu_regs_in_use > 1) {
    // Must save return value to stack and then restore entire FPU stack
    fstp_d(Address(rsp, incoming_argument_and_return_value_offset));
    for (int i = 0; i < num_fpu_regs_in_use; i++) {
      fld_d(Address(rsp, 0));
      addptr(rsp, sizeof(jdouble));
    }
  }
  popa();

  // Come here with result in F-TOS
  bind(done);

  if (tmp != noreg) {
    pop(tmp);
  }
}


// Look up the method for a megamorphic invokeinterface call.
// The target method is determined by <intf_klass, itable_index>.
// The receiver klass is in recv_klass.
// On success, the result will be in method_result, and execution falls through.
// On failure, execution transfers to the given label.
void MacroAssembler::lookup_interface_method(Register recv_klass,
                                             Register intf_klass,
                                             RegisterOrConstant itable_index,
                                             Register method_result,
                                             Register scan_temp,
                                             Label& L_no_such_interface) {
  assert_different_registers(recv_klass, intf_klass, method_result, scan_temp);
  assert(itable_index.is_constant() || itable_index.as_register() == method_result,
         "caller must use same register for non-constant itable index as for method");

  // Compute start of first itableOffsetEntry (which is at the end of the vtable)
  int vtable_base = instanceKlass::vtable_start_offset() * wordSize;
  int itentry_off = itableMethodEntry::method_offset_in_bytes();
  int scan_step = itableOffsetEntry::size() * wordSize;
  int vte_size = vtableEntry::size() * wordSize;
  Address::ScaleFactor times_vte_scale = Address::times_ptr;
  assert(vte_size == wordSize, "else adjust times_vte_scale");

  movl(scan_temp, Address(recv_klass, instanceKlass::vtable_length_offset() * wordSize));

  // %%% Could store the aligned, prescaled offset in the klassoop.
  lea(scan_temp, Address(recv_klass, scan_temp, times_vte_scale, vtable_base));
  if (HeapWordsPerLong > 1) {
    // Round up to align_object_offset boundary
    // see code for instanceKlass::start_of_itable!
    round_to(scan_temp, BytesPerLong);
  }

  // Adjust recv_klass by scaled itable_index, so we can free itable_index.
  assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below");
  lea(recv_klass, Address(recv_klass, itable_index, Address::times_ptr, itentry_off));

  // for (scan = klass->itable(); scan->interface() != NULL; scan += scan_step) {
  //   if (scan->interface() == intf) {
  //     result = (klass + scan->offset() + itable_index);
  //   }
  // }
  Label search, found_method;

  // First iteration (peel == 1) is peeled off so the common hit on the first
  // itable entry falls through with a short forward branch.
  for (int peel = 1; peel >= 0; peel--) {
    movptr(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes()));
    cmpptr(intf_klass, method_result);

    if (peel) {
      jccb(Assembler::equal, found_method);
    } else {
      jccb(Assembler::notEqual, search);
      // (invert the test to fall through to found_method...)
    }

    if (!peel) break;

    bind(search);

    // Check that the previous entry is non-null.  A null entry means that
    // the receiver class doesn't implement the interface, and wasn't the
    // same as when the caller was compiled.
    testptr(method_result, method_result);
    jcc(Assembler::zero, L_no_such_interface);
    addptr(scan_temp, scan_step);
  }

  bind(found_method);

  // Got a hit.
  movl(scan_temp, Address(scan_temp, itableOffsetEntry::offset_offset_in_bytes()));
  movptr(method_result, Address(recv_klass, scan_temp, Address::times_1));
}


// Full subtype check: fast path then slow path; falls through on failure.
void MacroAssembler::check_klass_subtype(Register sub_klass,
                                         Register super_klass,
                                         Register temp_reg,
                                         Label& L_success) {
  Label L_failure;
  check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg, &L_success, &L_failure, NULL);
  check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, noreg, &L_success, NULL);
  bind(L_failure);
}


// Fast subtype check: identity test, then the super_check_offset display
// probe.  At most one of L_success/L_failure/L_slow_path may be NULL (that
// outcome then falls through).
void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
                                                   Register super_klass,
                                                   Register temp_reg,
                                                   Label* L_success,
                                                   Label* L_failure,
                                                   Label* L_slow_path,
                                        RegisterOrConstant super_check_offset) {
  assert_different_registers(sub_klass, super_klass, temp_reg);
  bool must_load_sco = (super_check_offset.constant_or_zero() == -1);
  if (super_check_offset.is_register()) {
    assert_different_registers(sub_klass, super_klass,
                               super_check_offset.as_register());
  } else if (must_load_sco) {
    assert(temp_reg != noreg, "supply either a temp or a register offset");
  }

  Label L_fallthrough;
  int label_nulls = 0;
  if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
  if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
  if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; }
  assert(label_nulls <= 1, "at most one NULL in the batch");

  int sc_offset = (klassOopDesc::header_size() * HeapWordSize +
                   Klass::secondary_super_cache_offset_in_bytes());
  int sco_offset = (klassOopDesc::header_size() * HeapWordSize +
                    Klass::super_check_offset_offset_in_bytes());
  Address super_check_offset_addr(super_klass, sco_offset);

  // Hacked jcc, which "knows" that L_fallthrough, at least, is in
  // range of a jccb.  If this routine grows larger, reconsider at
  // least some of these.
#define local_jcc(assembler_cond, label)                                \
  if (&(label) == &L_fallthrough)  jccb(assembler_cond, label);         \
  else                             jcc( assembler_cond, label) /*omit semi*/

  // Hacked jmp, which may only be used just before L_fallthrough.
#define final_jmp(label)                                                \
  if (&(label) == &L_fallthrough) { /*do nothing*/ }                    \
  else                            jmp(label)                /*omit semi*/

  // If the pointers are equal, we are done (e.g., String[] elements).
  // This self-check enables sharing of secondary supertype arrays among
  // non-primary types such as array-of-interface.  Otherwise, each such
  // type would need its own customized SSA.
  // We move this check to the front of the fast path because many
  // type checks are in fact trivially successful in this manner,
  // so we get a nicely predicted branch right at the start of the check.
  cmpptr(sub_klass, super_klass);
  local_jcc(Assembler::equal, *L_success);

  // Check the supertype display:
  if (must_load_sco) {
    // Positive movl does right thing on LP64.
    movl(temp_reg, super_check_offset_addr);
    super_check_offset = RegisterOrConstant(temp_reg);
  }
  Address super_check_addr(sub_klass, super_check_offset, Address::times_1, 0);
  cmpptr(super_klass, super_check_addr); // load displayed supertype

  // This check has worked decisively for primary supers.
  // Secondary supers are sought in the super_cache ('super_cache_addr').
  // (Secondary supers are interfaces and very deeply nested subtypes.)
  // This works in the same check above because of a tricky aliasing
  // between the super_cache and the primary super display elements.
  // (The 'super_check_addr' can address either, as the case requires.)
  // Note that the cache is updated below if it does not help us find
  // what we need immediately.
7483 // So if it was a primary super, we can just fail immediately. 7484 // Otherwise, it's the slow path for us (no success at this point). 7485 7486 if (super_check_offset.is_register()) { 7487 local_jcc(Assembler::equal, *L_success); 7488 cmpl(super_check_offset.as_register(), sc_offset); 7489 if (L_failure == &L_fallthrough) { 7490 local_jcc(Assembler::equal, *L_slow_path); 7491 } else { 7492 local_jcc(Assembler::notEqual, *L_failure); 7493 final_jmp(*L_slow_path); 7494 } 7495 } else if (super_check_offset.as_constant() == sc_offset) { 7496 // Need a slow path; fast failure is impossible. 7497 if (L_slow_path == &L_fallthrough) { 7498 local_jcc(Assembler::equal, *L_success); 7499 } else { 7500 local_jcc(Assembler::notEqual, *L_slow_path); 7501 final_jmp(*L_success); 7502 } 7503 } else { 7504 // No slow path; it's a fast decision. 7505 if (L_failure == &L_fallthrough) { 7506 local_jcc(Assembler::equal, *L_success); 7507 } else { 7508 local_jcc(Assembler::notEqual, *L_failure); 7509 final_jmp(*L_success); 7510 } 7511 } 7512 7513 bind(L_fallthrough); 7514 7515 #undef local_jcc 7516 #undef final_jmp 7517 } 7518 7519 7520 void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass, 7521 Register super_klass, 7522 Register temp_reg, 7523 Register temp2_reg, 7524 Label* L_success, 7525 Label* L_failure, 7526 bool set_cond_codes) { 7527 assert_different_registers(sub_klass, super_klass, temp_reg); 7528 if (temp2_reg != noreg) 7529 assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg); 7530 #define IS_A_TEMP(reg) ((reg) == temp_reg || (reg) == temp2_reg) 7531 7532 Label L_fallthrough; 7533 int label_nulls = 0; 7534 if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } 7535 if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } 7536 assert(label_nulls <= 1, "at most one NULL in the batch"); 7537 7538 // a couple of useful fields in sub_klass: 7539 int ss_offset = (klassOopDesc::header_size() * HeapWordSize + 7540 
Klass::secondary_supers_offset_in_bytes()); 7541 int sc_offset = (klassOopDesc::header_size() * HeapWordSize + 7542 Klass::secondary_super_cache_offset_in_bytes()); 7543 Address secondary_supers_addr(sub_klass, ss_offset); 7544 Address super_cache_addr( sub_klass, sc_offset); 7545 7546 // Do a linear scan of the secondary super-klass chain. 7547 // This code is rarely used, so simplicity is a virtue here. 7548 // The repne_scan instruction uses fixed registers, which we must spill. 7549 // Don't worry too much about pre-existing connections with the input regs. 7550 7551 assert(sub_klass != rax, "killed reg"); // killed by mov(rax, super) 7552 assert(sub_klass != rcx, "killed reg"); // killed by lea(rcx, &pst_counter) 7553 7554 // Get super_klass value into rax (even if it was in rdi or rcx). 7555 bool pushed_rax = false, pushed_rcx = false, pushed_rdi = false; 7556 if (super_klass != rax || UseCompressedOops) { 7557 if (!IS_A_TEMP(rax)) { push(rax); pushed_rax = true; } 7558 mov(rax, super_klass); 7559 } 7560 if (!IS_A_TEMP(rcx)) { push(rcx); pushed_rcx = true; } 7561 if (!IS_A_TEMP(rdi)) { push(rdi); pushed_rdi = true; } 7562 7563 #ifndef PRODUCT 7564 int* pst_counter = &SharedRuntime::_partial_subtype_ctr; 7565 ExternalAddress pst_counter_addr((address) pst_counter); 7566 NOT_LP64( incrementl(pst_counter_addr) ); 7567 LP64_ONLY( lea(rcx, pst_counter_addr) ); 7568 LP64_ONLY( incrementl(Address(rcx, 0)) ); 7569 #endif //PRODUCT 7570 7571 // We will consult the secondary-super array. 7572 movptr(rdi, secondary_supers_addr); 7573 // Load the array length. (Positive movl does right thing on LP64.) 7574 movl(rcx, Address(rdi, arrayOopDesc::length_offset_in_bytes())); 7575 // Skip to start of data. 7576 addptr(rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); 7577 7578 // Scan RCX words at [RDI] for an occurrence of RAX. 7579 // Set NZ/Z based on last compare. 
7580 // Z flag value will not be set by 'repne' if RCX == 0 since 'repne' does 7581 // not change flags (only scas instruction which is repeated sets flags). 7582 // Set Z = 0 (not equal) before 'repne' to indicate that class was not found. 7583 #ifdef _LP64 7584 // This part is tricky, as values in supers array could be 32 or 64 bit wide 7585 // and we store values in objArrays always encoded, thus we need to encode 7586 // the value of rax before repne. Note that rax is dead after the repne. 7587 if (UseCompressedOops) { 7588 encode_heap_oop_not_null(rax); // Changes flags. 7589 // The superclass is never null; it would be a basic system error if a null 7590 // pointer were to sneak in here. Note that we have already loaded the 7591 // Klass::super_check_offset from the super_klass in the fast path, 7592 // so if there is a null in that register, we are already in the afterlife. 7593 testl(rax,rax); // Set Z = 0 7594 repne_scanl(); 7595 } else 7596 #endif // _LP64 7597 { 7598 testptr(rax,rax); // Set Z = 0 7599 repne_scan(); 7600 } 7601 // Unspill the temp. registers: 7602 if (pushed_rdi) pop(rdi); 7603 if (pushed_rcx) pop(rcx); 7604 if (pushed_rax) pop(rax); 7605 7606 if (set_cond_codes) { 7607 // Special hack for the AD files: rdi is guaranteed non-zero. 7608 assert(!pushed_rdi, "rdi must be left non-NULL"); 7609 // Also, the condition codes are properly set Z/NZ on succeed/failure. 7610 } 7611 7612 if (L_failure == &L_fallthrough) 7613 jccb(Assembler::notEqual, *L_failure); 7614 else jcc(Assembler::notEqual, *L_failure); 7615 7616 // Success. Cache the super we found and proceed in triumph. 
7617 movptr(super_cache_addr, super_klass); 7618 7619 if (L_success != &L_fallthrough) { 7620 jmp(*L_success); 7621 } 7622 7623 #undef IS_A_TEMP 7624 7625 bind(L_fallthrough); 7626 } 7627 7628 7629 void MacroAssembler::ucomisd(XMMRegister dst, AddressLiteral src) { 7630 ucomisd(dst, as_Address(src)); 7631 } 7632 7633 void MacroAssembler::ucomiss(XMMRegister dst, AddressLiteral src) { 7634 ucomiss(dst, as_Address(src)); 7635 } 7636 7637 void MacroAssembler::xorpd(XMMRegister dst, AddressLiteral src) { 7638 if (reachable(src)) { 7639 xorpd(dst, as_Address(src)); 7640 } else { 7641 lea(rscratch1, src); 7642 xorpd(dst, Address(rscratch1, 0)); 7643 } 7644 } 7645 7646 void MacroAssembler::xorps(XMMRegister dst, AddressLiteral src) { 7647 if (reachable(src)) { 7648 xorps(dst, as_Address(src)); 7649 } else { 7650 lea(rscratch1, src); 7651 xorps(dst, Address(rscratch1, 0)); 7652 } 7653 } 7654 7655 void MacroAssembler::verify_oop(Register reg, const char* s) { 7656 if (!VerifyOops) return; 7657 7658 // Pass register number to verify_oop_subroutine 7659 char* b = new char[strlen(s) + 50]; 7660 sprintf(b, "verify_oop: %s: %s", reg->name(), s); 7661 #ifdef _LP64 7662 push(rscratch1); // save r10, trashed by movptr() 7663 #endif 7664 push(rax); // save rax, 7665 push(reg); // pass register argument 7666 ExternalAddress buffer((address) b); 7667 // avoid using pushptr, as it modifies scratch registers 7668 // and our contract is not to modify anything 7669 movptr(rax, buffer.addr()); 7670 push(rax); 7671 // call indirectly to solve generation ordering problem 7672 movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address())); 7673 call(rax); 7674 // Caller pops the arguments (oop, message) and restores rax, r10 7675 } 7676 7677 7678 RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr, 7679 Register tmp, 7680 int offset) { 7681 intptr_t value = *delayed_value_addr; 7682 if (value != 0) 7683 return RegisterOrConstant(value + 
offset); 7684 7685 // load indirectly to solve generation ordering problem 7686 movptr(tmp, ExternalAddress((address) delayed_value_addr)); 7687 7688 #ifdef ASSERT 7689 { Label L; 7690 testptr(tmp, tmp); 7691 if (WizardMode) { 7692 jcc(Assembler::notZero, L); 7693 char* buf = new char[40]; 7694 sprintf(buf, "DelayedValue="INTPTR_FORMAT, delayed_value_addr[1]); 7695 stop(buf); 7696 } else { 7697 jccb(Assembler::notZero, L); 7698 hlt(); 7699 } 7700 bind(L); 7701 } 7702 #endif 7703 7704 if (offset != 0) 7705 addptr(tmp, offset); 7706 7707 return RegisterOrConstant(tmp); 7708 } 7709 7710 7711 // registers on entry: 7712 // - rax ('check' register): required MethodType 7713 // - rcx: method handle 7714 // - rdx, rsi, or ?: killable temp 7715 void MacroAssembler::check_method_handle_type(Register mtype_reg, Register mh_reg, 7716 Register temp_reg, 7717 Label& wrong_method_type) { 7718 Address type_addr(mh_reg, delayed_value(java_dyn_MethodHandle::type_offset_in_bytes, temp_reg)); 7719 // compare method type against that of the receiver 7720 if (UseCompressedOops) { 7721 load_heap_oop(temp_reg, type_addr); 7722 cmpptr(mtype_reg, temp_reg); 7723 } else { 7724 cmpptr(mtype_reg, type_addr); 7725 } 7726 jcc(Assembler::notEqual, wrong_method_type); 7727 } 7728 7729 7730 // A method handle has a "vmslots" field which gives the size of its 7731 // argument list in JVM stack slots. This field is either located directly 7732 // in every method handle, or else is indirectly accessed through the 7733 // method handle's MethodType. This macro hides the distinction. 
// Load the method handle's vmslots count into vmslots_reg.
// temp_reg is killed; vmslots_reg doubles as a temp on the indirect path.
void MacroAssembler::load_method_handle_vmslots(Register vmslots_reg, Register mh_reg,
                                                Register temp_reg) {
  assert_different_registers(vmslots_reg, mh_reg, temp_reg);
  // load mh.type.form.vmslots
  if (java_dyn_MethodHandle::vmslots_offset_in_bytes() != 0) {
    // hoist vmslots into every mh to avoid dependent load chain
    movl(vmslots_reg, Address(mh_reg, delayed_value(java_dyn_MethodHandle::vmslots_offset_in_bytes, temp_reg)));
  } else {
    Register temp2_reg = vmslots_reg;
    load_heap_oop(temp2_reg, Address(mh_reg, delayed_value(java_dyn_MethodHandle::type_offset_in_bytes, temp_reg)));
    load_heap_oop(temp2_reg, Address(temp2_reg, delayed_value(java_dyn_MethodType::form_offset_in_bytes, temp_reg)));
    movl(vmslots_reg, Address(temp2_reg, delayed_value(java_dyn_MethodTypeForm::vmslots_offset_in_bytes, temp_reg)));
  }
}


// registers on entry:
//  - rcx: method handle
//  - rdx: killable temp (interpreted only)
//  - rax: killable temp (compiled only)
void MacroAssembler::jump_to_method_handle_entry(Register mh_reg, Register temp_reg) {
  assert(mh_reg == rcx, "caller must put MH object in rcx");
  assert_different_registers(mh_reg, temp_reg);

  // pick out the interpreted side of the handler
  // NOTE: vmentry is not an oop!
  movptr(temp_reg, Address(mh_reg, delayed_value(java_dyn_MethodHandle::vmentry_offset_in_bytes, temp_reg)));

  // off we go...
  jmp(Address(temp_reg, MethodHandleEntry::from_interpreted_entry_offset_in_bytes()));

  // for the various stubs which take control at this point,
  // see MethodHandles::generate_method_handle_stub
}


// Compute the Address of an interpreter argument slot relative to rsp.
// arg_slot may be a constant (folded into the displacement) or a register
// (becomes the scaled index).
Address MacroAssembler::argument_address(RegisterOrConstant arg_slot,
                                         int extra_slot_offset) {
  // cf. TemplateTable::prepare_invoke(), if (load_receiver).
  int stackElementSize = Interpreter::stackElementSize;
  int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0);
#ifdef ASSERT
  int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1);
  assert(offset1 - offset == stackElementSize, "correct arithmetic");
#endif
  Register             scale_reg    = noreg;
  Address::ScaleFactor scale_factor = Address::no_scale;
  if (arg_slot.is_constant()) {
    offset += arg_slot.as_constant() * stackElementSize;
  } else {
    scale_reg    = arg_slot.as_register();
    scale_factor = Address::times(stackElementSize);
  }
  offset += wordSize;           // return PC is on stack
  return Address(rsp, scale_reg, scale_factor, offset);
}


// Like verify_oop, but the oop is loaded from memory at 'addr'.
// No-op unless -XX:+VerifyOops.
void MacroAssembler::verify_oop_addr(Address addr, const char* s) {
  if (!VerifyOops) return;

  // Address adjust(addr.base(), addr.index(), addr.scale(), addr.disp() + BytesPerWord);
  // Pass register number to verify_oop_subroutine
  char* b = new char[strlen(s) + 50];
  sprintf(b, "verify_oop_addr: %s", s);

#ifdef _LP64
  push(rscratch1);                    // save r10, trashed by movptr()
#endif
  push(rax);                          // save rax,
  // addr may contain rsp so we will have to adjust it based on the push
  // we just did
  // NOTE: 64bit seemed to have had a bug in that it did movq(addr, rax); which
  // stores rax into addr which is backwards of what was intended.
  if (addr.uses(rsp)) {
    lea(rax, addr);
    pushptr(Address(rax, BytesPerWord));
  } else {
    pushptr(addr);
  }

  ExternalAddress buffer((address) b);
  // pass msg argument
  // avoid using pushptr, as it modifies scratch registers
  // and our contract is not to modify anything
  movptr(rax, buffer.addr());
  push(rax);

  // call indirectly to solve generation ordering problem
  movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));
  call(rax);
  // Caller pops the arguments (addr, message) and restores rax, r10.
}


// Debug-only sanity checks on the current thread's TLAB:
// stop()s unless start <= top <= end.
void MacroAssembler::verify_tlab() {
#ifdef ASSERT
  if (UseTLAB && VerifyOops) {
    Label next, ok;
    Register t1 = rsi;
    Register thread_reg = NOT_LP64(rbx) LP64_ONLY(r15_thread);

    push(t1);
    NOT_LP64(push(thread_reg));
    NOT_LP64(get_thread(thread_reg));

    movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())));
    cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_start_offset())));
    jcc(Assembler::aboveEqual, next);
    stop("assert(top >= start)");
    should_not_reach_here();

    bind(next);
    movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())));
    cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())));
    jcc(Assembler::aboveEqual, ok);
    stop("assert(top <= end)");
    should_not_reach_here();

    bind(ok);
    NOT_LP64(pop(thread_reg));
    pop(t1);
  }
#endif
}

// Bit accessors over a saved image of the x87 FPU control word
// (used by print_CPU_state / _verify_FPU below).
class ControlWord {
 public:
  int32_t _value;

  int  rounding_control() const        { return  (_value >> 10) & 3      ; }
  int  precision_control() const       { return  (_value >>  8) & 3      ; }
  bool precision() const               { return ((_value >>  5) & 1) != 0; }
  bool underflow() const               { return ((_value >>  4) & 1) != 0; }
  bool overflow() const                { return ((_value >>  3) & 1) != 0; }
  bool zero_divide() const             { return ((_value >>  2) & 1) != 0; }
  bool denormalized() const            { return ((_value >>  1) & 1) != 0; }
  bool invalid() const                 { return ((_value >>  0) & 1) != 0; }

  void print() const {
    // rounding control
    const char* rc;
    switch (rounding_control()) {
      case 0: rc = "round near"; break;
      case 1: rc = "round down"; break;
      case 2: rc = "round up  "; break;
      case 3: rc = "chop      "; break;
    };
    // precision control
    const char* pc;
    switch (precision_control()) {
      case 0: pc = "24 bits "; break;
      case 1: pc = "reserved"; break;
      case 2: pc = "53 bits "; break;
      case 3: pc = "64 bits "; break;
    };
    // flags
    char f[9];
    f[0] = ' ';
    f[1] = ' ';
    f[2] = (precision   ()) ? 'P' : 'p';
    f[3] = (underflow   ()) ? 'U' : 'u';
    f[4] = (overflow    ()) ? 'O' : 'o';
    f[5] = (zero_divide ()) ? 'Z' : 'z';
    f[6] = (denormalized()) ? 'D' : 'd';
    f[7] = (invalid     ()) ? 'I' : 'i';
    f[8] = '\x0';
    // output
    printf("%04x  masks = %s, %s, %s", _value & 0xFFFF, f, rc, pc);
  }

};

// Bit accessors over a saved image of the x87 FPU status word.
class StatusWord {
 public:
  int32_t _value;

  bool busy() const                    { return ((_value >> 15) & 1) != 0; }
  bool C3() const                      { return ((_value >> 14) & 1) != 0; }
  bool C2() const                      { return ((_value >> 10) & 1) != 0; }
  bool C1() const                      { return ((_value >>  9) & 1) != 0; }
  bool C0() const                      { return ((_value >>  8) & 1) != 0; }
  int  top() const                     { return  (_value >> 11) & 7      ; }
  bool error_status() const            { return ((_value >>  7) & 1) != 0; }
  bool stack_fault() const             { return ((_value >>  6) & 1) != 0; }
  bool precision() const               { return ((_value >>  5) & 1) != 0; }
  bool underflow() const               { return ((_value >>  4) & 1) != 0; }
  bool overflow() const                { return ((_value >>  3) & 1) != 0; }
  bool zero_divide() const             { return ((_value >>  2) & 1) != 0; }
  bool denormalized() const            { return ((_value >>  1) & 1) != 0; }
  bool invalid() const                 { return ((_value >>  0) & 1) != 0; }

  void print() const {
    // condition codes
    char c[5];
    c[0] = (C3()) ? '3' : '-';
    c[1] = (C2()) ? '2' : '-';
    c[2] = (C1()) ? '1' : '-';
    c[3] = (C0()) ? '0' : '-';
    c[4] = '\x0';
    // flags
    char f[9];
    f[0] = (error_status()) ? 'E' : '-';
    f[1] = (stack_fault ()) ? 'S' : '-';
    f[2] = (precision   ()) ? 'P' : '-';
    f[3] = (underflow   ()) ? 'U' : '-';
    f[4] = (overflow    ()) ? 'O' : '-';
    f[5] = (zero_divide ()) ? 'Z' : '-';
    f[6] = (denormalized()) ? 'D' : '-';
    f[7] = (invalid     ()) ? 'I' : '-';
    f[8] = '\x0';
    // output
    printf("%04x  flags = %s, cc =  %s, top = %d", _value & 0xFFFF, f, c, top());
  }

};

// Saved image of the x87 FPU tag word: 2 tag bits per register.
class TagWord {
 public:
  int32_t _value;

  int tag_at(int i) const              { return (_value >> (i*2)) & 3; }

  void print() const {
    printf("%04x", _value & 0xFFFF);
  }

};

// One 80-bit x87 register image: 64-bit mantissa (_m1:_m0) plus
// 16-bit sign/exponent (_ex).
class FPU_Register {
 public:
  int32_t _m0;
  int32_t _m1;
  int16_t _ex;

  bool is_indefinite() const           {
    return _ex == -1 && _m1 == (int32_t)0xC0000000 && _m0 == 0;
  }

  void print() const {
    char  sign = (_ex < 0) ? '-' : '+';
    const char* kind = (_ex == 0x7FFF || _ex == (int16_t)-1) ? "NaN" : "   " ;
    printf("%c%04hx.%08x%08x  %s", sign, _ex, _m1, _m0, kind);
  };

};

// Complete x87 state image as laid out by push_CPU_state (fnsave format).
class FPU_State {
 public:
  enum {
    register_size       = 10,
    number_of_registers =  8,
    register_mask       =  7
  };

  ControlWord  _control_word;
  StatusWord   _status_word;
  TagWord      _tag_word;
  int32_t      _error_offset;
  int32_t      _error_selector;
  int32_t      _data_offset;
  int32_t      _data_selector;
  int8_t       _register[register_size * number_of_registers];

  // Tag for the i-th stack element, counted from the current top.
  int tag_for_st(int i) const          { return _tag_word.tag_at((_status_word.top() + i) & register_mask); }
  FPU_Register* st(int i) const        { return (FPU_Register*)&_register[register_size * i]; }

  const char* tag_as_string(int tag) const {
    switch (tag) {
      case 0: return "valid";
      case 1: return "zero";
      case 2: return "special";
      case 3: return "empty";
    }
    ShouldNotReachHere();
    return NULL;
  }

  void print() const {
    // print computation registers
    { int t = _status_word.top();
      for (int i = 0; i < number_of_registers; i++) {
        int j = (i - t) & register_mask;
        printf("%c r%d = ST%d = ", (j == 0 ? '*' : ' '), i, j);
        st(j)->print();
        printf(" %s\n", tag_as_string(_tag_word.tag_at(i)));
      }
    }
    printf("\n");
    // print control registers
    printf("ctrl = "); _control_word.print(); printf("\n");
    printf("stat = "); _status_word .print(); printf("\n");
    printf("tags = "); _tag_word    .print(); printf("\n");
  }

};

// Bit accessors over a saved EFLAGS image.
class Flag_Register {
 public:
  int32_t _value;

  bool overflow() const                { return ((_value >> 11) & 1) != 0; }
  bool direction() const               { return ((_value >> 10) & 1) != 0; }
  bool sign() const                    { return ((_value >>  7) & 1) != 0; }
  bool zero() const                    { return ((_value >>  6) & 1) != 0; }
  bool auxiliary_carry() const         { return ((_value >>  4) & 1) != 0; }
  bool parity() const                  { return ((_value >>  2) & 1) != 0; }
  bool carry() const                   { return ((_value >>  0) & 1) != 0; }

  void print() const {
    // flags
    char f[8];
    f[0] = (overflow       ()) ? 'O' : '-';
    f[1] = (direction      ()) ? 'D' : '-';
    f[2] = (sign           ()) ? 'S' : '-';
    f[3] = (zero           ()) ? 'Z' : '-';
    f[4] = (auxiliary_carry()) ? 'A' : '-';
    f[5] = (parity         ()) ? 'P' : '-';
    f[6] = (carry          ()) ? 'C' : '-';
    f[7] = '\x0';
    // output
    printf("%08x  flags = %s", _value, f);
  }

};

// A single saved integer register image.
class IU_Register {
 public:
  int32_t _value;

  void print() const {
    printf("%08x  %11d", _value, _value);
  }

};

// Integer-unit state image: EFLAGS plus general-purpose registers,
// in the order push_CPU_state leaves them on the stack.
class IU_State {
 public:
  Flag_Register _eflags;
  IU_Register   _rdi;
  IU_Register   _rsi;
  IU_Register   _rbp;
  IU_Register   _rsp;
  IU_Register   _rbx;
  IU_Register   _rdx;
  IU_Register   _rcx;
  IU_Register   _rax;

  void print() const {
    // computation registers
    printf("rax,  = "); _rax.print(); printf("\n");
    printf("rbx,  = "); _rbx.print(); printf("\n");
    printf("rcx  = "); _rcx.print(); printf("\n");
    printf("rdx  = "); _rdx.print(); printf("\n");
    printf("rdi  = "); _rdi.print(); printf("\n");
    printf("rsi  = "); _rsi.print(); printf("\n");
    printf("rbp,  = "); _rbp.print(); printf("\n");
    printf("rsp  = "); _rsp.print(); printf("\n");
    printf("\n");
    // control registers
    printf("flgs = "); _eflags.print(); printf("\n");
  }
};

// Combined FPU + integer-unit state image.
class CPU_State {
 public:
  FPU_State _fpu_state;
  IU_State  _iu_state;

  void print() const {
    printf("--------------------------------------------------\n");
    _iu_state .print();
    printf("\n");
    _fpu_state.print();
    printf("--------------------------------------------------\n");
  }

};


// Runtime helper invoked from generated code by print_CPU_state().
static void _print_CPU_state(CPU_State* state) {
  state->print();
};


// Dump the complete CPU state to stdout (debugging aid).
void MacroAssembler::print_CPU_state() {
  push_CPU_state();
  push(rsp);                // pass CPU state
  call(RuntimeAddress(CAST_FROM_FN_PTR(address, _print_CPU_state)));
  addptr(rsp, wordSize);    // discard argument
  pop_CPU_state();
}


// Runtime helper invoked from generated code by verify_FPU(); returns
// false (after printing and asserting) when the x87 stack does not match
// the expected depth.
static bool _verify_FPU(int stack_depth, char* s, CPU_State* state) {
  static int counter = 0;
  FPU_State* fs = &state->_fpu_state;
  counter++;
  // For leaf calls, only verify that the top few elements remain empty.
  // We only need 1 empty at the top for C2 code.
  if( stack_depth < 0 ) {
    if( fs->tag_for_st(7) != 3 ) {
      printf("FPR7 not empty\n");
      state->print();
      assert(false, "error");
      return false;
    }
    return true;                // All other stack states do not matter
  }

  assert((fs->_control_word._value & 0xffff) == StubRoutines::_fpu_cntrl_wrd_std,
         "bad FPU control word");

  // compute stack depth
  int i = 0;
  while (i < FPU_State::number_of_registers && fs->tag_for_st(i)  < 3) i++;
  int d = i;
  while (i < FPU_State::number_of_registers && fs->tag_for_st(i) == 3) i++;
  // verify findings
  if (i != FPU_State::number_of_registers) {
    // stack not contiguous
    printf("%s: stack not contiguous at ST%d\n", s, i);
    state->print();
    assert(false, "error");
    return false;
  }
  // check if computed stack depth corresponds to expected stack depth
  // NOTE(review): this branch is unreachable — stack_depth < 0 already
  // returned above; kept as-is for fidelity.
  if (stack_depth < 0) {
    // expected stack depth is -stack_depth or less
    if (d > -stack_depth) {
      // too many elements on the stack
      printf("%s: <= %d stack elements expected but found %d\n", s, -stack_depth, d);
      state->print();
      assert(false, "error");
      return false;
    }
  } else {
    // expected stack depth is stack_depth
    if (d != stack_depth) {
      // wrong stack depth
      printf("%s: %d stack elements expected but found %d\n", s, stack_depth, d);
      state->print();
      assert(false, "error");
      return false;
    }
  }
  // everything is cool
  return true;
}


// Emit a runtime check of the x87 stack depth, breaking (int3) on error.
// No-op unless -XX:+VerifyFPU.
void MacroAssembler::verify_FPU(int stack_depth, const char* s) {
  if (!VerifyFPU) return;
  push_CPU_state();
  push(rsp);                // pass CPU state
  ExternalAddress msg((address) s);
  // pass message string s
  pushptr(msg.addr());
  push(stack_depth);        // pass stack depth
  call(RuntimeAddress(CAST_FROM_FN_PTR(address, _verify_FPU)));
  addptr(rsp, 3 * wordSize);   // discard arguments
  // check for error
  { Label L;
    testl(rax, rax);
    jcc(Assembler::notZero, L);
    int3();                  // break if error condition
    bind(L);
  }
  pop_CPU_state();
}

// Load the klass pointer of 'src' into 'dst', decoding it when
// compressed oops are in use.
void MacroAssembler::load_klass(Register dst, Register src) {
#ifdef _LP64
  if (UseCompressedOops) {
    movl(dst, Address(src, oopDesc::klass_offset_in_bytes()));
    decode_heap_oop_not_null(dst);
  } else
#endif
    movptr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
}

// Load klass(src)->prototype_header into 'dst'.  With compressed oops the
// klass decode is folded into the addressing mode where possible.
void MacroAssembler::load_prototype_header(Register dst, Register src) {
#ifdef _LP64
  if (UseCompressedOops) {
    assert (Universe::heap() != NULL, "java heap should be initialized");
    movl(dst, Address(src, oopDesc::klass_offset_in_bytes()));
    if (Universe::narrow_oop_shift() != 0) {
      assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
      if (LogMinObjAlignmentInBytes == Address::times_8) {
        movq(dst, Address(r12_heapbase, dst, Address::times_8, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
      } else {
        // OK to use shift since we don't need to preserve flags.
        shlq(dst, LogMinObjAlignmentInBytes);
        movq(dst, Address(r12_heapbase, dst, Address::times_1, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
      }
    } else {
      movq(dst, Address(dst, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
    }
  } else
#endif
  {
    movptr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
    movptr(dst, Address(dst, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
  }
}

// Store klass 'src' into object 'dst', encoding first when compressed
// oops are in use.  Note: clobbers 'src' in the compressed case.
void MacroAssembler::store_klass(Register dst, Register src) {
#ifdef _LP64
  if (UseCompressedOops) {
    encode_heap_oop_not_null(src);
    movl(Address(dst, oopDesc::klass_offset_in_bytes()), src);
  } else
#endif
    movptr(Address(dst, oopDesc::klass_offset_in_bytes()), src);
}

// Load a (possibly compressed) heap oop from 'src' and decode into 'dst'.
void MacroAssembler::load_heap_oop(Register dst, Address src) {
#ifdef _LP64
  if (UseCompressedOops) {
    movl(dst, src);
    decode_heap_oop(dst);
  } else
#endif
    movptr(dst, src);
}

// Encode (when compressed oops are in use) and store oop 'src' to 'dst'.
// Note: clobbers 'src' in the compressed case.
void MacroAssembler::store_heap_oop(Address dst, Register src) {
#ifdef _LP64
  if (UseCompressedOops) {
    assert(!dst.uses(src), "not enough registers");
    encode_heap_oop(src);
    movl(dst, src);
  } else
#endif
    movptr(dst, src);
}

// Used for storing NULLs.
// Store a NULL oop to 'dst'; with compressed oops a 32-bit store
// suffices, otherwise the NULL must be sign-extended to 64 bits.
void MacroAssembler::store_heap_oop_null(Address dst) {
#ifdef _LP64
  if (UseCompressedOops) {
    movl(dst, (int32_t)NULL_WORD);
  } else {
    movslq(dst, (int32_t)NULL_WORD);
  }
#else
  movl(dst, (int32_t)NULL_WORD);
#endif
}

#ifdef _LP64
// Fill the 32-bit gap after a compressed klass field so the header
// contains no stale bits.
void MacroAssembler::store_klass_gap(Register dst, Register src) {
  if (UseCompressedOops) {
    // Store to klass gap in destination
    movl(Address(dst, oopDesc::klass_gap_offset_in_bytes()), src);
  }
}

#ifdef ASSERT
// Debug check that r12_heapbase still holds the narrow-oop base;
// stop()s with 'msg' if it has been corrupted.
void MacroAssembler::verify_heapbase(const char* msg) {
  assert (UseCompressedOops, "should be compressed");
  assert (Universe::heap() != NULL, "java heap should be initialized");
  if (CheckCompressedOops) {
    Label ok;
    push(rscratch1); // cmpptr trashes rscratch1
    cmpptr(r12_heapbase, ExternalAddress((address)Universe::narrow_oop_base_addr()));
    jcc(Assembler::equal, ok);
    stop(msg);
    bind(ok);
    pop(rscratch1);
  }
}
#endif

// Algorithm must match oop.inline.hpp encode_heap_oop.
// Compress oop in 'r' in place (NULL maps to zero).
void MacroAssembler::encode_heap_oop(Register r) {
#ifdef ASSERT
  verify_heapbase("MacroAssembler::encode_heap_oop: heap base corrupted?");
#endif
  verify_oop(r, "broken oop in encode_heap_oop");
  if (Universe::narrow_oop_base() == NULL) {
    // Zero-based compressed oops: shift only (or nothing).
    if (Universe::narrow_oop_shift() != 0) {
      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
      shrq(r, LogMinObjAlignmentInBytes);
    }
    return;
  }
  // Map NULL to the heap base first so that (r - base) >> shift yields 0.
  testq(r, r);
  cmovq(Assembler::equal, r, r12_heapbase);
  subq(r, r12_heapbase);
  shrq(r, LogMinObjAlignmentInBytes);
}

// Compress a known-non-NULL oop in 'r' in place (skips the NULL check).
void MacroAssembler::encode_heap_oop_not_null(Register r) {
#ifdef ASSERT
  verify_heapbase("MacroAssembler::encode_heap_oop_not_null: heap base corrupted?");
  if (CheckCompressedOops) {
    Label ok;
    testq(r, r);
    jcc(Assembler::notEqual, ok);
    stop("null oop passed to encode_heap_oop_not_null");
    bind(ok);
  }
#endif
  verify_oop(r, "broken oop in encode_heap_oop_not_null");
  if (Universe::narrow_oop_base() != NULL) {
    subq(r, r12_heapbase);
  }
  if (Universe::narrow_oop_shift() != 0) {
    assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
    shrq(r, LogMinObjAlignmentInBytes);
  }
}

// Two-register form: compress known-non-NULL oop 'src' into 'dst'.
void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) {
#ifdef ASSERT
  verify_heapbase("MacroAssembler::encode_heap_oop_not_null2: heap base corrupted?");
  if (CheckCompressedOops) {
    Label ok;
    testq(src, src);
    jcc(Assembler::notEqual, ok);
    stop("null oop passed to encode_heap_oop_not_null2");
    bind(ok);
  }
#endif
  verify_oop(src, "broken oop in encode_heap_oop_not_null2");
  if (dst != src) {
    movq(dst, src);
  }
  if (Universe::narrow_oop_base() != NULL) {
    subq(dst, r12_heapbase);
  }
  if (Universe::narrow_oop_shift() != 0) {
    assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
    shrq(dst, LogMinObjAlignmentInBytes);
  }
}

// Decompress narrow oop in 'r' in place (zero maps back to NULL).
void MacroAssembler::decode_heap_oop(Register r) {
#ifdef ASSERT
  verify_heapbase("MacroAssembler::decode_heap_oop: heap base corrupted?");
#endif
  if (Universe::narrow_oop_base() == NULL) {
    if (Universe::narrow_oop_shift() != 0) {
      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
      shlq(r, LogMinObjAlignmentInBytes);
    }
  } else {
    Label done;
    // shlq sets ZF from the result: a NULL narrow oop stays zero and
    // skips the heap-base add, preserving NULL.
    shlq(r, LogMinObjAlignmentInBytes);
    jccb(Assembler::equal, done);
    addq(r, r12_heapbase);
    bind(done);
  }
  verify_oop(r, "broken oop in decode_heap_oop");
}

// Decompress a known-non-NULL narrow oop in 'r' in place.
void MacroAssembler::decode_heap_oop_not_null(Register r) {
  // Note: it will change flags
  assert (UseCompressedOops, "should only be used for compressed headers");
  assert (Universe::heap() != NULL, "java heap should be initialized");
  // Cannot assert, unverified entry point counts instructions (see .ad file)
  // vtableStubs also counts instructions in pd_code_size_limit.
  // Also do not verify_oop as this is called by verify_oop.
  if (Universe::narrow_oop_shift() != 0) {
    assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
    shlq(r, LogMinObjAlignmentInBytes);
    if (Universe::narrow_oop_base() != NULL) {
      addq(r, r12_heapbase);
    }
  } else {
    assert (Universe::narrow_oop_base() == NULL, "sanity");
  }
}

// Two-register form: decompress known-non-NULL narrow oop 'src' into
// 'dst', using lea to fold base+shift into one instruction when possible.
void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) {
  // Note: it will change flags
  assert (UseCompressedOops, "should only be used for compressed headers");
  assert (Universe::heap() != NULL, "java heap should be initialized");
  // Cannot assert, unverified entry point counts instructions (see .ad file)
  // vtableStubs also counts instructions in pd_code_size_limit.
  // Also do not verify_oop as this is called by verify_oop.
  if (Universe::narrow_oop_shift() != 0) {
    assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
    if (LogMinObjAlignmentInBytes == Address::times_8) {
      leaq(dst, Address(r12_heapbase, src, Address::times_8, 0));
    } else {
      if (dst != src) {
        movq(dst, src);
      }
      shlq(dst, LogMinObjAlignmentInBytes);
      if (Universe::narrow_oop_base() != NULL) {
        addq(dst, r12_heapbase);
      }
    }
  } else {
    assert (Universe::narrow_oop_base() == NULL, "sanity");
    if (dst != src) {
      movq(dst, src);
    }
  }
}

// Materialize narrow oop 'obj' in register 'dst' with an oop relocation.
void MacroAssembler::set_narrow_oop(Register dst, jobject obj) {
  assert (UseCompressedOops, "should only be used for compressed headers");
  assert (Universe::heap() != NULL, "java heap should be initialized");
  assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
  int oop_index = oop_recorder()->find_index(obj);
  RelocationHolder rspec = oop_Relocation::spec(oop_index);
  mov_narrow_oop(dst, oop_index, rspec);
}

// Store narrow oop 'obj' to memory at 'dst' with an oop relocation.
void MacroAssembler::set_narrow_oop(Address dst, jobject obj) {
  assert (UseCompressedOops, "should only be used for compressed headers");
  assert (Universe::heap() != NULL, "java heap should be initialized");
  assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
  int oop_index = oop_recorder()->find_index(obj);
  RelocationHolder rspec = oop_Relocation::spec(oop_index);
  mov_narrow_oop(dst, oop_index, rspec);
}

// Compare register 'dst' against narrow oop 'obj' (continues below).
void MacroAssembler::cmp_narrow_oop(Register dst, jobject obj) {
  assert (UseCompressedOops, "should only be used for compressed headers");
  assert (Universe::heap() != NULL, "java heap should be initialized");
  assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
  int oop_index = oop_recorder()->find_index(obj);
  RelocationHolder rspec
= oop_Relocation::spec(oop_index); 8461 Assembler::cmp_narrow_oop(dst, oop_index, rspec); 8462 } 8463 8464 void MacroAssembler::cmp_narrow_oop(Address dst, jobject obj) { 8465 assert (UseCompressedOops, "should only be used for compressed headers"); 8466 assert (Universe::heap() != NULL, "java heap should be initialized"); 8467 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); 8468 int oop_index = oop_recorder()->find_index(obj); 8469 RelocationHolder rspec = oop_Relocation::spec(oop_index); 8470 Assembler::cmp_narrow_oop(dst, oop_index, rspec); 8471 } 8472 8473 void MacroAssembler::reinit_heapbase() { 8474 if (UseCompressedOops) { 8475 movptr(r12_heapbase, ExternalAddress((address)Universe::narrow_oop_base_addr())); 8476 } 8477 } 8478 #endif // _LP64 8479 8480 // IndexOf substring. 8481 void MacroAssembler::string_indexof(Register str1, Register str2, 8482 Register cnt1, Register cnt2, Register result, 8483 XMMRegister vec, Register tmp) { 8484 assert(UseSSE42Intrinsics, "SSE4.2 is required"); 8485 8486 Label RELOAD_SUBSTR, PREP_FOR_SCAN, SCAN_TO_SUBSTR, 8487 SCAN_SUBSTR, RET_NOT_FOUND, CLEANUP; 8488 8489 push(str1); // string addr 8490 push(str2); // substr addr 8491 push(cnt2); // substr count 8492 jmpb(PREP_FOR_SCAN); 8493 8494 // Substr count saved at sp 8495 // Substr saved at sp+1*wordSize 8496 // String saved at sp+2*wordSize 8497 8498 // Reload substr for rescan 8499 bind(RELOAD_SUBSTR); 8500 movl(cnt2, Address(rsp, 0)); 8501 movptr(str2, Address(rsp, wordSize)); 8502 // We came here after the beginninig of the substring was 8503 // matched but the rest of it was not so we need to search 8504 // again. Start from the next element after the previous match. 
8505 subptr(str1, result); // Restore counter 8506 shrl(str1, 1); 8507 addl(cnt1, str1); 8508 decrementl(cnt1); 8509 lea(str1, Address(result, 2)); // Reload string 8510 8511 // Load substr 8512 bind(PREP_FOR_SCAN); 8513 movdqu(vec, Address(str2, 0)); 8514 addl(cnt1, 8); // prime the loop 8515 subptr(str1, 16); 8516 8517 // Scan string for substr in 16-byte vectors 8518 bind(SCAN_TO_SUBSTR); 8519 subl(cnt1, 8); 8520 addptr(str1, 16); 8521 8522 // pcmpestri 8523 // inputs: 8524 // xmm - substring 8525 // rax - substring length (elements count) 8526 // mem - scaned string 8527 // rdx - string length (elements count) 8528 // 0xd - mode: 1100 (substring search) + 01 (unsigned shorts) 8529 // outputs: 8530 // rcx - matched index in string 8531 assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri"); 8532 8533 pcmpestri(vec, Address(str1, 0), 0x0d); 8534 jcc(Assembler::above, SCAN_TO_SUBSTR); // CF == 0 && ZF == 0 8535 jccb(Assembler::aboveEqual, RET_NOT_FOUND); // CF == 0 8536 8537 // Fallthrough: found a potential substr 8538 8539 // Make sure string is still long enough 8540 subl(cnt1, tmp); 8541 cmpl(cnt1, cnt2); 8542 jccb(Assembler::negative, RET_NOT_FOUND); 8543 // Compute start addr of substr 8544 lea(str1, Address(str1, tmp, Address::times_2)); 8545 movptr(result, str1); // save 8546 8547 // Compare potential substr 8548 addl(cnt1, 8); // prime the loop 8549 addl(cnt2, 8); 8550 subptr(str1, 16); 8551 subptr(str2, 16); 8552 8553 // Scan 16-byte vectors of string and substr 8554 bind(SCAN_SUBSTR); 8555 subl(cnt1, 8); 8556 subl(cnt2, 8); 8557 addptr(str1, 16); 8558 addptr(str2, 16); 8559 movdqu(vec, Address(str2, 0)); 8560 pcmpestri(vec, Address(str1, 0), 0x0d); 8561 jcc(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0 8562 jcc(Assembler::positive, SCAN_SUBSTR); // SF == 0 8563 8564 // Compute substr offset 8565 subptr(result, Address(rsp, 2*wordSize)); 8566 shrl(result, 1); // index 8567 jmpb(CLEANUP); 8568 8569 bind(RET_NOT_FOUND); 8570 movl(result, -1); 
8571 8572 bind(CLEANUP); 8573 addptr(rsp, 3*wordSize); 8574 } 8575 8576 // Compare strings. 8577 void MacroAssembler::string_compare(Register str1, Register str2, 8578 Register cnt1, Register cnt2, Register result, 8579 XMMRegister vec1, XMMRegister vec2) { 8580 Label LENGTH_DIFF_LABEL, POP_LABEL, DONE_LABEL, WHILE_HEAD_LABEL; 8581 8582 // Compute the minimum of the string lengths and the 8583 // difference of the string lengths (stack). 8584 // Do the conditional move stuff 8585 movl(result, cnt1); 8586 subl(cnt1, cnt2); 8587 push(cnt1); 8588 if (VM_Version::supports_cmov()) { 8589 cmovl(Assembler::lessEqual, cnt2, result); 8590 } else { 8591 Label GT_LABEL; 8592 jccb(Assembler::greater, GT_LABEL); 8593 movl(cnt2, result); 8594 bind(GT_LABEL); 8595 } 8596 8597 // Is the minimum length zero? 8598 testl(cnt2, cnt2); 8599 jcc(Assembler::zero, LENGTH_DIFF_LABEL); 8600 8601 // Load first characters 8602 load_unsigned_short(result, Address(str1, 0)); 8603 load_unsigned_short(cnt1, Address(str2, 0)); 8604 8605 // Compare first characters 8606 subl(result, cnt1); 8607 jcc(Assembler::notZero, POP_LABEL); 8608 decrementl(cnt2); 8609 jcc(Assembler::zero, LENGTH_DIFF_LABEL); 8610 8611 { 8612 // Check after comparing first character to see if strings are equivalent 8613 Label LSkip2; 8614 // Check if the strings start at same location 8615 cmpptr(str1, str2); 8616 jccb(Assembler::notEqual, LSkip2); 8617 8618 // Check if the length difference is zero (from stack) 8619 cmpl(Address(rsp, 0), 0x0); 8620 jcc(Assembler::equal, LENGTH_DIFF_LABEL); 8621 8622 // Strings might not be equivalent 8623 bind(LSkip2); 8624 } 8625 8626 // Advance to next character 8627 addptr(str1, 2); 8628 addptr(str2, 2); 8629 8630 if (UseSSE42Intrinsics) { 8631 // With SSE4.2, use double quad vector compare 8632 Label COMPARE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_TAIL; 8633 // Setup to compare 16-byte vectors 8634 movl(cnt1, cnt2); 8635 andl(cnt2, 0xfffffff8); // cnt2 holds the vector count 8636 andl(cnt1, 
0x00000007); // cnt1 holds the tail count 8637 testl(cnt2, cnt2); 8638 jccb(Assembler::zero, COMPARE_TAIL); 8639 8640 lea(str2, Address(str2, cnt2, Address::times_2)); 8641 lea(str1, Address(str1, cnt2, Address::times_2)); 8642 negptr(cnt2); 8643 8644 bind(COMPARE_VECTORS); 8645 movdqu(vec1, Address(str1, cnt2, Address::times_2)); 8646 movdqu(vec2, Address(str2, cnt2, Address::times_2)); 8647 pxor(vec1, vec2); 8648 ptest(vec1, vec1); 8649 jccb(Assembler::notZero, VECTOR_NOT_EQUAL); 8650 addptr(cnt2, 8); 8651 jcc(Assembler::notZero, COMPARE_VECTORS); 8652 jmpb(COMPARE_TAIL); 8653 8654 // Mismatched characters in the vectors 8655 bind(VECTOR_NOT_EQUAL); 8656 lea(str1, Address(str1, cnt2, Address::times_2)); 8657 lea(str2, Address(str2, cnt2, Address::times_2)); 8658 movl(cnt1, 8); 8659 8660 // Compare tail (< 8 chars), or rescan last vectors to 8661 // find 1st mismatched characters 8662 bind(COMPARE_TAIL); 8663 testl(cnt1, cnt1); 8664 jccb(Assembler::zero, LENGTH_DIFF_LABEL); 8665 movl(cnt2, cnt1); 8666 // Fallthru to tail compare 8667 } 8668 8669 // Shift str2 and str1 to the end of the arrays, negate min 8670 lea(str1, Address(str1, cnt2, Address::times_2, 0)); 8671 lea(str2, Address(str2, cnt2, Address::times_2, 0)); 8672 negptr(cnt2); 8673 8674 // Compare the rest of the characters 8675 bind(WHILE_HEAD_LABEL); 8676 load_unsigned_short(result, Address(str1, cnt2, Address::times_2, 0)); 8677 load_unsigned_short(cnt1, Address(str2, cnt2, Address::times_2, 0)); 8678 subl(result, cnt1); 8679 jccb(Assembler::notZero, POP_LABEL); 8680 increment(cnt2); 8681 jcc(Assembler::notZero, WHILE_HEAD_LABEL); 8682 8683 // Strings are equal up to min length. Return the length difference. 8684 bind(LENGTH_DIFF_LABEL); 8685 pop(result); 8686 jmpb(DONE_LABEL); 8687 8688 // Discard the stored length difference 8689 bind(POP_LABEL); 8690 addptr(rsp, wordSize); 8691 8692 // That's it 8693 bind(DONE_LABEL); 8694 } 8695 8696 // Compare char[] arrays aligned to 4 bytes or substrings. 
// Emit an equality check of two char sequences: 'result' becomes 1 if equal,
// 0 otherwise. With is_array_equ, ary1/ary2 are array oops (may be NULL):
// null and length checks are added and the array header is skipped; 'limit'
// is loaded from the array length. Otherwise ary1/ary2 point at raw chars
// and 'limit' holds the char count. 'chr' is a scratch register.
void MacroAssembler::char_arrays_equals(bool is_array_equ, Register ary1, Register ary2,
                                        Register limit, Register result, Register chr,
                                        XMMRegister vec1, XMMRegister vec2) {
  Label TRUE_LABEL, FALSE_LABEL, DONE, COMPARE_VECTORS, COMPARE_CHAR;

  int length_offset = arrayOopDesc::length_offset_in_bytes();
  int base_offset   = arrayOopDesc::base_offset_in_bytes(T_CHAR);

  // Check the input args: identical references are trivially equal.
  cmpptr(ary1, ary2);
  jcc(Assembler::equal, TRUE_LABEL);

  if (is_array_equ) {
    // Need additional checks for arrays_equals.
    testptr(ary1, ary1);
    jcc(Assembler::zero, FALSE_LABEL);
    testptr(ary2, ary2);
    jcc(Assembler::zero, FALSE_LABEL);

    // Check the lengths
    movl(limit, Address(ary1, length_offset));
    cmpl(limit, Address(ary2, length_offset));
    jcc(Assembler::notEqual, FALSE_LABEL);
  }

  // count == 0
  testl(limit, limit);
  jcc(Assembler::zero, TRUE_LABEL);

  if (is_array_equ) {
    // Load array address (skip the object header to the first element).
    lea(ary1, Address(ary1, base_offset));
    lea(ary2, Address(ary2, base_offset));
  }

  shll(limit, 1);      // byte count != 0 (chars are 2 bytes)
  movl(result, limit); // copy

  if (UseSSE42Intrinsics) {
    // With SSE4.2, use double quad vector compare
    Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;
    // Compare 16-byte vectors
    andl(result, 0x0000000e); //   tail count (in bytes)
    andl(limit, 0xfffffff0);  // vector count (in bytes)
    jccb(Assembler::zero, COMPARE_TAIL);

    // Point past the vectorized region; iterate with negative offsets.
    lea(ary1, Address(ary1, limit, Address::times_1));
    lea(ary2, Address(ary2, limit, Address::times_1));
    negptr(limit);

    bind(COMPARE_WIDE_VECTORS);
    movdqu(vec1, Address(ary1, limit, Address::times_1));
    movdqu(vec2, Address(ary2, limit, Address::times_1));
    pxor(vec1, vec2);
    ptest(vec1, vec1);
    jccb(Assembler::notZero, FALSE_LABEL);
    addptr(limit, 16);
    jcc(Assembler::notZero, COMPARE_WIDE_VECTORS);

    bind(COMPARE_TAIL); // limit is zero
    movl(limit, result);
    // Fallthru to tail compare
  }

  // Compare 4-byte vectors
  andl(limit, 0xfffffffc); // vector count (in bytes)
  jccb(Assembler::zero, COMPARE_CHAR);

  lea(ary1, Address(ary1, limit, Address::times_1));
  lea(ary2, Address(ary2, limit, Address::times_1));
  negptr(limit);

  bind(COMPARE_VECTORS);
  movl(chr, Address(ary1, limit, Address::times_1));
  cmpl(chr, Address(ary2, limit, Address::times_1));
  jccb(Assembler::notEqual, FALSE_LABEL);
  addptr(limit, 4);
  jcc(Assembler::notZero, COMPARE_VECTORS);

  // Compare trailing char (final 2 bytes), if any
  bind(COMPARE_CHAR);
  testl(result, 0x2); // tail char
  jccb(Assembler::zero, TRUE_LABEL);
  load_unsigned_short(chr, Address(ary1, 0));
  load_unsigned_short(limit, Address(ary2, 0));
  cmpl(chr, limit);
  jccb(Assembler::notEqual, FALSE_LABEL);

  bind(TRUE_LABEL);
  movl(result, 1);   // return true
  jmpb(DONE);

  bind(FALSE_LABEL);
  xorl(result, result); // return false

  // That's it
  bind(DONE);
}

#ifdef PRODUCT
#define BLOCK_COMMENT(str) /* nothing */
#else
#define BLOCK_COMMENT(str) block_comment(str)
#endif

// Bind a label and annotate the disassembly with its name (debug builds only).
#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")

// Emit code to fill 'count' elements of type t (T_BYTE/T_SHORT/T_INT) at
// 'to' with 'value'. The value is first replicated across a full 32-bit
// word; large runs are filled in 32-byte chunks (SSE2 stores when
// available), then 8-, 4-, 2- and 1-byte tails are handled. 'aligned'
// promises 4-byte alignment of 'to'; rtmp/xtmp are scratch.
void MacroAssembler::generate_fill(BasicType t, bool aligned,
                                   Register to, Register value, Register count,
                                   Register rtmp, XMMRegister xtmp) {
  assert_different_registers(to, value, count, rtmp);
  Label L_exit, L_skip_align1, L_skip_align2, L_fill_byte;
  Label L_fill_2_bytes, L_fill_4_bytes;

  // shift = log2(elements per 32-bit word); e.g. 8 << shift is the element
  // count of a 32-byte chunk.
  int shift = -1;
  switch (t) {
    case T_BYTE:
      shift = 2;
      break;
    case T_SHORT:
      shift = 1;
      break;
    case T_INT:
      shift = 0;
      break;
    default: ShouldNotReachHere();
  }

  // Replicate the fill value across all 32 bits.
  if (t == T_BYTE) {
    andl(value, 0xff);
    movl(rtmp, value);
    shll(rtmp, 8);
    orl(value, rtmp);
  }
  if (t == T_SHORT) {
    andl(value, 0xffff);
  }
  if (t == T_BYTE || t == T_SHORT) {
    movl(rtmp, value);
    shll(rtmp, 16);
    orl(value, rtmp);
  }

  cmpl(count, 2<<shift); // Short arrays (< 8 bytes) fill by element
  jcc(Assembler::below, L_fill_4_bytes); // use unsigned cmp
  if (!UseUnalignedLoadStores && !aligned && (t == T_BYTE || t == T_SHORT)) {
    // align source address at 4 bytes address boundary
    if (t == T_BYTE) {
      // One byte misalignment happens only for byte arrays
      testptr(to, 1);
      jccb(Assembler::zero, L_skip_align1);
      movb(Address(to, 0), value);
      increment(to);
      decrement(count);
      BIND(L_skip_align1);
    }
    // Two bytes misalignment happens only for byte and short (char) arrays
    testptr(to, 2);
    jccb(Assembler::zero, L_skip_align2);
    movw(Address(to, 0), value);
    addptr(to, 2);
    subl(count, 1<<(shift-1));
    BIND(L_skip_align2);
  }
  if (UseSSE < 2) {
    // No SSE2: fill with plain 32-bit stores.
    Label L_fill_32_bytes_loop, L_check_fill_8_bytes, L_fill_8_bytes_loop, L_fill_8_bytes;
    // Fill 32-byte chunks
    subl(count, 8 << shift);
    jcc(Assembler::less, L_check_fill_8_bytes);
    align(16);

    BIND(L_fill_32_bytes_loop);

    for (int i = 0; i < 32; i += 4) {
      movl(Address(to, i), value);
    }

    addptr(to, 32);
    subl(count, 8 << shift);
    jcc(Assembler::greaterEqual, L_fill_32_bytes_loop);
    BIND(L_check_fill_8_bytes);
    addl(count, 8 << shift);
    jccb(Assembler::zero, L_exit);
    jmpb(L_fill_8_bytes);

    //
    // length is too short, just fill qwords
    //
    BIND(L_fill_8_bytes_loop);
    movl(Address(to, 0), value);
    movl(Address(to, 4), value);
    addptr(to, 8);
    BIND(L_fill_8_bytes);
    subl(count, 1 << (shift + 1));
    jcc(Assembler::greaterEqual, L_fill_8_bytes_loop);
    // fall through to fill 4 bytes
  } else {
    Label L_fill_32_bytes;
    if (!UseUnalignedLoadStores) {
      // align to 8 bytes, we know we are 4 byte aligned to start
      testptr(to, 4);
      jccb(Assembler::zero, L_fill_32_bytes);
      movl(Address(to, 0), value);
      addptr(to, 4);
      subl(count, 1<<shift);
    }
    BIND(L_fill_32_bytes);
    {
      assert( UseSSE >= 2, "supported cpu only" );
      Label L_fill_32_bytes_loop, L_check_fill_8_bytes, L_fill_8_bytes_loop, L_fill_8_bytes;
      // Fill 32-byte chunks
      movdl(xtmp, value);
      pshufd(xtmp, xtmp, 0);   // broadcast the 32-bit value across the XMM register

      subl(count, 8 << shift);
      jcc(Assembler::less, L_check_fill_8_bytes);
      align(16);

      BIND(L_fill_32_bytes_loop);

      if (UseUnalignedLoadStores) {
        movdqu(Address(to, 0), xtmp);
        movdqu(Address(to, 16), xtmp);
      } else {
        movq(Address(to, 0), xtmp);
        movq(Address(to, 8), xtmp);
        movq(Address(to, 16), xtmp);
        movq(Address(to, 24), xtmp);
      }

      addptr(to, 32);
      subl(count, 8 << shift);
      jcc(Assembler::greaterEqual, L_fill_32_bytes_loop);
      BIND(L_check_fill_8_bytes);
      addl(count, 8 << shift);
      jccb(Assembler::zero, L_exit);
      jmpb(L_fill_8_bytes);

      //
      // length is too short, just fill qwords
      //
      BIND(L_fill_8_bytes_loop);
      movq(Address(to, 0), xtmp);
      addptr(to, 8);
      BIND(L_fill_8_bytes);
      subl(count, 1 << (shift + 1));
      jcc(Assembler::greaterEqual, L_fill_8_bytes_loop);
    }
  }
  // fill trailing 4 bytes
  BIND(L_fill_4_bytes);
  testl(count, 1<<shift);
  jccb(Assembler::zero, L_fill_2_bytes);
  movl(Address(to, 0), value);
  if (t == T_BYTE || t == T_SHORT) {
    addptr(to, 4);
    BIND(L_fill_2_bytes);
    // fill trailing 2 bytes
    testl(count, 1<<(shift-1));
    jccb(Assembler::zero, L_fill_byte);
    movw(Address(to, 0), value);
    if (t == T_BYTE) {
      addptr(to, 2);
      BIND(L_fill_byte);
      // fill trailing byte
      testl(count, 1);
      jccb(Assembler::zero, L_exit);
      movb(Address(to, 0), value);
    } else {
      BIND(L_fill_byte);
    }
  } else {
    BIND(L_fill_2_bytes);
  }
  BIND(L_exit);
}
#undef BIND
#undef BLOCK_COMMENT


// Return the condition code that is the logical inverse of 'cond'.
Assembler::Condition MacroAssembler::negate_condition(Assembler::Condition cond) {
  switch (cond) {
    // Note some conditions are synonyms for others
    case Assembler::zero:         return Assembler::notZero;
    case Assembler::notZero:      return Assembler::zero;
    case Assembler::less:         return Assembler::greaterEqual;
    case Assembler::lessEqual:    return Assembler::greater;
    case Assembler::greater:      return Assembler::lessEqual;
    case Assembler::greaterEqual: return Assembler::less;
    case Assembler::below:        return Assembler::aboveEqual;
    case Assembler::belowEqual:   return Assembler::above;
    case Assembler::above:        return Assembler::belowEqual;
    case Assembler::aboveEqual:   return Assembler::below;
    case Assembler::overflow:     return Assembler::noOverflow;
    case Assembler::noOverflow:   return Assembler::overflow;
    case Assembler::negative:     return Assembler::positive;
    case Assembler::positive:     return Assembler::negative;
    case Assembler::parity:       return Assembler::noParity;
    case Assembler::noParity:     return Assembler::parity;
  }
  ShouldNotReachHere(); return Assembler::overflow;
}

// RAII helper: emits a compare of the byte flag at 'flag_addr' against
// 'value' and a jump over everything emitted between construction and
// destruction when they are equal; the destructor binds the skip target.
SkipIfEqual::SkipIfEqual(
    MacroAssembler* masm, const bool* flag_addr, bool value) {
  _masm = masm;
  _masm->cmp8(ExternalAddress((address)flag_addr), value);
  _masm->jcc(Assembler::equal, _label);
}

SkipIfEqual::~SkipIfEqual() {
  _masm->bind(_label);
}