1 /* 2 * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 
 *
 */

#include "precompiled.hpp"
#include "assembler_x86.inline.hpp"
#include "gc_interface/collectedHeap.inline.hpp"
#include "interpreter/interpreter.hpp"
#include "memory/cardTableModRefBS.hpp"
#include "memory/resourceArea.hpp"
#include "prims/methodHandles.hpp"
#include "runtime/biasedLocking.hpp"
#include "runtime/interfaceSupport.hpp"
#include "runtime/objectMonitor.hpp"
#include "runtime/os.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/stubRoutines.hpp"
#ifndef SERIALGC
#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
#include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"
#include "gc_implementation/g1/heapRegion.hpp"
#endif

// Implementation of AddressLiteral

// Record the literal's target address and derive the relocation spec
// implied by rtype.
AddressLiteral::AddressLiteral(address target, relocInfo::relocType rtype) {
  _is_lval = false;
  _target = target;
  switch (rtype) {
  case relocInfo::oop_type:
    // Oops are a special case. Normally they would be their own section
    // but in cases like icBuffer they are literals in the code stream that
    // we don't have a section for. We use none so that we get a literal address
    // which is always patchable.
    break;
  case relocInfo::external_word_type:
    _rspec = external_word_Relocation::spec(target);
    break;
  case relocInfo::internal_word_type:
    _rspec = internal_word_Relocation::spec(target);
    break;
  case relocInfo::opt_virtual_call_type:
    _rspec = opt_virtual_call_Relocation::spec();
    break;
  case relocInfo::static_call_type:
    _rspec = static_call_Relocation::spec();
    break;
  case relocInfo::runtime_call_type:
    _rspec = runtime_call_Relocation::spec();
    break;
  case relocInfo::poll_type:
  case relocInfo::poll_return_type:
    _rspec = Relocation::spec_simple(rtype);
    break;
  case relocInfo::none:
    break;
  default:
    ShouldNotReachHere();
    break;
  }
}

// Implementation of Address

#ifdef _LP64

Address Address::make_array(ArrayAddress adr) {
  // Not implementable on 64bit machines
  // Should have been handled higher up the call chain.
  ShouldNotReachHere();
  return Address();
}

// exceedingly dangerous constructor
Address::Address(int disp, address loc, relocInfo::relocType rtype) {
  _base  = noreg;
  _index = noreg;
  _scale = no_scale;
  _disp  = disp;
  switch (rtype) {
    case relocInfo::external_word_type:
      _rspec = external_word_Relocation::spec(loc);
      break;
    case relocInfo::internal_word_type:
      _rspec = internal_word_Relocation::spec(loc);
      break;
    case relocInfo::runtime_call_type:
      // HMM
      _rspec = runtime_call_Relocation::spec();
      break;
    case relocInfo::poll_type:
    case relocInfo::poll_return_type:
      _rspec = Relocation::spec_simple(rtype);
      break;
    case relocInfo::none:
      break;
    default:
      ShouldNotReachHere();
  }
}
#else // LP64

// 32-bit only: fold an ArrayAddress (base literal + scaled index) into a
// single Address whose displacement is the literal's target.
Address Address::make_array(ArrayAddress adr) {
  AddressLiteral base = adr.base();
  Address index = adr.index();
  assert(index._disp == 0, "must not have disp"); // maybe it can?
  Address array(index._base, index._index, index._scale, (intptr_t) base.target());
  array._rspec = base._rspec;
  return array;
}

// exceedingly dangerous constructor
Address::Address(address loc, RelocationHolder spec) {
  _base  = noreg;
  _index = noreg;
  _scale = no_scale;
  _disp  = (intptr_t) loc;
  _rspec = spec;
}

#endif // _LP64



// Convert the raw encoding form into the form expected by the constructor for
// Address. An index of 4 (rsp) corresponds to having no index, so convert
// that to noreg for the Address constructor.
Address Address::make_raw(int base, int index, int scale, int disp, bool disp_is_oop) {
  RelocationHolder rspec;
  if (disp_is_oop) {
    rspec = Relocation::spec_simple(relocInfo::oop_type);
  }
  bool valid_index = index != rsp->encoding();
  if (valid_index) {
    Address madr(as_Register(base), as_Register(index), (Address::ScaleFactor)scale, in_ByteSize(disp));
    madr._rspec = rspec;
    return madr;
  } else {
    Address madr(as_Register(base), noreg, Address::no_scale, in_ByteSize(disp));
    madr._rspec = rspec;
    return madr;
  }
}

// Implementation of Assembler

// Filler byte used to pad code buffers; hlt traps if ever executed.
int AbstractAssembler::code_fill_byte() {
  return (u_char)'\xF4'; // hlt
}

// make this go away someday
// Emit a 32-bit datum, wrapping a bare relocType into a RelocationHolder.
void Assembler::emit_data(jint data, relocInfo::relocType rtype, int format) {
  if (rtype == relocInfo::none)
    emit_long(data);
  else  emit_data(data, Relocation::spec_simple(rtype), format);
}

// Emit a 32-bit datum with its relocation, anchored at the enclosing
// instruction's mark so patching code can find it.
void Assembler::emit_data(jint data, RelocationHolder const& rspec, int format) {
  assert(imm_operand == 0, "default format must be immediate in this file");
  assert(inst_mark() != NULL, "must be inside InstructionMark");
  if (rspec.type() !=  relocInfo::none) {
#ifdef ASSERT
    check_relocation(rspec, format);
#endif
    // Do not use AbstractAssembler::relocate, which is not intended for
    // embedded words.  Instead, relocate to the enclosing instruction.

    // hack. call32 is too wide for mask so use disp32
    if (format == call32_operand)
      code_section()->relocate(inst_mark(), rspec, disp32_operand);
    else
      code_section()->relocate(inst_mark(), rspec, format);
  }
  emit_long(data);
}

// Strip the REX-extension bit: registers 8..15 encode as 0..7 in the
// ModRM/SIB fields (the high bit travels in the REX prefix).
static int encode(Register r) {
  int enc = r->encoding();
  if (enc >= 8) {
    enc -= 8;
  }
  return enc;
}

// Same low-3-bit encoding for XMM registers.
static int encode(XMMRegister r) {
  int enc = r->encoding();
  if (enc >= 8) {
    enc -= 8;
  }
  return enc;
}

// Byte-sized register/imm8 arithmetic: opcode, ModRM, imm8.
void Assembler::emit_arith_b(int op1, int op2, Register dst, int imm8) {
  assert(dst->has_byte_register(), "must have byte register");
  assert(isByte(op1) && isByte(op2), "wrong opcode");
  assert(isByte(imm8), "not a byte");
  assert((op1 & 0x01) == 0, "should be 8bit operation");
  emit_byte(op1);
  emit_byte(op2 | encode(dst));
  emit_byte(imm8);
}


// Register/imm32 arithmetic; uses the sign-extended imm8 form when the
// immediate fits in 8 bits.
void Assembler::emit_arith(int op1, int op2, Register dst, int32_t imm32) {
  assert(isByte(op1) && isByte(op2), "wrong opcode");
  assert((op1 & 0x01) == 1, "should be 32bit operation");
  assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
  if (is8bit(imm32)) {
    emit_byte(op1 | 0x02); // set sign bit
    emit_byte(op2 | encode(dst));
    emit_byte(imm32 & 0xFF);
  } else {
    emit_byte(op1);
    emit_byte(op2 | encode(dst));
    emit_long(imm32);
  }
}

// Force generation of a 4 byte immediate value even if it fits into 8bit
void Assembler::emit_arith_imm32(int op1, int op2, Register dst, int32_t imm32) {
  assert(isByte(op1) && isByte(op2), "wrong opcode");
  assert((op1 & 0x01) == 1, "should be 32bit operation");
  assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
  emit_byte(op1);
  emit_byte(op2 | encode(dst));
  emit_long(imm32);
}

// immediate-to-memory forms
// rm carries the opcode-extension ("/digit") in the reg field of ModRM;
// the trailing argument to emit_operand is the immediate's size, needed
// for correct RIP-relative displacement adjustment.
void Assembler::emit_arith_operand(int op1, Register rm, Address adr, int32_t imm32) {
  assert((op1 & 0x01) == 1, "should be 32bit operation");
  assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
  if (is8bit(imm32)) {
    emit_byte(op1 | 0x02); // set sign bit
    emit_operand(rm, adr, 1);
    emit_byte(imm32 & 0xFF);
  } else {
    emit_byte(op1);
    emit_operand(rm, adr, 4);
    emit_long(imm32);
  }
}

// 32-bit only: arithmetic with an oop immediate (emitted with oop reloc).
void Assembler::emit_arith(int op1, int op2, Register dst, jobject obj) {
  LP64_ONLY(ShouldNotReachHere());
  assert(isByte(op1) && isByte(op2), "wrong opcode");
  assert((op1 & 0x01) == 1, "should be 32bit operation");
  assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
  InstructionMark im(this);
  emit_byte(op1);
  emit_byte(op2 | encode(dst));
  emit_data((intptr_t)obj, relocInfo::oop_type, 0);
}


// Register-register arithmetic: opcode + ModRM (mod=11).
void Assembler::emit_arith(int op1, int op2, Register dst, Register src) {
  assert(isByte(op1) && isByte(op2), "wrong opcode");
  emit_byte(op1);
  emit_byte(op2 | encode(dst) << 3 | encode(src));
}


// Emit the ModRM (and, if needed, SIB) bytes plus displacement for a
// memory operand [base + index*scale + disp].  reg goes in the ModRM reg
// field.  rip_relative_correction accounts for immediate bytes that follow
// the displacement when computing a 64-bit RIP-relative offset.
void Assembler::emit_operand(Register reg, Register base, Register index,
                             Address::ScaleFactor scale, int disp,
                             RelocationHolder const& rspec,
                             int rip_relative_correction) {
  relocInfo::relocType rtype = (relocInfo::relocType) rspec.type();

  // Encode the registers as needed in the fields they are used in

  int regenc = encode(reg) << 3;
  int indexenc = index->is_valid() ? encode(index) << 3 : 0;
  int baseenc = base->is_valid() ? encode(base) : 0;

  if (base->is_valid()) {
    if (index->is_valid()) {
      assert(scale != Address::no_scale, "inconsistent address");
      // [base + index*scale + disp]
      if (disp == 0 && rtype == relocInfo::none  &&
          base != rbp LP64_ONLY(&& base != r13)) {
        // [base + index*scale]
        // [00 reg 100][ss index base]
        assert(index != rsp, "illegal addressing mode");
        emit_byte(0x04 | regenc);
        emit_byte(scale << 6 | indexenc | baseenc);
      } else if (is8bit(disp) && rtype == relocInfo::none) {
        // [base + index*scale + imm8]
        // [01 reg 100][ss index base] imm8
        assert(index != rsp, "illegal addressing mode");
        emit_byte(0x44 | regenc);
        emit_byte(scale << 6 | indexenc | baseenc);
        emit_byte(disp & 0xFF);
      } else {
        // [base + index*scale + disp32]
        // [10 reg 100][ss index base] disp32
        assert(index != rsp, "illegal addressing mode");
        emit_byte(0x84 | regenc);
        emit_byte(scale << 6 | indexenc | baseenc);
        emit_data(disp, rspec, disp32_operand);
      }
    } else if (base == rsp LP64_ONLY(|| base == r12)) {
      // [rsp + disp]
      // rsp/r12 as base force a SIB byte (base=100 in ModRM selects SIB).
      if (disp == 0 && rtype == relocInfo::none) {
        // [rsp]
        // [00 reg 100][00 100 100]
        emit_byte(0x04 | regenc);
        emit_byte(0x24);
      } else if (is8bit(disp) && rtype == relocInfo::none) {
        // [rsp + imm8]
        // [01 reg 100][00 100 100] disp8
        emit_byte(0x44 | regenc);
        emit_byte(0x24);
        emit_byte(disp & 0xFF);
      } else {
        // [rsp + imm32]
        // [10 reg 100][00 100 100] disp32
        emit_byte(0x84 | regenc);
        emit_byte(0x24);
        emit_data(disp, rspec, disp32_operand);
      }
    } else {
      // [base + disp]
      assert(base != rsp LP64_ONLY(&& base != r12), "illegal addressing mode");
      if (disp == 0 && rtype == relocInfo::none &&
          base != rbp LP64_ONLY(&& base != r13)) {
        // [base]
        // [00 reg base]
        emit_byte(0x00 | regenc | baseenc);
      } else if (is8bit(disp) && rtype == relocInfo::none) {
        // [base + disp8]
        // [01 reg base] disp8
        emit_byte(0x40 | regenc | baseenc);
        emit_byte(disp & 0xFF);
      } else {
        // [base + disp32]
        // [10 reg base] disp32
        emit_byte(0x80 | regenc | baseenc);
        emit_data(disp, rspec, disp32_operand);
      }
    }
  } else {
    if (index->is_valid()) {
      assert(scale != Address::no_scale, "inconsistent address");
      // [index*scale + disp]
      // [00 reg 100][ss index 101] disp32
      assert(index != rsp, "illegal addressing mode");
      emit_byte(0x04 | regenc);
      emit_byte(scale << 6 | indexenc | 0x05);
      emit_data(disp, rspec, disp32_operand);
    } else if (rtype != relocInfo::none ) {
      // [disp] (64bit) RIP-RELATIVE (32bit) abs
      // [00 000 101] disp32

      emit_byte(0x05 | regenc);
      // Note that the RIP-rel. correction applies to the generated
      // disp field, but _not_ to the target address in the rspec.

      // disp was created by converting the target address minus the pc
      // at the start of the instruction. That needs more correction here.
      // intptr_t disp = target - next_ip;
      assert(inst_mark() != NULL, "must be inside InstructionMark");
      address next_ip = pc() + sizeof(int32_t) + rip_relative_correction;
      int64_t adjusted = disp;
      // Do rip-rel adjustment for 64bit
      LP64_ONLY(adjusted -=  (next_ip - inst_mark()));
      assert(is_simm32(adjusted),
             "must be 32bit offset (RIP relative address)");
      emit_data((int32_t) adjusted, rspec, disp32_operand);

    } else {
      // 32bit never did this, did everything as the rip-rel/disp code above
      // [disp] ABSOLUTE
      // [00 reg 100][00 100 101] disp32
      emit_byte(0x04 | regenc);
      emit_byte(0x25);
      emit_data(disp, rspec, disp32_operand);
    }
  }
}

// XMM variant: XMM registers share the ModRM reg-field encoding, so
// delegate to the general-purpose version.
void Assembler::emit_operand(XMMRegister reg, Register base, Register index,
                             Address::ScaleFactor scale, int disp,
                             RelocationHolder const& rspec) {
  emit_operand((Register)reg, base, index, scale, disp, rspec);
}

// Secret local extension to Assembler::WhichOperand:
#define end_pc_operand (_WhichOperand_limit)

address Assembler::locate_operand(address inst, WhichOperand which) {
  // Decode the given instruction, and return the address of
  // an embedded 32-bit operand word.

  // If "which" is disp32_operand, selects the displacement portion
  // of an effective address specifier.
  // If "which" is imm64_operand, selects the trailing immediate constant.
  // If "which" is call32_operand, selects the displacement of a call or jump.
  // Caller is responsible for ensuring that there is such an operand,
  // and that it is 32/64 bits wide.

  // If "which" is end_pc_operand, find the end of the instruction.

  address ip = inst;
  bool is_64bit = false;

  debug_only(bool has_disp32 = false);
  int tail_size = 0; // other random bytes (#32, #16, etc.) at end of insn

  again_after_prefix:
  switch (0xFF & *ip++) {

  // These convenience macros generate groups of "case" labels for the switch.
#define REP4(x) (x)+0: case (x)+1: case (x)+2: case (x)+3
#define REP8(x) (x)+0: case (x)+1: case (x)+2: case (x)+3: \
             case (x)+4: case (x)+5: case (x)+6: case (x)+7
#define REP16(x) REP8((x)+0): \
              case REP8((x)+8)

  case CS_segment:
  case SS_segment:
  case DS_segment:
  case ES_segment:
  case FS_segment:
  case GS_segment:
    // Seems dubious
    LP64_ONLY(assert(false, "shouldn't have that prefix"));
    assert(ip == inst+1, "only one prefix allowed");
    goto again_after_prefix;

  case 0x67:
  case REX:
  case REX_B:
  case REX_X:
  case REX_XB:
  case REX_R:
  case REX_RB:
  case REX_RX:
  case REX_RXB:
    NOT_LP64(assert(false, "64bit prefixes"));
    goto again_after_prefix;

  case REX_W:
  case REX_WB:
  case REX_WX:
  case REX_WXB:
  case REX_WR:
  case REX_WRB:
  case REX_WRX:
  case REX_WRXB:
    NOT_LP64(assert(false, "64bit prefixes"));
    is_64bit = true;
    goto again_after_prefix;

  case 0xFF: // pushq a; decl a; incl a; call a; jmp a
  case 0x88: // movb a, r
  case 0x89: // movl a, r
  case 0x8A: // movb r, a
  case 0x8B: // movl r, a
  case 0x8F: // popl a
    debug_only(has_disp32 = true);
    break;

  case 0x68: // pushq #32
    if (which == end_pc_operand) {
      return ip + 4;
    }
    assert(which == imm_operand && !is_64bit, "pushl has no disp32 or 64bit immediate");
    return ip; // not produced by emit_operand

  case 0x66: // movw ... (size prefix)
    again_after_size_prefix2:
    switch (0xFF & *ip++) {
    case REX:
    case REX_B:
    case REX_X:
    case REX_XB:
    case REX_R:
    case REX_RB:
    case REX_RX:
    case REX_RXB:
    case REX_W:
    case REX_WB:
    case REX_WX:
    case REX_WXB:
    case REX_WR:
    case REX_WRB:
    case REX_WRX:
    case REX_WRXB:
      NOT_LP64(assert(false, "64bit prefix found"));
      goto again_after_size_prefix2;
    case 0x8B: // movw r, a
    case 0x89: // movw a, r
      debug_only(has_disp32 = true);
      break;
    case 0xC7: // movw a, #16
      debug_only(has_disp32 = true);
      tail_size = 2;  // the imm16
      break;
    case 0x0F: // several SSE/SSE2 variants
      ip--;    // reparse the 0x0F
      goto again_after_prefix;
    default:
      ShouldNotReachHere();
    }
    break;

  case REP8(0xB8): // movl/q r, #32/#64(oop?)
    if (which == end_pc_operand)  return ip + (is_64bit ? 8 : 4);
    // these asserts are somewhat nonsensical
#ifndef _LP64
    assert(which == imm_operand || which == disp32_operand, "");
#else
    assert((which == call32_operand || which == imm_operand) && is_64bit ||
           which == narrow_oop_operand && !is_64bit, "");
#endif // _LP64
    return ip;

  case 0x69: // imul r, a, #32
  case 0xC7: // movl a, #32(oop?)
    tail_size = 4;
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0x0F: // movx..., etc.
    switch (0xFF & *ip++) {
    case 0x3A: // pcmpestri
      tail_size = 1;
      // fall through: 0x3A also needs the extra opcode byte skipped below
    case 0x38: // ptest, pmovzxbw
      ip++; // skip opcode
      debug_only(has_disp32 = true); // has both kinds of operands!
      break;

    case 0x70: // pshufd r, r/a, #8
      debug_only(has_disp32 = true);
      // fall through: both take a trailing imm8
    case 0x73: // psrldq r, #8
      tail_size = 1;
      break;

    case 0x12: // movlps
    case 0x28: // movaps
    case 0x2E: // ucomiss
    case 0x2F: // comiss
    case 0x54: // andps
    case 0x55: // andnps
    case 0x56: // orps
    case 0x57: // xorps
    case 0x6E: // movd
    case 0x7E: // movd
    case 0xAE: // ldmxcsr, stmxcsr, fxrstor, fxsave, clflush
      debug_only(has_disp32 = true);
      break;

    case 0xAD: // shrd r, a, %cl
    case 0xAF: // imul r, a
    case 0xBE: // movsbl r, a (movsxb)
    case 0xBF: // movswl r, a (movsxw)
    case 0xB6: // movzbl r, a (movzxb)
    case 0xB7: // movzwl r, a (movzxw)
    case REP16(0x40): // cmovl cc, r, a
    case 0xB0: // cmpxchgb
    case 0xB1: // cmpxchg
    case 0xC1: // xaddl
    case 0xC7: // cmpxchg8
    case REP16(0x90): // setcc a
      debug_only(has_disp32 = true);
      // fall out of the switch to decode the address
      break;

    case 0xC4: // pinsrw r, a, #8
      debug_only(has_disp32 = true);
      // fall through: both take a trailing imm8
    case 0xC5: // pextrw r, r, #8
      tail_size = 1;  // the imm8
      break;

    case 0xAC: // shrd r, a, #8
      debug_only(has_disp32 = true);
      tail_size = 1;  // the imm8
      break;

    case REP16(0x80): // jcc rdisp32
      if (which == end_pc_operand)  return ip + 4;
      assert(which == call32_operand, "jcc has no disp32 or imm");
      return ip;
    default:
      ShouldNotReachHere();
    }
    break;

  case 0x81: // addl a, #32; addl r, #32
    // also: orl, adcl, sbbl, andl, subl, xorl, cmpl
    // on 32bit in the case of cmpl, the imm might be an oop
    tail_size = 4;
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0x83: // addl a, #8; addl r, #8
    // also: orl, adcl, sbbl, andl, subl, xorl, cmpl
    debug_only(has_disp32 = true); // has both kinds of operands!
    tail_size = 1;
    break;

  case 0x9B:
    switch (0xFF & *ip++) {
    case 0xD9: // fnstcw a
      debug_only(has_disp32 = true);
      break;
    default:
      ShouldNotReachHere();
    }
    break;

  case REP4(0x00): // addb a, r; addl a, r; addb r, a; addl r, a
  case REP4(0x10): // adc...
  case REP4(0x20): // and...
  case REP4(0x30): // xor...
  case REP4(0x08): // or...
  case REP4(0x18): // sbb...
  case REP4(0x28): // sub...
  case 0xF7: // mull a
  case 0x8D: // lea r, a
  case 0x87: // xchg r, a
  case REP4(0x38): // cmp...
  case 0x85: // test r, a
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0xC1: // sal a, #8; sar a, #8; shl a, #8; shr a, #8
  case 0xC6: // movb a, #8
  case 0x80: // cmpb a, #8
  case 0x6B: // imul r, a, #8
    debug_only(has_disp32 = true); // has both kinds of operands!
    tail_size = 1; // the imm8
    break;

  case 0xC4: // VEX_3bytes
  case 0xC5: // VEX_2bytes
    assert((UseAVX > 0), "shouldn't have VEX prefix");
    assert(ip == inst+1, "no prefixes allowed");
    // C4 and C5 are also used as opcodes for PINSRW and PEXTRW instructions
    // but they have prefix 0x0F and processed when 0x0F processed above.
    //
    // In 32-bit mode the VEX first byte C4 and C5 alias onto LDS and LES
    // instructions (these instructions are not supported in 64-bit mode).
    // To distinguish them bits [7:6] are set in the VEX second byte since
    // ModRM byte can not be of the form 11xxxxxx in 32-bit mode. To set
    // those VEX bits REX and vvvv bits are inverted.
    //
    // Fortunately C2 doesn't generate these instructions so we don't need
    // to check for them in product version.

    // Check second byte
    NOT_LP64(assert((0xC0 & *ip) == 0xC0, "shouldn't have LDS and LES instructions"));

    // First byte
    if ((0xFF & *inst) == VEX_3bytes) {
      ip++; // third byte
      is_64bit = ((VEX_W & *ip) == VEX_W);
    }
    ip++; // opcode
    // To find the end of instruction (which == end_pc_operand).
    switch (0xFF & *ip) {
    case 0x61: // pcmpestri r, r/a, #8
    case 0x70: // pshufd r, r/a, #8
    case 0x73: // psrldq r, #8
      tail_size = 1;  // the imm8
      break;
    default:
      break;
    }
    ip++; // skip opcode
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0xD1: // sal a, 1; sar a, 1; shl a, 1; shr a, 1
  case 0xD3: // sal a, %cl; sar a, %cl; shl a, %cl; shr a, %cl
  case 0xD9: // fld_s a; fst_s a; fstp_s a; fldcw a
  case 0xDD: // fld_d a; fst_d a; fstp_d a
  case 0xDB: // fild_s a; fistp_s a; fld_x a; fstp_x a
  case 0xDF: // fild_d a; fistp_d a
  case 0xD8: // fadd_s a; fsubr_s a; fmul_s a; fdivr_s a; fcomp_s a
  case 0xDC: // fadd_d a; fsubr_d a; fmul_d a; fdivr_d a; fcomp_d a
  case 0xDE: // faddp_d a; fsubrp_d a; fmulp_d a; fdivrp_d a; fcompp_d a
    debug_only(has_disp32 = true);
    break;

  case 0xE8: // call rdisp32
  case 0xE9: // jmp  rdisp32
    if (which == end_pc_operand)  return ip + 4;
    assert(which == call32_operand, "call has no disp32 or imm");
    return ip;

  case 0xF0: // Lock
    assert(os::is_MP(), "only on MP");
    goto again_after_prefix;

  case 0xF3: // For SSE
  case 0xF2: // For SSE2
    switch (0xFF & *ip++) {
    case REX:
    case REX_B:
    case REX_X:
    case REX_XB:
    case REX_R:
    case REX_RB:
    case REX_RX:
    case REX_RXB:
    case REX_W:
    case REX_WB:
    case REX_WX:
    case REX_WXB:
    case REX_WR:
    case REX_WRB:
    case REX_WRX:
    case REX_WRXB:
      NOT_LP64(assert(false, "found 64bit prefix"));
      ip++;
      // fall through: after a REX prefix there is one more opcode byte,
      // so the default's ip++ runs again to skip it
    default:
      ip++;
    }
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  default:
    ShouldNotReachHere();

#undef REP8
#undef REP16
  }

  assert(which != call32_operand, "instruction is not a call, jmp, or jcc");
#ifdef _LP64
  assert(which != imm_operand, "instruction is not a movq reg, imm64");
#else
  // assert(which != imm_operand || has_imm32, "instruction has no imm32 field");
  assert(which != imm_operand || has_disp32, "instruction has no imm32 field");
#endif // LP64
  assert(which != disp32_operand || has_disp32, "instruction has no disp32 field");

  // parse the output of emit_operand
  // op2 is the ModRM byte; mod=00..10 with rm=100 means a SIB byte follows.
  int op2 = 0xFF & *ip++;
  int base = op2 & 0x07;
  int op3 = -1;
  const int b100 = 4;
  const int b101 = 5;
  if (base == b100 && (op2 >> 6) != 3) {
    op3 = 0xFF & *ip++;
    base = op3 & 0x07;   // refetch the base
  }
  // now ip points at the disp (if any)

  switch (op2 >> 6) {
  case 0:
    // [00 reg  100][ss index base]
    // [00 reg  100][00   100  esp]
    // [00 reg base]
    // [00 reg  100][ss index  101][disp32]
    // [00 reg  101]               [disp32]

    if (base == b101) {
      if (which == disp32_operand)
        return ip;              // caller wants the disp32
      ip += 4;                  // skip the disp32
    }
    break;

  case 1:
    // [01 reg  100][ss index base][disp8]
    // [01 reg  100][00   100  esp][disp8]
    // [01 reg base]               [disp8]
    ip += 1;                    // skip the disp8
    break;

  case 2:
    // [10 reg  100][ss index base][disp32]
    // [10 reg  100][00   100  esp][disp32]
    // [10 reg base]               [disp32]
    if (which == disp32_operand)
      return ip;                // caller wants the disp32
    ip += 4;                    // skip the disp32
    break;

  case 3:
    // [11 reg base]  (not a memory addressing mode)
    break;
  }

  if (which == end_pc_operand) {
    return ip + tail_size;
  }

#ifdef _LP64
  assert(which == narrow_oop_operand && !is_64bit, "instruction is not a movl adr, imm32");
#else
  assert(which == imm_operand, "instruction has only an imm field");
#endif // LP64
  return ip;
}

address Assembler::locate_next_instruction(address inst) {
  // Secretly share code with locate_operand:
  return locate_operand(inst, end_pc_operand);
}


#ifdef ASSERT
// Debug-only: verify that the relocation just recorded actually points at
// the operand inside the instruction marked by inst_mark().
void Assembler::check_relocation(RelocationHolder const& rspec, int format) {
  address inst = inst_mark();
  assert(inst != NULL && inst < pc(), "must point to beginning of instruction");
  address opnd;

  Relocation* r = rspec.reloc();
  if (r->type() == relocInfo::none) {
    return;
  } else if (r->is_call() || format == call32_operand) {
    // assert(format == imm32_operand, "cannot specify a nonzero format");
    opnd = locate_operand(inst, call32_operand);
  } else if (r->is_data()) {
    assert(format == imm_operand || format == disp32_operand
           LP64_ONLY(|| format == narrow_oop_operand), "format ok");
    opnd = locate_operand(inst, (WhichOperand)format);
  } else {
    assert(format == imm_operand, "cannot specify a format");
    return;
  }
  assert(opnd == pc(), "must put operand where relocs can find it");
}
#endif // ASSERT

// Legacy (no-REX) operand emission; only valid for the low 8 registers.
void Assembler::emit_operand32(Register reg, Address adr) {
  assert(reg->encoding() < 8, "no extended registers");
  assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
  emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
               adr._rspec);
}

void Assembler::emit_operand(Register reg, Address adr,
                             int rip_relative_correction) {
  emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
               adr._rspec,
               rip_relative_correction);
}

void Assembler::emit_operand(XMMRegister reg, Address adr) {
  emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
               adr._rspec);
}

// MMX operations
void Assembler::emit_operand(MMXRegister reg, Address adr) {
  assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
  emit_operand((Register)reg, adr._base, adr._index, adr._scale, adr._disp, adr._rspec);
}

// work around gcc (3.2.1-7a) bug
void Assembler::emit_operand(Address adr, MMXRegister reg) {
  assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
  emit_operand((Register)reg, adr._base, adr._index, adr._scale, adr._disp, adr._rspec);
}


// x87 stack-register instruction: two opcode bytes, the second selects
// stack slot i (ST(i)).
void Assembler::emit_farith(int b1, int b2, int i) {
  assert(isByte(b1) && isByte(b2), "wrong opcode");
  assert(0 <= i &&  i < 8, "illegal stack offset");
  emit_byte(b1);
  emit_byte(b2 + i);
}


// Now the Assembler instructions (identical for 32/64 bits)

void Assembler::adcl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  // 0x81 /2 (rdx carries the /2 opcode extension)
  emit_arith_operand(0x81, rdx, dst, imm32);
}

void Assembler::adcl(Address dst, Register src) {
  InstructionMark im(this);
  prefix(dst, src);
  emit_byte(0x11);
  emit_operand(src, dst);
}

void Assembler::adcl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xD0, dst, imm32);
}

void Assembler::adcl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x13);
  emit_operand(dst, src);
}

void Assembler::adcl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x13, 0xC0, dst, src);
}

void Assembler::addl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  // 0x81 /0 (rax carries the /0 opcode extension)
  emit_arith_operand(0x81, rax, dst, imm32);
}

void Assembler::addl(Address dst, Register src) {
  InstructionMark im(this);
  prefix(dst, src);
  emit_byte(0x01);
  emit_operand(src, dst);
}

void Assembler::addl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xC0, dst, imm32);
}

void Assembler::addl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x03);
  emit_operand(dst, src);
}

void Assembler::addl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x03, 0xC0, dst, src);
}

// Multi-byte NOPs built from "NOP r/m32" (0F 1F) with padded addressing forms.
void Assembler::addr_nop_4() {
  assert(UseAddressNop, "no CPU support");
  // 4 bytes: NOP DWORD PTR [EAX+0]
  emit_byte(0x0F);
  emit_byte(0x1F);
  emit_byte(0x40); // emit_rm(cbuf, 0x1, EAX_enc, EAX_enc);
  emit_byte(0);    // 8-bits offset (1 byte)
}

void Assembler::addr_nop_5() {
  assert(UseAddressNop, "no CPU support");
  // 5 bytes: NOP DWORD PTR [EAX+EAX*0+0] 8-bits offset
  emit_byte(0x0F);
  emit_byte(0x1F);
  emit_byte(0x44); // emit_rm(cbuf, 0x1, EAX_enc, 0x4);
  emit_byte(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
  emit_byte(0);    // 8-bits offset (1 byte)
}

void Assembler::addr_nop_7() {
  assert(UseAddressNop, "no CPU support");
  // 7 bytes: NOP DWORD PTR [EAX+0] 32-bits offset
  emit_byte(0x0F);
  emit_byte(0x1F);
  emit_byte(0x80); // emit_rm(cbuf, 0x2, EAX_enc, EAX_enc);
  emit_long(0);    // 32-bits offset (4 bytes)
}

void Assembler::addr_nop_8() {
  assert(UseAddressNop, "no CPU support");
  // 8 bytes: NOP DWORD PTR [EAX+EAX*0+0] 32-bits offset
  emit_byte(0x0F);
  emit_byte(0x1F);
  emit_byte(0x84); // emit_rm(cbuf, 0x2, EAX_enc, 0x4);
  emit_byte(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
  emit_long(0);    // 32-bits offset (4 bytes)
}

void Assembler::addsd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2);
  emit_byte(0x58);
  emit_byte(0xC0 | encode);
}

void Assembler::addsd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  simd_prefix(dst, dst, src, VEX_SIMD_F2);
  emit_byte(0x58);
  emit_operand(dst, src);
}

void Assembler::addss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3);
  emit_byte(0x58);
  emit_byte(0xC0 | encode);
}

void Assembler::addss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  simd_prefix(dst, dst, src, VEX_SIMD_F3);
  emit_byte(0x58);
  emit_operand(dst, src);
}

void Assembler::andl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0x81);
  // 0x81 /4 (rsp carries the /4 opcode extension); always a full imm32
  emit_operand(rsp, dst, 4);
  emit_long(imm32);
}

void Assembler::andl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xE0, dst, imm32);
}

void Assembler::andl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x23);
  emit_operand(dst, src);
}

void Assembler::andl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x23, 0xC0, dst, src);
}

void Assembler::andpd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  simd_prefix(dst, dst, src, VEX_SIMD_66);
  emit_byte(0x54);
  emit_operand(dst, src);
}

void Assembler::andpd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66);
  emit_byte(0x54);
  emit_byte(0xC0 | encode);
}

void Assembler::andps(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  simd_prefix(dst, dst, src, VEX_SIMD_NONE);
  emit_byte(0x54);
  emit_operand(dst, src);
}

void Assembler::andps(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE);
  emit_byte(0x54);
  emit_byte(0xC0 | encode);
}

void Assembler::bsfl(Register dst, Register src) {
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xBC);
  emit_byte(0xC0 | encode);
}

void Assembler::bsrl(Register dst, Register src) {
  // F3-prefixed BSR is LZCNT on capable CPUs, so refuse to emit it there.
  assert(!VM_Version::supports_lzcnt(), "encoding is treated as LZCNT");
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xBD);
  emit_byte(0xC0 | encode);
}

void Assembler::bswapl(Register reg) { // bswap
  int encode = prefix_and_encode(reg->encoding());
  emit_byte(0x0F);
  emit_byte(0xC8 | encode);
}

// call to a label; emits a patch record when the label is not yet bound.
void Assembler::call(Label& L, relocInfo::relocType rtype) {
  // suspect disp32 is always good
  int operand = LP64_ONLY(disp32_operand) NOT_LP64(imm_operand);

  if (L.is_bound()) {
    const int long_size = 5;
    int offs = (int)( target(L) - pc() );
    assert(offs <= 0, "assembler error");
    InstructionMark im(this);
    // 1110 1000 #32-bit disp
    emit_byte(0xE8);
    emit_data(offs - long_size, rtype, operand);
  } else {
    InstructionMark im(this);
    // 1110 1000 #32-bit disp
    L.add_patch_at(code(), locator());

    emit_byte(0xE8);
    emit_data(int(0), rtype, operand);
  }
}

void Assembler::call(Register dst) {
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xFF);
  emit_byte(0xD0 | encode);
}


void Assembler::call(Address adr) {
  InstructionMark im(this);
  prefix(adr);
  emit_byte(0xFF);
  // 0xFF /2 (rdx carries the /2 opcode extension for indirect call)
  emit_operand(rdx, adr);
}

// Direct call to an absolute entry point; disp is relative to the end of
// the 5-byte call instruction.
void Assembler::call_literal(address entry, RelocationHolder const& rspec) {
  assert(entry != NULL, "call most probably wrong");
  InstructionMark im(this);
  emit_byte(0xE8);
  intptr_t disp = entry - (_code_pos + sizeof(int32_t));
  assert(is_simm32(disp), "must be 32bit offset (call2)");
  // Technically, should use call32_operand, but this format is
  // implied by the fact that we're emitting a call instruction.

  int operand = LP64_ONLY(disp32_operand) NOT_LP64(call32_operand);
  emit_data((int) disp, rspec, operand);
}

void Assembler::cdql() {
  emit_byte(0x99);
}

void Assembler::cmovl(Condition cc, Register dst, Register src) {
  NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction"));
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x40 | cc);
  emit_byte(0xC0 | encode);
}


void Assembler::cmovl(Condition cc, Register dst, Address src) {
  NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction"));
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x40 | cc);
  emit_operand(dst, src);
}

void Assembler::cmpb(Address dst, int imm8) {
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0x80);
  // 0x80 /7 (rdi carries the /7 opcode extension)
  emit_operand(rdi, dst, 1);
  emit_byte(imm8);
}

void Assembler::cmpl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0x81);
  // 0x81 /7 (rdi carries the /7 opcode extension)
  emit_operand(rdi, dst, 4);
  emit_long(imm32);
}

void Assembler::cmpl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xF8, dst, imm32);
}

void Assembler::cmpl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x3B, 0xC0, dst, src);
}


void Assembler::cmpl(Register dst, Address  src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x3B);
  emit_operand(dst, src);
}

void
Assembler::cmpw(Address dst, int imm16) { 1200 InstructionMark im(this); 1201 assert(!dst.base_needs_rex() && !dst.index_needs_rex(), "no extended registers"); 1202 emit_byte(0x66); 1203 emit_byte(0x81); 1204 emit_operand(rdi, dst, 2); 1205 emit_word(imm16); 1206 } 1207 1208 // The 32-bit cmpxchg compares the value at adr with the contents of rax, 1209 // and stores reg into adr if so; otherwise, the value at adr is loaded into rax,. 1210 // The ZF is set if the compared values were equal, and cleared otherwise. 1211 void Assembler::cmpxchgl(Register reg, Address adr) { // cmpxchg 1212 if (Atomics & 2) { 1213 // caveat: no instructionmark, so this isn't relocatable. 1214 // Emit a synthetic, non-atomic, CAS equivalent. 1215 // Beware. The synthetic form sets all ICCs, not just ZF. 1216 // cmpxchg r,[m] is equivalent to rax, = CAS (m, rax, r) 1217 cmpl(rax, adr); 1218 movl(rax, adr); 1219 if (reg != rax) { 1220 Label L ; 1221 jcc(Assembler::notEqual, L); 1222 movl(adr, reg); 1223 bind(L); 1224 } 1225 } else { 1226 InstructionMark im(this); 1227 prefix(adr, reg); 1228 emit_byte(0x0F); 1229 emit_byte(0xB1); 1230 emit_operand(reg, adr); 1231 } 1232 } 1233 1234 void Assembler::comisd(XMMRegister dst, Address src) { 1235 // NOTE: dbx seems to decode this as comiss even though the 1236 // 0x66 is there. 
Strangly ucomisd comes out correct 1237 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1238 InstructionMark im(this); 1239 simd_prefix(dst, src, VEX_SIMD_66); 1240 emit_byte(0x2F); 1241 emit_operand(dst, src); 1242 } 1243 1244 void Assembler::comisd(XMMRegister dst, XMMRegister src) { 1245 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1246 int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66); 1247 emit_byte(0x2F); 1248 emit_byte(0xC0 | encode); 1249 } 1250 1251 void Assembler::comiss(XMMRegister dst, Address src) { 1252 NOT_LP64(assert(VM_Version::supports_sse(), "")); 1253 InstructionMark im(this); 1254 simd_prefix(dst, src, VEX_SIMD_NONE); 1255 emit_byte(0x2F); 1256 emit_operand(dst, src); 1257 } 1258 1259 void Assembler::comiss(XMMRegister dst, XMMRegister src) { 1260 NOT_LP64(assert(VM_Version::supports_sse(), "")); 1261 int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_NONE); 1262 emit_byte(0x2F); 1263 emit_byte(0xC0 | encode); 1264 } 1265 1266 void Assembler::cvtdq2pd(XMMRegister dst, XMMRegister src) { 1267 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1268 int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F3); 1269 emit_byte(0xE6); 1270 emit_byte(0xC0 | encode); 1271 } 1272 1273 void Assembler::cvtdq2ps(XMMRegister dst, XMMRegister src) { 1274 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1275 int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_NONE); 1276 emit_byte(0x5B); 1277 emit_byte(0xC0 | encode); 1278 } 1279 1280 void Assembler::cvtsd2ss(XMMRegister dst, XMMRegister src) { 1281 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1282 int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2); 1283 emit_byte(0x5A); 1284 emit_byte(0xC0 | encode); 1285 } 1286 1287 void Assembler::cvtsd2ss(XMMRegister dst, Address src) { 1288 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1289 InstructionMark im(this); 1290 simd_prefix(dst, dst, src, VEX_SIMD_F2); 1291 emit_byte(0x5A); 1292 emit_operand(dst, src); 1293 } 1294 
1295 void Assembler::cvtsi2sdl(XMMRegister dst, Register src) { 1296 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1297 int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2); 1298 emit_byte(0x2A); 1299 emit_byte(0xC0 | encode); 1300 } 1301 1302 void Assembler::cvtsi2sdl(XMMRegister dst, Address src) { 1303 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1304 InstructionMark im(this); 1305 simd_prefix(dst, dst, src, VEX_SIMD_F2); 1306 emit_byte(0x2A); 1307 emit_operand(dst, src); 1308 } 1309 1310 void Assembler::cvtsi2ssl(XMMRegister dst, Register src) { 1311 NOT_LP64(assert(VM_Version::supports_sse(), "")); 1312 int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3); 1313 emit_byte(0x2A); 1314 emit_byte(0xC0 | encode); 1315 } 1316 1317 void Assembler::cvtsi2ssl(XMMRegister dst, Address src) { 1318 NOT_LP64(assert(VM_Version::supports_sse(), "")); 1319 InstructionMark im(this); 1320 simd_prefix(dst, dst, src, VEX_SIMD_F3); 1321 emit_byte(0x2A); 1322 emit_operand(dst, src); 1323 } 1324 1325 void Assembler::cvtss2sd(XMMRegister dst, XMMRegister src) { 1326 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1327 int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3); 1328 emit_byte(0x5A); 1329 emit_byte(0xC0 | encode); 1330 } 1331 1332 void Assembler::cvtss2sd(XMMRegister dst, Address src) { 1333 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1334 InstructionMark im(this); 1335 simd_prefix(dst, dst, src, VEX_SIMD_F3); 1336 emit_byte(0x5A); 1337 emit_operand(dst, src); 1338 } 1339 1340 1341 void Assembler::cvttsd2sil(Register dst, XMMRegister src) { 1342 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1343 int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F2); 1344 emit_byte(0x2C); 1345 emit_byte(0xC0 | encode); 1346 } 1347 1348 void Assembler::cvttss2sil(Register dst, XMMRegister src) { 1349 NOT_LP64(assert(VM_Version::supports_sse(), "")); 1350 int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F3); 1351 
emit_byte(0x2C); 1352 emit_byte(0xC0 | encode); 1353 } 1354 1355 void Assembler::decl(Address dst) { 1356 // Don't use it directly. Use MacroAssembler::decrement() instead. 1357 InstructionMark im(this); 1358 prefix(dst); 1359 emit_byte(0xFF); 1360 emit_operand(rcx, dst); 1361 } 1362 1363 void Assembler::divsd(XMMRegister dst, Address src) { 1364 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1365 InstructionMark im(this); 1366 simd_prefix(dst, dst, src, VEX_SIMD_F2); 1367 emit_byte(0x5E); 1368 emit_operand(dst, src); 1369 } 1370 1371 void Assembler::divsd(XMMRegister dst, XMMRegister src) { 1372 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1373 int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2); 1374 emit_byte(0x5E); 1375 emit_byte(0xC0 | encode); 1376 } 1377 1378 void Assembler::divss(XMMRegister dst, Address src) { 1379 NOT_LP64(assert(VM_Version::supports_sse(), "")); 1380 InstructionMark im(this); 1381 simd_prefix(dst, dst, src, VEX_SIMD_F3); 1382 emit_byte(0x5E); 1383 emit_operand(dst, src); 1384 } 1385 1386 void Assembler::divss(XMMRegister dst, XMMRegister src) { 1387 NOT_LP64(assert(VM_Version::supports_sse(), "")); 1388 int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3); 1389 emit_byte(0x5E); 1390 emit_byte(0xC0 | encode); 1391 } 1392 1393 void Assembler::emms() { 1394 NOT_LP64(assert(VM_Version::supports_mmx(), "")); 1395 emit_byte(0x0F); 1396 emit_byte(0x77); 1397 } 1398 1399 void Assembler::hlt() { 1400 emit_byte(0xF4); 1401 } 1402 1403 void Assembler::idivl(Register src) { 1404 int encode = prefix_and_encode(src->encoding()); 1405 emit_byte(0xF7); 1406 emit_byte(0xF8 | encode); 1407 } 1408 1409 void Assembler::divl(Register src) { // Unsigned 1410 int encode = prefix_and_encode(src->encoding()); 1411 emit_byte(0xF7); 1412 emit_byte(0xF0 | encode); 1413 } 1414 1415 void Assembler::imull(Register dst, Register src) { 1416 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 1417 emit_byte(0x0F); 1418 
emit_byte(0xAF); 1419 emit_byte(0xC0 | encode); 1420 } 1421 1422 1423 void Assembler::imull(Register dst, Register src, int value) { 1424 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 1425 if (is8bit(value)) { 1426 emit_byte(0x6B); 1427 emit_byte(0xC0 | encode); 1428 emit_byte(value & 0xFF); 1429 } else { 1430 emit_byte(0x69); 1431 emit_byte(0xC0 | encode); 1432 emit_long(value); 1433 } 1434 } 1435 1436 void Assembler::incl(Address dst) { 1437 // Don't use it directly. Use MacroAssembler::increment() instead. 1438 InstructionMark im(this); 1439 prefix(dst); 1440 emit_byte(0xFF); 1441 emit_operand(rax, dst); 1442 } 1443 1444 void Assembler::jcc(Condition cc, Label& L, bool maybe_short) { 1445 InstructionMark im(this); 1446 assert((0 <= cc) && (cc < 16), "illegal cc"); 1447 if (L.is_bound()) { 1448 address dst = target(L); 1449 assert(dst != NULL, "jcc most probably wrong"); 1450 1451 const int short_size = 2; 1452 const int long_size = 6; 1453 intptr_t offs = (intptr_t)dst - (intptr_t)_code_pos; 1454 if (maybe_short && is8bit(offs - short_size)) { 1455 // 0111 tttn #8-bit disp 1456 emit_byte(0x70 | cc); 1457 emit_byte((offs - short_size) & 0xFF); 1458 } else { 1459 // 0000 1111 1000 tttn #32-bit disp 1460 assert(is_simm32(offs - long_size), 1461 "must be 32bit offset (call4)"); 1462 emit_byte(0x0F); 1463 emit_byte(0x80 | cc); 1464 emit_long(offs - long_size); 1465 } 1466 } else { 1467 // Note: could eliminate cond. jumps to this jump if condition 1468 // is the same however, seems to be rather unlikely case. 
1469 // Note: use jccb() if label to be bound is very close to get 1470 // an 8-bit displacement 1471 L.add_patch_at(code(), locator()); 1472 emit_byte(0x0F); 1473 emit_byte(0x80 | cc); 1474 emit_long(0); 1475 } 1476 } 1477 1478 void Assembler::jccb(Condition cc, Label& L) { 1479 if (L.is_bound()) { 1480 const int short_size = 2; 1481 address entry = target(L); 1482 #ifdef ASSERT 1483 intptr_t dist = (intptr_t)entry - ((intptr_t)_code_pos + short_size); 1484 intptr_t delta = short_branch_delta(); 1485 if (delta != 0) { 1486 dist += (dist < 0 ? (-delta) :delta); 1487 } 1488 assert(is8bit(dist), "Dispacement too large for a short jmp"); 1489 #endif 1490 intptr_t offs = (intptr_t)entry - (intptr_t)_code_pos; 1491 // 0111 tttn #8-bit disp 1492 emit_byte(0x70 | cc); 1493 emit_byte((offs - short_size) & 0xFF); 1494 } else { 1495 InstructionMark im(this); 1496 L.add_patch_at(code(), locator()); 1497 emit_byte(0x70 | cc); 1498 emit_byte(0); 1499 } 1500 } 1501 1502 void Assembler::jmp(Address adr) { 1503 InstructionMark im(this); 1504 prefix(adr); 1505 emit_byte(0xFF); 1506 emit_operand(rsp, adr); 1507 } 1508 1509 void Assembler::jmp(Label& L, bool maybe_short) { 1510 if (L.is_bound()) { 1511 address entry = target(L); 1512 assert(entry != NULL, "jmp most probably wrong"); 1513 InstructionMark im(this); 1514 const int short_size = 2; 1515 const int long_size = 5; 1516 intptr_t offs = entry - _code_pos; 1517 if (maybe_short && is8bit(offs - short_size)) { 1518 emit_byte(0xEB); 1519 emit_byte((offs - short_size) & 0xFF); 1520 } else { 1521 emit_byte(0xE9); 1522 emit_long(offs - long_size); 1523 } 1524 } else { 1525 // By default, forward jumps are always 32-bit displacements, since 1526 // we can't yet know where the label will be bound. If you're sure that 1527 // the forward jump will not run beyond 256 bytes, use jmpb to 1528 // force an 8-bit displacement. 
1529 InstructionMark im(this); 1530 L.add_patch_at(code(), locator()); 1531 emit_byte(0xE9); 1532 emit_long(0); 1533 } 1534 } 1535 1536 void Assembler::jmp(Register entry) { 1537 int encode = prefix_and_encode(entry->encoding()); 1538 emit_byte(0xFF); 1539 emit_byte(0xE0 | encode); 1540 } 1541 1542 void Assembler::jmp_literal(address dest, RelocationHolder const& rspec) { 1543 InstructionMark im(this); 1544 emit_byte(0xE9); 1545 assert(dest != NULL, "must have a target"); 1546 intptr_t disp = dest - (_code_pos + sizeof(int32_t)); 1547 assert(is_simm32(disp), "must be 32bit offset (jmp)"); 1548 emit_data(disp, rspec.reloc(), call32_operand); 1549 } 1550 1551 void Assembler::jmpb(Label& L) { 1552 if (L.is_bound()) { 1553 const int short_size = 2; 1554 address entry = target(L); 1555 assert(entry != NULL, "jmp most probably wrong"); 1556 #ifdef ASSERT 1557 intptr_t dist = (intptr_t)entry - ((intptr_t)_code_pos + short_size); 1558 intptr_t delta = short_branch_delta(); 1559 if (delta != 0) { 1560 dist += (dist < 0 ? 
(-delta) :delta); 1561 } 1562 assert(is8bit(dist), "Dispacement too large for a short jmp"); 1563 #endif 1564 intptr_t offs = entry - _code_pos; 1565 emit_byte(0xEB); 1566 emit_byte((offs - short_size) & 0xFF); 1567 } else { 1568 InstructionMark im(this); 1569 L.add_patch_at(code(), locator()); 1570 emit_byte(0xEB); 1571 emit_byte(0); 1572 } 1573 } 1574 1575 void Assembler::ldmxcsr( Address src) { 1576 NOT_LP64(assert(VM_Version::supports_sse(), "")); 1577 InstructionMark im(this); 1578 prefix(src); 1579 emit_byte(0x0F); 1580 emit_byte(0xAE); 1581 emit_operand(as_Register(2), src); 1582 } 1583 1584 void Assembler::leal(Register dst, Address src) { 1585 InstructionMark im(this); 1586 #ifdef _LP64 1587 emit_byte(0x67); // addr32 1588 prefix(src, dst); 1589 #endif // LP64 1590 emit_byte(0x8D); 1591 emit_operand(dst, src); 1592 } 1593 1594 void Assembler::lock() { 1595 if (Atomics & 1) { 1596 // Emit either nothing, a NOP, or a NOP: prefix 1597 emit_byte(0x90) ; 1598 } else { 1599 emit_byte(0xF0); 1600 } 1601 } 1602 1603 void Assembler::lzcntl(Register dst, Register src) { 1604 assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR"); 1605 emit_byte(0xF3); 1606 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 1607 emit_byte(0x0F); 1608 emit_byte(0xBD); 1609 emit_byte(0xC0 | encode); 1610 } 1611 1612 // Emit mfence instruction 1613 void Assembler::mfence() { 1614 NOT_LP64(assert(VM_Version::supports_sse2(), "unsupported");) 1615 emit_byte( 0x0F ); 1616 emit_byte( 0xAE ); 1617 emit_byte( 0xF0 ); 1618 } 1619 1620 void Assembler::mov(Register dst, Register src) { 1621 LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src)); 1622 } 1623 1624 void Assembler::movapd(XMMRegister dst, XMMRegister src) { 1625 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1626 int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66); 1627 emit_byte(0x28); 1628 emit_byte(0xC0 | encode); 1629 } 1630 1631 void Assembler::movaps(XMMRegister dst, XMMRegister src) { 1632 
NOT_LP64(assert(VM_Version::supports_sse(), "")); 1633 int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_NONE); 1634 emit_byte(0x28); 1635 emit_byte(0xC0 | encode); 1636 } 1637 1638 void Assembler::movb(Register dst, Address src) { 1639 NOT_LP64(assert(dst->has_byte_register(), "must have byte register")); 1640 InstructionMark im(this); 1641 prefix(src, dst, true); 1642 emit_byte(0x8A); 1643 emit_operand(dst, src); 1644 } 1645 1646 1647 void Assembler::movb(Address dst, int imm8) { 1648 InstructionMark im(this); 1649 prefix(dst); 1650 emit_byte(0xC6); 1651 emit_operand(rax, dst, 1); 1652 emit_byte(imm8); 1653 } 1654 1655 1656 void Assembler::movb(Address dst, Register src) { 1657 assert(src->has_byte_register(), "must have byte register"); 1658 InstructionMark im(this); 1659 prefix(dst, src, true); 1660 emit_byte(0x88); 1661 emit_operand(src, dst); 1662 } 1663 1664 void Assembler::movdl(XMMRegister dst, Register src) { 1665 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1666 int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66); 1667 emit_byte(0x6E); 1668 emit_byte(0xC0 | encode); 1669 } 1670 1671 void Assembler::movdl(Register dst, XMMRegister src) { 1672 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1673 // swap src/dst to get correct prefix 1674 int encode = simd_prefix_and_encode(src, dst, VEX_SIMD_66); 1675 emit_byte(0x7E); 1676 emit_byte(0xC0 | encode); 1677 } 1678 1679 void Assembler::movdl(XMMRegister dst, Address src) { 1680 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1681 InstructionMark im(this); 1682 simd_prefix(dst, src, VEX_SIMD_66); 1683 emit_byte(0x6E); 1684 emit_operand(dst, src); 1685 } 1686 1687 void Assembler::movdqa(XMMRegister dst, XMMRegister src) { 1688 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1689 int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66); 1690 emit_byte(0x6F); 1691 emit_byte(0xC0 | encode); 1692 } 1693 1694 void Assembler::movdqu(XMMRegister dst, Address src) { 1695 
NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1696 InstructionMark im(this); 1697 simd_prefix(dst, src, VEX_SIMD_F3); 1698 emit_byte(0x6F); 1699 emit_operand(dst, src); 1700 } 1701 1702 void Assembler::movdqu(XMMRegister dst, XMMRegister src) { 1703 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1704 int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F3); 1705 emit_byte(0x6F); 1706 emit_byte(0xC0 | encode); 1707 } 1708 1709 void Assembler::movdqu(Address dst, XMMRegister src) { 1710 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1711 InstructionMark im(this); 1712 simd_prefix(dst, src, VEX_SIMD_F3); 1713 emit_byte(0x7F); 1714 emit_operand(src, dst); 1715 } 1716 1717 // Uses zero extension on 64bit 1718 1719 void Assembler::movl(Register dst, int32_t imm32) { 1720 int encode = prefix_and_encode(dst->encoding()); 1721 emit_byte(0xB8 | encode); 1722 emit_long(imm32); 1723 } 1724 1725 void Assembler::movl(Register dst, Register src) { 1726 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 1727 emit_byte(0x8B); 1728 emit_byte(0xC0 | encode); 1729 } 1730 1731 void Assembler::movl(Register dst, Address src) { 1732 InstructionMark im(this); 1733 prefix(src, dst); 1734 emit_byte(0x8B); 1735 emit_operand(dst, src); 1736 } 1737 1738 void Assembler::movl(Address dst, int32_t imm32) { 1739 InstructionMark im(this); 1740 prefix(dst); 1741 emit_byte(0xC7); 1742 emit_operand(rax, dst, 4); 1743 emit_long(imm32); 1744 } 1745 1746 void Assembler::movl(Address dst, Register src) { 1747 InstructionMark im(this); 1748 prefix(dst, src); 1749 emit_byte(0x89); 1750 emit_operand(src, dst); 1751 } 1752 1753 // New cpus require to use movsd and movss to avoid partial register stall 1754 // when loading from memory. But for old Opteron use movlpd instead of movsd. 1755 // The selection is done in MacroAssembler::movdbl() and movflt(). 
// movlpd: load a 64-bit double from memory into the low qword of dst,
// leaving the high qword unchanged (0x66-prefixed 0x12). Preferred over
// movsd on old Opteron CPUs — see the selection comment above.
void Assembler::movlpd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  simd_prefix(dst, dst, src, VEX_SIMD_66);
  emit_byte(0x12);
  emit_operand(dst, src);
}

// movq (MMX): load 64 bits from memory into an MMX register (0x0F 0x6F).
// NOTE(review): no InstructionMark/prefix here, unlike the XMM forms below —
// presumably MMX movq is never relocated/rex-prefixed; confirm before reuse.
void Assembler::movq( MMXRegister dst, Address src ) {
  assert( VM_Version::supports_mmx(), "" );
  emit_byte(0x0F);
  emit_byte(0x6F);
  emit_operand(dst, src);
}

// movq (MMX): store 64 bits from an MMX register to memory (0x0F 0x7F).
void Assembler::movq( Address dst, MMXRegister src ) {
  assert( VM_Version::supports_mmx(), "" );
  emit_byte(0x0F);
  emit_byte(0x7F);
  // workaround gcc (3.2.1-7a) bug
  // In that version of gcc with only an emit_operand(MMX, Address)
  // gcc will tail jump and try and reverse the parameters completely
  // obliterating dst in the process. By having a version available
  // that doesn't need to swap the args at the tail jump the bug is
  // avoided.
  emit_operand(dst, src);
}

// movq (XMM): load 64 bits from memory into the low qword of an XMM
// register, zeroing the high qword (F3-prefixed 0x7E).
void Assembler::movq(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  simd_prefix(dst, src, VEX_SIMD_F3);
  emit_byte(0x7E);
  emit_operand(dst, src);
}

// movq (XMM): store the low 64 bits of an XMM register to memory
// (0x66-prefixed 0xD6).
void Assembler::movq(Address dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  simd_prefix(dst, src, VEX_SIMD_66);
  emit_byte(0xD6);
  emit_operand(src, dst);
}

// movsbl: sign-extending byte load from memory into a 32-bit register
// (movsx, 0x0F 0xBE).
void Assembler::movsbl(Register dst, Address src) { // movsxb
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0xBE);
  emit_operand(dst, src);
}

// movsbl: sign-extending byte-register to 32-bit register move
// (movsx, 0x0F 0xBE; byte-reg prefix handling via the 'true' argument).
void Assembler::movsbl(Register dst, Register src) { // movsxb
  NOT_LP64(assert(src->has_byte_register(), "must have byte register"));
  int encode = prefix_and_encode(dst->encoding(), src->encoding(), true);
  emit_byte(0x0F);
  emit_byte(0xBE);
  emit_byte(0xC0 | encode);
}
void Assembler::movsd(XMMRegister dst, XMMRegister src) { 1817 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1818 int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2); 1819 emit_byte(0x10); 1820 emit_byte(0xC0 | encode); 1821 } 1822 1823 void Assembler::movsd(XMMRegister dst, Address src) { 1824 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1825 InstructionMark im(this); 1826 simd_prefix(dst, src, VEX_SIMD_F2); 1827 emit_byte(0x10); 1828 emit_operand(dst, src); 1829 } 1830 1831 void Assembler::movsd(Address dst, XMMRegister src) { 1832 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1833 InstructionMark im(this); 1834 simd_prefix(dst, src, VEX_SIMD_F2); 1835 emit_byte(0x11); 1836 emit_operand(src, dst); 1837 } 1838 1839 void Assembler::movss(XMMRegister dst, XMMRegister src) { 1840 NOT_LP64(assert(VM_Version::supports_sse(), "")); 1841 int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3); 1842 emit_byte(0x10); 1843 emit_byte(0xC0 | encode); 1844 } 1845 1846 void Assembler::movss(XMMRegister dst, Address src) { 1847 NOT_LP64(assert(VM_Version::supports_sse(), "")); 1848 InstructionMark im(this); 1849 simd_prefix(dst, src, VEX_SIMD_F3); 1850 emit_byte(0x10); 1851 emit_operand(dst, src); 1852 } 1853 1854 void Assembler::movss(Address dst, XMMRegister src) { 1855 NOT_LP64(assert(VM_Version::supports_sse(), "")); 1856 InstructionMark im(this); 1857 simd_prefix(dst, src, VEX_SIMD_F3); 1858 emit_byte(0x11); 1859 emit_operand(src, dst); 1860 } 1861 1862 void Assembler::movswl(Register dst, Address src) { // movsxw 1863 InstructionMark im(this); 1864 prefix(src, dst); 1865 emit_byte(0x0F); 1866 emit_byte(0xBF); 1867 emit_operand(dst, src); 1868 } 1869 1870 void Assembler::movswl(Register dst, Register src) { // movsxw 1871 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 1872 emit_byte(0x0F); 1873 emit_byte(0xBF); 1874 emit_byte(0xC0 | encode); 1875 } 1876 1877 void Assembler::movw(Address dst, int imm16) { 1878 
InstructionMark im(this); 1879 1880 emit_byte(0x66); // switch to 16-bit mode 1881 prefix(dst); 1882 emit_byte(0xC7); 1883 emit_operand(rax, dst, 2); 1884 emit_word(imm16); 1885 } 1886 1887 void Assembler::movw(Register dst, Address src) { 1888 InstructionMark im(this); 1889 emit_byte(0x66); 1890 prefix(src, dst); 1891 emit_byte(0x8B); 1892 emit_operand(dst, src); 1893 } 1894 1895 void Assembler::movw(Address dst, Register src) { 1896 InstructionMark im(this); 1897 emit_byte(0x66); 1898 prefix(dst, src); 1899 emit_byte(0x89); 1900 emit_operand(src, dst); 1901 } 1902 1903 void Assembler::movzbl(Register dst, Address src) { // movzxb 1904 InstructionMark im(this); 1905 prefix(src, dst); 1906 emit_byte(0x0F); 1907 emit_byte(0xB6); 1908 emit_operand(dst, src); 1909 } 1910 1911 void Assembler::movzbl(Register dst, Register src) { // movzxb 1912 NOT_LP64(assert(src->has_byte_register(), "must have byte register")); 1913 int encode = prefix_and_encode(dst->encoding(), src->encoding(), true); 1914 emit_byte(0x0F); 1915 emit_byte(0xB6); 1916 emit_byte(0xC0 | encode); 1917 } 1918 1919 void Assembler::movzwl(Register dst, Address src) { // movzxw 1920 InstructionMark im(this); 1921 prefix(src, dst); 1922 emit_byte(0x0F); 1923 emit_byte(0xB7); 1924 emit_operand(dst, src); 1925 } 1926 1927 void Assembler::movzwl(Register dst, Register src) { // movzxw 1928 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 1929 emit_byte(0x0F); 1930 emit_byte(0xB7); 1931 emit_byte(0xC0 | encode); 1932 } 1933 1934 void Assembler::mull(Address src) { 1935 InstructionMark im(this); 1936 prefix(src); 1937 emit_byte(0xF7); 1938 emit_operand(rsp, src); 1939 } 1940 1941 void Assembler::mull(Register src) { 1942 int encode = prefix_and_encode(src->encoding()); 1943 emit_byte(0xF7); 1944 emit_byte(0xE0 | encode); 1945 } 1946 1947 void Assembler::mulsd(XMMRegister dst, Address src) { 1948 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1949 InstructionMark im(this); 1950 
simd_prefix(dst, dst, src, VEX_SIMD_F2); 1951 emit_byte(0x59); 1952 emit_operand(dst, src); 1953 } 1954 1955 void Assembler::mulsd(XMMRegister dst, XMMRegister src) { 1956 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1957 int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2); 1958 emit_byte(0x59); 1959 emit_byte(0xC0 | encode); 1960 } 1961 1962 void Assembler::mulss(XMMRegister dst, Address src) { 1963 NOT_LP64(assert(VM_Version::supports_sse(), "")); 1964 InstructionMark im(this); 1965 simd_prefix(dst, dst, src, VEX_SIMD_F3); 1966 emit_byte(0x59); 1967 emit_operand(dst, src); 1968 } 1969 1970 void Assembler::mulss(XMMRegister dst, XMMRegister src) { 1971 NOT_LP64(assert(VM_Version::supports_sse(), "")); 1972 int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3); 1973 emit_byte(0x59); 1974 emit_byte(0xC0 | encode); 1975 } 1976 1977 void Assembler::negl(Register dst) { 1978 int encode = prefix_and_encode(dst->encoding()); 1979 emit_byte(0xF7); 1980 emit_byte(0xD8 | encode); 1981 } 1982 1983 void Assembler::nop(int i) { 1984 #ifdef ASSERT 1985 assert(i > 0, " "); 1986 // The fancy nops aren't currently recognized by debuggers making it a 1987 // pain to disassemble code while debugging. If asserts are on clearly 1988 // speed is not an issue so simply use the single byte traditional nop 1989 // to do alignment. 
1990 1991 for (; i > 0 ; i--) emit_byte(0x90); 1992 return; 1993 1994 #endif // ASSERT 1995 1996 if (UseAddressNop && VM_Version::is_intel()) { 1997 // 1998 // Using multi-bytes nops "0x0F 0x1F [address]" for Intel 1999 // 1: 0x90 2000 // 2: 0x66 0x90 2001 // 3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding) 2002 // 4: 0x0F 0x1F 0x40 0x00 2003 // 5: 0x0F 0x1F 0x44 0x00 0x00 2004 // 6: 0x66 0x0F 0x1F 0x44 0x00 0x00 2005 // 7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 2006 // 8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 2007 // 9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 2008 // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 2009 // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 2010 2011 // The rest coding is Intel specific - don't use consecutive address nops 2012 2013 // 12: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90 2014 // 13: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90 2015 // 14: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90 2016 // 15: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90 2017 2018 while(i >= 15) { 2019 // For Intel don't generate consecutive addess nops (mix with regular nops) 2020 i -= 15; 2021 emit_byte(0x66); // size prefix 2022 emit_byte(0x66); // size prefix 2023 emit_byte(0x66); // size prefix 2024 addr_nop_8(); 2025 emit_byte(0x66); // size prefix 2026 emit_byte(0x66); // size prefix 2027 emit_byte(0x66); // size prefix 2028 emit_byte(0x90); // nop 2029 } 2030 switch (i) { 2031 case 14: 2032 emit_byte(0x66); // size prefix 2033 case 13: 2034 emit_byte(0x66); // size prefix 2035 case 12: 2036 addr_nop_8(); 2037 emit_byte(0x66); // size prefix 2038 emit_byte(0x66); // size prefix 2039 emit_byte(0x66); // size prefix 2040 emit_byte(0x90); // nop 2041 break; 2042 case 11: 2043 emit_byte(0x66); // size prefix 2044 case 10: 2045 emit_byte(0x66); // size prefix 2046 case 9: 2047 emit_byte(0x66); // size prefix 2048 case 
8: 2049 addr_nop_8(); 2050 break; 2051 case 7: 2052 addr_nop_7(); 2053 break; 2054 case 6: 2055 emit_byte(0x66); // size prefix 2056 case 5: 2057 addr_nop_5(); 2058 break; 2059 case 4: 2060 addr_nop_4(); 2061 break; 2062 case 3: 2063 // Don't use "0x0F 0x1F 0x00" - need patching safe padding 2064 emit_byte(0x66); // size prefix 2065 case 2: 2066 emit_byte(0x66); // size prefix 2067 case 1: 2068 emit_byte(0x90); // nop 2069 break; 2070 default: 2071 assert(i == 0, " "); 2072 } 2073 return; 2074 } 2075 if (UseAddressNop && VM_Version::is_amd()) { 2076 // 2077 // Using multi-bytes nops "0x0F 0x1F [address]" for AMD. 2078 // 1: 0x90 2079 // 2: 0x66 0x90 2080 // 3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding) 2081 // 4: 0x0F 0x1F 0x40 0x00 2082 // 5: 0x0F 0x1F 0x44 0x00 0x00 2083 // 6: 0x66 0x0F 0x1F 0x44 0x00 0x00 2084 // 7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 2085 // 8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 2086 // 9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 2087 // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 2088 // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 2089 2090 // The rest coding is AMD specific - use consecutive address nops 2091 2092 // 12: 0x66 0x0F 0x1F 0x44 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00 2093 // 13: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00 2094 // 14: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 2095 // 15: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 2096 // 16: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 2097 // Size prefixes (0x66) are added for larger sizes 2098 2099 while(i >= 22) { 2100 i -= 11; 2101 emit_byte(0x66); // size prefix 2102 emit_byte(0x66); // size prefix 2103 emit_byte(0x66); // size prefix 2104 addr_nop_8(); 2105 } 2106 // Generate first nop for size between 21-12 2107 switch (i) { 2108 case 21: 2109 i -= 1; 2110 emit_byte(0x66); // size prefix 2111 case 
20: 2112 case 19: 2113 i -= 1; 2114 emit_byte(0x66); // size prefix 2115 case 18: 2116 case 17: 2117 i -= 1; 2118 emit_byte(0x66); // size prefix 2119 case 16: 2120 case 15: 2121 i -= 8; 2122 addr_nop_8(); 2123 break; 2124 case 14: 2125 case 13: 2126 i -= 7; 2127 addr_nop_7(); 2128 break; 2129 case 12: 2130 i -= 6; 2131 emit_byte(0x66); // size prefix 2132 addr_nop_5(); 2133 break; 2134 default: 2135 assert(i < 12, " "); 2136 } 2137 2138 // Generate second nop for size between 11-1 2139 switch (i) { 2140 case 11: 2141 emit_byte(0x66); // size prefix 2142 case 10: 2143 emit_byte(0x66); // size prefix 2144 case 9: 2145 emit_byte(0x66); // size prefix 2146 case 8: 2147 addr_nop_8(); 2148 break; 2149 case 7: 2150 addr_nop_7(); 2151 break; 2152 case 6: 2153 emit_byte(0x66); // size prefix 2154 case 5: 2155 addr_nop_5(); 2156 break; 2157 case 4: 2158 addr_nop_4(); 2159 break; 2160 case 3: 2161 // Don't use "0x0F 0x1F 0x00" - need patching safe padding 2162 emit_byte(0x66); // size prefix 2163 case 2: 2164 emit_byte(0x66); // size prefix 2165 case 1: 2166 emit_byte(0x90); // nop 2167 break; 2168 default: 2169 assert(i == 0, " "); 2170 } 2171 return; 2172 } 2173 2174 // Using nops with size prefixes "0x66 0x90". 
2175 // From AMD Optimization Guide: 2176 // 1: 0x90 2177 // 2: 0x66 0x90 2178 // 3: 0x66 0x66 0x90 2179 // 4: 0x66 0x66 0x66 0x90 2180 // 5: 0x66 0x66 0x90 0x66 0x90 2181 // 6: 0x66 0x66 0x90 0x66 0x66 0x90 2182 // 7: 0x66 0x66 0x66 0x90 0x66 0x66 0x90 2183 // 8: 0x66 0x66 0x66 0x90 0x66 0x66 0x66 0x90 2184 // 9: 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90 2185 // 10: 0x66 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90 2186 // 2187 while(i > 12) { 2188 i -= 4; 2189 emit_byte(0x66); // size prefix 2190 emit_byte(0x66); 2191 emit_byte(0x66); 2192 emit_byte(0x90); // nop 2193 } 2194 // 1 - 12 nops 2195 if(i > 8) { 2196 if(i > 9) { 2197 i -= 1; 2198 emit_byte(0x66); 2199 } 2200 i -= 3; 2201 emit_byte(0x66); 2202 emit_byte(0x66); 2203 emit_byte(0x90); 2204 } 2205 // 1 - 8 nops 2206 if(i > 4) { 2207 if(i > 6) { 2208 i -= 1; 2209 emit_byte(0x66); 2210 } 2211 i -= 3; 2212 emit_byte(0x66); 2213 emit_byte(0x66); 2214 emit_byte(0x90); 2215 } 2216 switch (i) { 2217 case 4: 2218 emit_byte(0x66); 2219 case 3: 2220 emit_byte(0x66); 2221 case 2: 2222 emit_byte(0x66); 2223 case 1: 2224 emit_byte(0x90); 2225 break; 2226 default: 2227 assert(i == 0, " "); 2228 } 2229 } 2230 2231 void Assembler::notl(Register dst) { 2232 int encode = prefix_and_encode(dst->encoding()); 2233 emit_byte(0xF7); 2234 emit_byte(0xD0 | encode ); 2235 } 2236 2237 void Assembler::orl(Address dst, int32_t imm32) { 2238 InstructionMark im(this); 2239 prefix(dst); 2240 emit_arith_operand(0x81, rcx, dst, imm32); 2241 } 2242 2243 void Assembler::orl(Register dst, int32_t imm32) { 2244 prefix(dst); 2245 emit_arith(0x81, 0xC8, dst, imm32); 2246 } 2247 2248 void Assembler::orl(Register dst, Address src) { 2249 InstructionMark im(this); 2250 prefix(src, dst); 2251 emit_byte(0x0B); 2252 emit_operand(dst, src); 2253 } 2254 2255 void Assembler::orl(Register dst, Register src) { 2256 (void) prefix_and_encode(dst->encoding(), src->encoding()); 2257 emit_arith(0x0B, 0xC0, dst, src); 2258 } 2259 2260 void 
Assembler::packuswb(XMMRegister dst, Address src) { 2261 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2262 assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); 2263 InstructionMark im(this); 2264 simd_prefix(dst, dst, src, VEX_SIMD_66); 2265 emit_byte(0x67); 2266 emit_operand(dst, src); 2267 } 2268 2269 void Assembler::packuswb(XMMRegister dst, XMMRegister src) { 2270 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2271 int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66); 2272 emit_byte(0x67); 2273 emit_byte(0xC0 | encode); 2274 } 2275 2276 void Assembler::pcmpestri(XMMRegister dst, Address src, int imm8) { 2277 assert(VM_Version::supports_sse4_2(), ""); 2278 InstructionMark im(this); 2279 simd_prefix(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A); 2280 emit_byte(0x61); 2281 emit_operand(dst, src); 2282 emit_byte(imm8); 2283 } 2284 2285 void Assembler::pcmpestri(XMMRegister dst, XMMRegister src, int imm8) { 2286 assert(VM_Version::supports_sse4_2(), ""); 2287 int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A); 2288 emit_byte(0x61); 2289 emit_byte(0xC0 | encode); 2290 emit_byte(imm8); 2291 } 2292 2293 void Assembler::pmovzxbw(XMMRegister dst, Address src) { 2294 assert(VM_Version::supports_sse4_1(), ""); 2295 InstructionMark im(this); 2296 simd_prefix(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38); 2297 emit_byte(0x30); 2298 emit_operand(dst, src); 2299 } 2300 2301 void Assembler::pmovzxbw(XMMRegister dst, XMMRegister src) { 2302 assert(VM_Version::supports_sse4_1(), ""); 2303 int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38); 2304 emit_byte(0x30); 2305 emit_byte(0xC0 | encode); 2306 } 2307 2308 // generic 2309 void Assembler::pop(Register dst) { 2310 int encode = prefix_and_encode(dst->encoding()); 2311 emit_byte(0x58 | encode); 2312 } 2313 2314 void Assembler::popcntl(Register dst, Address src) { 2315 assert(VM_Version::supports_popcnt(), "must support"); 2316 InstructionMark im(this); 
2317 emit_byte(0xF3); 2318 prefix(src, dst); 2319 emit_byte(0x0F); 2320 emit_byte(0xB8); 2321 emit_operand(dst, src); 2322 } 2323 2324 void Assembler::popcntl(Register dst, Register src) { 2325 assert(VM_Version::supports_popcnt(), "must support"); 2326 emit_byte(0xF3); 2327 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 2328 emit_byte(0x0F); 2329 emit_byte(0xB8); 2330 emit_byte(0xC0 | encode); 2331 } 2332 2333 void Assembler::popf() { 2334 emit_byte(0x9D); 2335 } 2336 2337 #ifndef _LP64 // no 32bit push/pop on amd64 2338 void Assembler::popl(Address dst) { 2339 // NOTE: this will adjust stack by 8byte on 64bits 2340 InstructionMark im(this); 2341 prefix(dst); 2342 emit_byte(0x8F); 2343 emit_operand(rax, dst); 2344 } 2345 #endif 2346 2347 void Assembler::prefetch_prefix(Address src) { 2348 prefix(src); 2349 emit_byte(0x0F); 2350 } 2351 2352 void Assembler::prefetchnta(Address src) { 2353 NOT_LP64(assert(VM_Version::supports_sse(), "must support")); 2354 InstructionMark im(this); 2355 prefetch_prefix(src); 2356 emit_byte(0x18); 2357 emit_operand(rax, src); // 0, src 2358 } 2359 2360 void Assembler::prefetchr(Address src) { 2361 assert(VM_Version::supports_3dnow_prefetch(), "must support"); 2362 InstructionMark im(this); 2363 prefetch_prefix(src); 2364 emit_byte(0x0D); 2365 emit_operand(rax, src); // 0, src 2366 } 2367 2368 void Assembler::prefetcht0(Address src) { 2369 NOT_LP64(assert(VM_Version::supports_sse(), "must support")); 2370 InstructionMark im(this); 2371 prefetch_prefix(src); 2372 emit_byte(0x18); 2373 emit_operand(rcx, src); // 1, src 2374 } 2375 2376 void Assembler::prefetcht1(Address src) { 2377 NOT_LP64(assert(VM_Version::supports_sse(), "must support")); 2378 InstructionMark im(this); 2379 prefetch_prefix(src); 2380 emit_byte(0x18); 2381 emit_operand(rdx, src); // 2, src 2382 } 2383 2384 void Assembler::prefetcht2(Address src) { 2385 NOT_LP64(assert(VM_Version::supports_sse(), "must support")); 2386 InstructionMark im(this); 2387 
prefetch_prefix(src); 2388 emit_byte(0x18); 2389 emit_operand(rbx, src); // 3, src 2390 } 2391 2392 void Assembler::prefetchw(Address src) { 2393 assert(VM_Version::supports_3dnow_prefetch(), "must support"); 2394 InstructionMark im(this); 2395 prefetch_prefix(src); 2396 emit_byte(0x0D); 2397 emit_operand(rcx, src); // 1, src 2398 } 2399 2400 void Assembler::prefix(Prefix p) { 2401 a_byte(p); 2402 } 2403 2404 void Assembler::por(XMMRegister dst, XMMRegister src) { 2405 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2406 int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66); 2407 emit_byte(0xEB); 2408 emit_byte(0xC0 | encode); 2409 } 2410 2411 void Assembler::por(XMMRegister dst, Address src) { 2412 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2413 assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); 2414 InstructionMark im(this); 2415 simd_prefix(dst, dst, src, VEX_SIMD_66); 2416 emit_byte(0xEB); 2417 emit_operand(dst, src); 2418 } 2419 2420 void Assembler::pshufd(XMMRegister dst, XMMRegister src, int mode) { 2421 assert(isByte(mode), "invalid value"); 2422 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2423 int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66); 2424 emit_byte(0x70); 2425 emit_byte(0xC0 | encode); 2426 emit_byte(mode & 0xFF); 2427 2428 } 2429 2430 void Assembler::pshufd(XMMRegister dst, Address src, int mode) { 2431 assert(isByte(mode), "invalid value"); 2432 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2433 assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); 2434 InstructionMark im(this); 2435 simd_prefix(dst, src, VEX_SIMD_66); 2436 emit_byte(0x70); 2437 emit_operand(dst, src); 2438 emit_byte(mode & 0xFF); 2439 } 2440 2441 void Assembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) { 2442 assert(isByte(mode), "invalid value"); 2443 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2444 int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F2); 2445 emit_byte(0x70); 
2446 emit_byte(0xC0 | encode); 2447 emit_byte(mode & 0xFF); 2448 } 2449 2450 void Assembler::pshuflw(XMMRegister dst, Address src, int mode) { 2451 assert(isByte(mode), "invalid value"); 2452 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2453 assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); 2454 InstructionMark im(this); 2455 simd_prefix(dst, src, VEX_SIMD_F2); 2456 emit_byte(0x70); 2457 emit_operand(dst, src); 2458 emit_byte(mode & 0xFF); 2459 } 2460 2461 void Assembler::psrlq(XMMRegister dst, int shift) { 2462 // Shift 64 bit value logically right by specified number of bits. 2463 // HMM Table D-1 says sse2 or mmx. 2464 // Do not confuse it with psrldq SSE2 instruction which 2465 // shifts 128 bit value in xmm register by number of bytes. 2466 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2467 int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66); 2468 emit_byte(0x73); 2469 emit_byte(0xC0 | encode); 2470 emit_byte(shift); 2471 } 2472 2473 void Assembler::psrldq(XMMRegister dst, int shift) { 2474 // Shift 128 bit value in xmm register by number of bytes. 
2475 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2476 int encode = simd_prefix_and_encode(xmm3, dst, dst, VEX_SIMD_66); 2477 emit_byte(0x73); 2478 emit_byte(0xC0 | encode); 2479 emit_byte(shift); 2480 } 2481 2482 void Assembler::ptest(XMMRegister dst, Address src) { 2483 assert(VM_Version::supports_sse4_1(), ""); 2484 assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); 2485 InstructionMark im(this); 2486 simd_prefix(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38); 2487 emit_byte(0x17); 2488 emit_operand(dst, src); 2489 } 2490 2491 void Assembler::ptest(XMMRegister dst, XMMRegister src) { 2492 assert(VM_Version::supports_sse4_1(), ""); 2493 int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38); 2494 emit_byte(0x17); 2495 emit_byte(0xC0 | encode); 2496 } 2497 2498 void Assembler::punpcklbw(XMMRegister dst, Address src) { 2499 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2500 assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); 2501 InstructionMark im(this); 2502 simd_prefix(dst, dst, src, VEX_SIMD_66); 2503 emit_byte(0x60); 2504 emit_operand(dst, src); 2505 } 2506 2507 void Assembler::punpcklbw(XMMRegister dst, XMMRegister src) { 2508 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2509 int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66); 2510 emit_byte(0x60); 2511 emit_byte(0xC0 | encode); 2512 } 2513 2514 void Assembler::punpckldq(XMMRegister dst, Address src) { 2515 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2516 assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); 2517 InstructionMark im(this); 2518 simd_prefix(dst, dst, src, VEX_SIMD_66); 2519 emit_byte(0x62); 2520 emit_operand(dst, src); 2521 } 2522 2523 void Assembler::punpckldq(XMMRegister dst, XMMRegister src) { 2524 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2525 int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66); 2526 emit_byte(0x62); 2527 emit_byte(0xC0 | encode); 2528 } 2529 2530 
void Assembler::push(int32_t imm32) { 2531 // in 64bits we push 64bits onto the stack but only 2532 // take a 32bit immediate 2533 emit_byte(0x68); 2534 emit_long(imm32); 2535 } 2536 2537 void Assembler::push(Register src) { 2538 int encode = prefix_and_encode(src->encoding()); 2539 2540 emit_byte(0x50 | encode); 2541 } 2542 2543 void Assembler::pushf() { 2544 emit_byte(0x9C); 2545 } 2546 2547 #ifndef _LP64 // no 32bit push/pop on amd64 2548 void Assembler::pushl(Address src) { 2549 // Note this will push 64bit on 64bit 2550 InstructionMark im(this); 2551 prefix(src); 2552 emit_byte(0xFF); 2553 emit_operand(rsi, src); 2554 } 2555 #endif 2556 2557 void Assembler::pxor(XMMRegister dst, Address src) { 2558 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2559 assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); 2560 InstructionMark im(this); 2561 simd_prefix(dst, dst, src, VEX_SIMD_66); 2562 emit_byte(0xEF); 2563 emit_operand(dst, src); 2564 } 2565 2566 void Assembler::pxor(XMMRegister dst, XMMRegister src) { 2567 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2568 int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66); 2569 emit_byte(0xEF); 2570 emit_byte(0xC0 | encode); 2571 } 2572 2573 void Assembler::rcll(Register dst, int imm8) { 2574 assert(isShiftCount(imm8), "illegal shift count"); 2575 int encode = prefix_and_encode(dst->encoding()); 2576 if (imm8 == 1) { 2577 emit_byte(0xD1); 2578 emit_byte(0xD0 | encode); 2579 } else { 2580 emit_byte(0xC1); 2581 emit_byte(0xD0 | encode); 2582 emit_byte(imm8); 2583 } 2584 } 2585 2586 // copies data from [esi] to [edi] using rcx pointer sized words 2587 // generic 2588 void Assembler::rep_mov() { 2589 emit_byte(0xF3); 2590 // MOVSQ 2591 LP64_ONLY(prefix(REX_W)); 2592 emit_byte(0xA5); 2593 } 2594 2595 // sets rcx pointer sized words with rax, value at [edi] 2596 // generic 2597 void Assembler::rep_set() { // rep_set 2598 emit_byte(0xF3); 2599 // STOSQ 2600 LP64_ONLY(prefix(REX_W)); 2601 
emit_byte(0xAB); 2602 } 2603 2604 // scans rcx pointer sized words at [edi] for occurance of rax, 2605 // generic 2606 void Assembler::repne_scan() { // repne_scan 2607 emit_byte(0xF2); 2608 // SCASQ 2609 LP64_ONLY(prefix(REX_W)); 2610 emit_byte(0xAF); 2611 } 2612 2613 #ifdef _LP64 2614 // scans rcx 4 byte words at [edi] for occurance of rax, 2615 // generic 2616 void Assembler::repne_scanl() { // repne_scan 2617 emit_byte(0xF2); 2618 // SCASL 2619 emit_byte(0xAF); 2620 } 2621 #endif 2622 2623 void Assembler::ret(int imm16) { 2624 if (imm16 == 0) { 2625 emit_byte(0xC3); 2626 } else { 2627 emit_byte(0xC2); 2628 emit_word(imm16); 2629 } 2630 } 2631 2632 void Assembler::sahf() { 2633 #ifdef _LP64 2634 // Not supported in 64bit mode 2635 ShouldNotReachHere(); 2636 #endif 2637 emit_byte(0x9E); 2638 } 2639 2640 void Assembler::sarl(Register dst, int imm8) { 2641 int encode = prefix_and_encode(dst->encoding()); 2642 assert(isShiftCount(imm8), "illegal shift count"); 2643 if (imm8 == 1) { 2644 emit_byte(0xD1); 2645 emit_byte(0xF8 | encode); 2646 } else { 2647 emit_byte(0xC1); 2648 emit_byte(0xF8 | encode); 2649 emit_byte(imm8); 2650 } 2651 } 2652 2653 void Assembler::sarl(Register dst) { 2654 int encode = prefix_and_encode(dst->encoding()); 2655 emit_byte(0xD3); 2656 emit_byte(0xF8 | encode); 2657 } 2658 2659 void Assembler::sbbl(Address dst, int32_t imm32) { 2660 InstructionMark im(this); 2661 prefix(dst); 2662 emit_arith_operand(0x81, rbx, dst, imm32); 2663 } 2664 2665 void Assembler::sbbl(Register dst, int32_t imm32) { 2666 prefix(dst); 2667 emit_arith(0x81, 0xD8, dst, imm32); 2668 } 2669 2670 2671 void Assembler::sbbl(Register dst, Address src) { 2672 InstructionMark im(this); 2673 prefix(src, dst); 2674 emit_byte(0x1B); 2675 emit_operand(dst, src); 2676 } 2677 2678 void Assembler::sbbl(Register dst, Register src) { 2679 (void) prefix_and_encode(dst->encoding(), src->encoding()); 2680 emit_arith(0x1B, 0xC0, dst, src); 2681 } 2682 2683 void Assembler::setb(Condition cc, 
Register dst) { 2684 assert(0 <= cc && cc < 16, "illegal cc"); 2685 int encode = prefix_and_encode(dst->encoding(), true); 2686 emit_byte(0x0F); 2687 emit_byte(0x90 | cc); 2688 emit_byte(0xC0 | encode); 2689 } 2690 2691 void Assembler::shll(Register dst, int imm8) { 2692 assert(isShiftCount(imm8), "illegal shift count"); 2693 int encode = prefix_and_encode(dst->encoding()); 2694 if (imm8 == 1 ) { 2695 emit_byte(0xD1); 2696 emit_byte(0xE0 | encode); 2697 } else { 2698 emit_byte(0xC1); 2699 emit_byte(0xE0 | encode); 2700 emit_byte(imm8); 2701 } 2702 } 2703 2704 void Assembler::shll(Register dst) { 2705 int encode = prefix_and_encode(dst->encoding()); 2706 emit_byte(0xD3); 2707 emit_byte(0xE0 | encode); 2708 } 2709 2710 void Assembler::shrl(Register dst, int imm8) { 2711 assert(isShiftCount(imm8), "illegal shift count"); 2712 int encode = prefix_and_encode(dst->encoding()); 2713 emit_byte(0xC1); 2714 emit_byte(0xE8 | encode); 2715 emit_byte(imm8); 2716 } 2717 2718 void Assembler::shrl(Register dst) { 2719 int encode = prefix_and_encode(dst->encoding()); 2720 emit_byte(0xD3); 2721 emit_byte(0xE8 | encode); 2722 } 2723 2724 // copies a single word from [esi] to [edi] 2725 void Assembler::smovl() { 2726 emit_byte(0xA5); 2727 } 2728 2729 void Assembler::sqrtsd(XMMRegister dst, XMMRegister src) { 2730 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2731 int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2); 2732 emit_byte(0x51); 2733 emit_byte(0xC0 | encode); 2734 } 2735 2736 void Assembler::sqrtsd(XMMRegister dst, Address src) { 2737 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2738 InstructionMark im(this); 2739 simd_prefix(dst, dst, src, VEX_SIMD_F2); 2740 emit_byte(0x51); 2741 emit_operand(dst, src); 2742 } 2743 2744 void Assembler::sqrtss(XMMRegister dst, XMMRegister src) { 2745 NOT_LP64(assert(VM_Version::supports_sse(), "")); 2746 int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3); 2747 emit_byte(0x51); 2748 emit_byte(0xC0 | encode); 
2749 } 2750 2751 void Assembler::sqrtss(XMMRegister dst, Address src) { 2752 NOT_LP64(assert(VM_Version::supports_sse(), "")); 2753 InstructionMark im(this); 2754 simd_prefix(dst, dst, src, VEX_SIMD_F3); 2755 emit_byte(0x51); 2756 emit_operand(dst, src); 2757 } 2758 2759 void Assembler::stmxcsr( Address dst) { 2760 NOT_LP64(assert(VM_Version::supports_sse(), "")); 2761 InstructionMark im(this); 2762 prefix(dst); 2763 emit_byte(0x0F); 2764 emit_byte(0xAE); 2765 emit_operand(as_Register(3), dst); 2766 } 2767 2768 void Assembler::subl(Address dst, int32_t imm32) { 2769 InstructionMark im(this); 2770 prefix(dst); 2771 emit_arith_operand(0x81, rbp, dst, imm32); 2772 } 2773 2774 void Assembler::subl(Address dst, Register src) { 2775 InstructionMark im(this); 2776 prefix(dst, src); 2777 emit_byte(0x29); 2778 emit_operand(src, dst); 2779 } 2780 2781 void Assembler::subl(Register dst, int32_t imm32) { 2782 prefix(dst); 2783 emit_arith(0x81, 0xE8, dst, imm32); 2784 } 2785 2786 // Force generation of a 4 byte immediate value even if it fits into 8bit 2787 void Assembler::subl_imm32(Register dst, int32_t imm32) { 2788 prefix(dst); 2789 emit_arith_imm32(0x81, 0xE8, dst, imm32); 2790 } 2791 2792 void Assembler::subl(Register dst, Address src) { 2793 InstructionMark im(this); 2794 prefix(src, dst); 2795 emit_byte(0x2B); 2796 emit_operand(dst, src); 2797 } 2798 2799 void Assembler::subl(Register dst, Register src) { 2800 (void) prefix_and_encode(dst->encoding(), src->encoding()); 2801 emit_arith(0x2B, 0xC0, dst, src); 2802 } 2803 2804 void Assembler::subsd(XMMRegister dst, XMMRegister src) { 2805 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2806 int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2); 2807 emit_byte(0x5C); 2808 emit_byte(0xC0 | encode); 2809 } 2810 2811 void Assembler::subsd(XMMRegister dst, Address src) { 2812 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2813 InstructionMark im(this); 2814 simd_prefix(dst, dst, src, VEX_SIMD_F2); 2815 
emit_byte(0x5C); 2816 emit_operand(dst, src); 2817 } 2818 2819 void Assembler::subss(XMMRegister dst, XMMRegister src) { 2820 NOT_LP64(assert(VM_Version::supports_sse(), "")); 2821 int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3); 2822 emit_byte(0x5C); 2823 emit_byte(0xC0 | encode); 2824 } 2825 2826 void Assembler::subss(XMMRegister dst, Address src) { 2827 NOT_LP64(assert(VM_Version::supports_sse(), "")); 2828 InstructionMark im(this); 2829 simd_prefix(dst, dst, src, VEX_SIMD_F3); 2830 emit_byte(0x5C); 2831 emit_operand(dst, src); 2832 } 2833 2834 void Assembler::testb(Register dst, int imm8) { 2835 NOT_LP64(assert(dst->has_byte_register(), "must have byte register")); 2836 (void) prefix_and_encode(dst->encoding(), true); 2837 emit_arith_b(0xF6, 0xC0, dst, imm8); 2838 } 2839 2840 void Assembler::testl(Register dst, int32_t imm32) { 2841 // not using emit_arith because test 2842 // doesn't support sign-extension of 2843 // 8bit operands 2844 int encode = dst->encoding(); 2845 if (encode == 0) { 2846 emit_byte(0xA9); 2847 } else { 2848 encode = prefix_and_encode(encode); 2849 emit_byte(0xF7); 2850 emit_byte(0xC0 | encode); 2851 } 2852 emit_long(imm32); 2853 } 2854 2855 void Assembler::testl(Register dst, Register src) { 2856 (void) prefix_and_encode(dst->encoding(), src->encoding()); 2857 emit_arith(0x85, 0xC0, dst, src); 2858 } 2859 2860 void Assembler::testl(Register dst, Address src) { 2861 InstructionMark im(this); 2862 prefix(src, dst); 2863 emit_byte(0x85); 2864 emit_operand(dst, src); 2865 } 2866 2867 void Assembler::ucomisd(XMMRegister dst, Address src) { 2868 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2869 InstructionMark im(this); 2870 simd_prefix(dst, src, VEX_SIMD_66); 2871 emit_byte(0x2E); 2872 emit_operand(dst, src); 2873 } 2874 2875 void Assembler::ucomisd(XMMRegister dst, XMMRegister src) { 2876 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2877 int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66); 2878 
emit_byte(0x2E); 2879 emit_byte(0xC0 | encode); 2880 } 2881 2882 void Assembler::ucomiss(XMMRegister dst, Address src) { 2883 NOT_LP64(assert(VM_Version::supports_sse(), "")); 2884 InstructionMark im(this); 2885 simd_prefix(dst, src, VEX_SIMD_NONE); 2886 emit_byte(0x2E); 2887 emit_operand(dst, src); 2888 } 2889 2890 void Assembler::ucomiss(XMMRegister dst, XMMRegister src) { 2891 NOT_LP64(assert(VM_Version::supports_sse(), "")); 2892 int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_NONE); 2893 emit_byte(0x2E); 2894 emit_byte(0xC0 | encode); 2895 } 2896 2897 2898 void Assembler::xaddl(Address dst, Register src) { 2899 InstructionMark im(this); 2900 prefix(dst, src); 2901 emit_byte(0x0F); 2902 emit_byte(0xC1); 2903 emit_operand(src, dst); 2904 } 2905 2906 void Assembler::xchgl(Register dst, Address src) { // xchg 2907 InstructionMark im(this); 2908 prefix(src, dst); 2909 emit_byte(0x87); 2910 emit_operand(dst, src); 2911 } 2912 2913 void Assembler::xchgl(Register dst, Register src) { 2914 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 2915 emit_byte(0x87); 2916 emit_byte(0xc0 | encode); 2917 } 2918 2919 void Assembler::xorl(Register dst, int32_t imm32) { 2920 prefix(dst); 2921 emit_arith(0x81, 0xF0, dst, imm32); 2922 } 2923 2924 void Assembler::xorl(Register dst, Address src) { 2925 InstructionMark im(this); 2926 prefix(src, dst); 2927 emit_byte(0x33); 2928 emit_operand(dst, src); 2929 } 2930 2931 void Assembler::xorl(Register dst, Register src) { 2932 (void) prefix_and_encode(dst->encoding(), src->encoding()); 2933 emit_arith(0x33, 0xC0, dst, src); 2934 } 2935 2936 void Assembler::xorpd(XMMRegister dst, XMMRegister src) { 2937 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2938 int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66); 2939 emit_byte(0x57); 2940 emit_byte(0xC0 | encode); 2941 } 2942 2943 void Assembler::xorpd(XMMRegister dst, Address src) { 2944 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2945 
InstructionMark im(this); 2946 simd_prefix(dst, dst, src, VEX_SIMD_66); 2947 emit_byte(0x57); 2948 emit_operand(dst, src); 2949 } 2950 2951 2952 void Assembler::xorps(XMMRegister dst, XMMRegister src) { 2953 NOT_LP64(assert(VM_Version::supports_sse(), "")); 2954 int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE); 2955 emit_byte(0x57); 2956 emit_byte(0xC0 | encode); 2957 } 2958 2959 void Assembler::xorps(XMMRegister dst, Address src) { 2960 NOT_LP64(assert(VM_Version::supports_sse(), "")); 2961 InstructionMark im(this); 2962 simd_prefix(dst, dst, src, VEX_SIMD_NONE); 2963 emit_byte(0x57); 2964 emit_operand(dst, src); 2965 } 2966 2967 // AVX 3-operands non destructive source instructions (encoded with VEX prefix) 2968 2969 void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, Address src) { 2970 assert(VM_Version::supports_avx(), ""); 2971 InstructionMark im(this); 2972 vex_prefix(dst, nds, src, VEX_SIMD_F2); 2973 emit_byte(0x58); 2974 emit_operand(dst, src); 2975 } 2976 2977 void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { 2978 assert(VM_Version::supports_avx(), ""); 2979 int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F2); 2980 emit_byte(0x58); 2981 emit_byte(0xC0 | encode); 2982 } 2983 2984 void Assembler::vaddss(XMMRegister dst, XMMRegister nds, Address src) { 2985 assert(VM_Version::supports_avx(), ""); 2986 InstructionMark im(this); 2987 vex_prefix(dst, nds, src, VEX_SIMD_F3); 2988 emit_byte(0x58); 2989 emit_operand(dst, src); 2990 } 2991 2992 void Assembler::vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src) { 2993 assert(VM_Version::supports_avx(), ""); 2994 int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F3); 2995 emit_byte(0x58); 2996 emit_byte(0xC0 | encode); 2997 } 2998 2999 void Assembler::vandpd(XMMRegister dst, XMMRegister nds, Address src) { 3000 assert(VM_Version::supports_avx(), ""); 3001 InstructionMark im(this); 3002 vex_prefix(dst, nds, src, VEX_SIMD_66); // 128-bit vector 
3003 emit_byte(0x54); 3004 emit_operand(dst, src); 3005 } 3006 3007 void Assembler::vandps(XMMRegister dst, XMMRegister nds, Address src) { 3008 assert(VM_Version::supports_avx(), ""); 3009 InstructionMark im(this); 3010 vex_prefix(dst, nds, src, VEX_SIMD_NONE); // 128-bit vector 3011 emit_byte(0x54); 3012 emit_operand(dst, src); 3013 } 3014 3015 void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, Address src) { 3016 assert(VM_Version::supports_avx(), ""); 3017 InstructionMark im(this); 3018 vex_prefix(dst, nds, src, VEX_SIMD_F2); 3019 emit_byte(0x5E); 3020 emit_operand(dst, src); 3021 } 3022 3023 void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { 3024 assert(VM_Version::supports_avx(), ""); 3025 int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F2); 3026 emit_byte(0x5E); 3027 emit_byte(0xC0 | encode); 3028 } 3029 3030 void Assembler::vdivss(XMMRegister dst, XMMRegister nds, Address src) { 3031 assert(VM_Version::supports_avx(), ""); 3032 InstructionMark im(this); 3033 vex_prefix(dst, nds, src, VEX_SIMD_F3); 3034 emit_byte(0x5E); 3035 emit_operand(dst, src); 3036 } 3037 3038 void Assembler::vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src) { 3039 assert(VM_Version::supports_avx(), ""); 3040 int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F3); 3041 emit_byte(0x5E); 3042 emit_byte(0xC0 | encode); 3043 } 3044 3045 void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, Address src) { 3046 assert(VM_Version::supports_avx(), ""); 3047 InstructionMark im(this); 3048 vex_prefix(dst, nds, src, VEX_SIMD_F2); 3049 emit_byte(0x59); 3050 emit_operand(dst, src); 3051 } 3052 3053 void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { 3054 assert(VM_Version::supports_avx(), ""); 3055 int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F2); 3056 emit_byte(0x59); 3057 emit_byte(0xC0 | encode); 3058 } 3059 3060 void Assembler::vmulss(XMMRegister dst, XMMRegister nds, Address src) { 3061 
InstructionMark im(this); 3062 vex_prefix(dst, nds, src, VEX_SIMD_F3); 3063 emit_byte(0x59); 3064 emit_operand(dst, src); 3065 } 3066 3067 void Assembler::vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src) { 3068 assert(VM_Version::supports_avx(), ""); 3069 int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F3); 3070 emit_byte(0x59); 3071 emit_byte(0xC0 | encode); 3072 } 3073 3074 3075 void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, Address src) { 3076 assert(VM_Version::supports_avx(), ""); 3077 InstructionMark im(this); 3078 vex_prefix(dst, nds, src, VEX_SIMD_F2); 3079 emit_byte(0x5C); 3080 emit_operand(dst, src); 3081 } 3082 3083 void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { 3084 assert(VM_Version::supports_avx(), ""); 3085 int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F2); 3086 emit_byte(0x5C); 3087 emit_byte(0xC0 | encode); 3088 } 3089 3090 void Assembler::vsubss(XMMRegister dst, XMMRegister nds, Address src) { 3091 assert(VM_Version::supports_avx(), ""); 3092 InstructionMark im(this); 3093 vex_prefix(dst, nds, src, VEX_SIMD_F3); 3094 emit_byte(0x5C); 3095 emit_operand(dst, src); 3096 } 3097 3098 void Assembler::vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src) { 3099 assert(VM_Version::supports_avx(), ""); 3100 int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F3); 3101 emit_byte(0x5C); 3102 emit_byte(0xC0 | encode); 3103 } 3104 3105 void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, Address src) { 3106 assert(VM_Version::supports_avx(), ""); 3107 InstructionMark im(this); 3108 vex_prefix(dst, nds, src, VEX_SIMD_66); // 128-bit vector 3109 emit_byte(0x57); 3110 emit_operand(dst, src); 3111 } 3112 3113 void Assembler::vxorps(XMMRegister dst, XMMRegister nds, Address src) { 3114 assert(VM_Version::supports_avx(), ""); 3115 InstructionMark im(this); 3116 vex_prefix(dst, nds, src, VEX_SIMD_NONE); // 128-bit vector 3117 emit_byte(0x57); 3118 emit_operand(dst, src); 3119 } 
3120 3121 3122 #ifndef _LP64 3123 // 32bit only pieces of the assembler 3124 3125 void Assembler::cmp_literal32(Register src1, int32_t imm32, RelocationHolder const& rspec) { 3126 // NO PREFIX AS NEVER 64BIT 3127 InstructionMark im(this); 3128 emit_byte(0x81); 3129 emit_byte(0xF8 | src1->encoding()); 3130 emit_data(imm32, rspec, 0); 3131 } 3132 3133 void Assembler::cmp_literal32(Address src1, int32_t imm32, RelocationHolder const& rspec) { 3134 // NO PREFIX AS NEVER 64BIT (not even 32bit versions of 64bit regs 3135 InstructionMark im(this); 3136 emit_byte(0x81); 3137 emit_operand(rdi, src1); 3138 emit_data(imm32, rspec, 0); 3139 } 3140 3141 // The 64-bit (32bit platform) cmpxchg compares the value at adr with the contents of rdx:rax, 3142 // and stores rcx:rbx into adr if so; otherwise, the value at adr is loaded 3143 // into rdx:rax. The ZF is set if the compared values were equal, and cleared otherwise. 3144 void Assembler::cmpxchg8(Address adr) { 3145 InstructionMark im(this); 3146 emit_byte(0x0F); 3147 emit_byte(0xc7); 3148 emit_operand(rcx, adr); 3149 } 3150 3151 void Assembler::decl(Register dst) { 3152 // Don't use it directly. Use MacroAssembler::decrementl() instead. 
3153 emit_byte(0x48 | dst->encoding()); 3154 } 3155 3156 #endif // _LP64 3157 3158 // 64bit typically doesn't use the x87 but needs to for the trig funcs 3159 3160 void Assembler::fabs() { 3161 emit_byte(0xD9); 3162 emit_byte(0xE1); 3163 } 3164 3165 void Assembler::fadd(int i) { 3166 emit_farith(0xD8, 0xC0, i); 3167 } 3168 3169 void Assembler::fadd_d(Address src) { 3170 InstructionMark im(this); 3171 emit_byte(0xDC); 3172 emit_operand32(rax, src); 3173 } 3174 3175 void Assembler::fadd_s(Address src) { 3176 InstructionMark im(this); 3177 emit_byte(0xD8); 3178 emit_operand32(rax, src); 3179 } 3180 3181 void Assembler::fadda(int i) { 3182 emit_farith(0xDC, 0xC0, i); 3183 } 3184 3185 void Assembler::faddp(int i) { 3186 emit_farith(0xDE, 0xC0, i); 3187 } 3188 3189 void Assembler::fchs() { 3190 emit_byte(0xD9); 3191 emit_byte(0xE0); 3192 } 3193 3194 void Assembler::fcom(int i) { 3195 emit_farith(0xD8, 0xD0, i); 3196 } 3197 3198 void Assembler::fcomp(int i) { 3199 emit_farith(0xD8, 0xD8, i); 3200 } 3201 3202 void Assembler::fcomp_d(Address src) { 3203 InstructionMark im(this); 3204 emit_byte(0xDC); 3205 emit_operand32(rbx, src); 3206 } 3207 3208 void Assembler::fcomp_s(Address src) { 3209 InstructionMark im(this); 3210 emit_byte(0xD8); 3211 emit_operand32(rbx, src); 3212 } 3213 3214 void Assembler::fcompp() { 3215 emit_byte(0xDE); 3216 emit_byte(0xD9); 3217 } 3218 3219 void Assembler::fcos() { 3220 emit_byte(0xD9); 3221 emit_byte(0xFF); 3222 } 3223 3224 void Assembler::fdecstp() { 3225 emit_byte(0xD9); 3226 emit_byte(0xF6); 3227 } 3228 3229 void Assembler::fdiv(int i) { 3230 emit_farith(0xD8, 0xF0, i); 3231 } 3232 3233 void Assembler::fdiv_d(Address src) { 3234 InstructionMark im(this); 3235 emit_byte(0xDC); 3236 emit_operand32(rsi, src); 3237 } 3238 3239 void Assembler::fdiv_s(Address src) { 3240 InstructionMark im(this); 3241 emit_byte(0xD8); 3242 emit_operand32(rsi, src); 3243 } 3244 3245 void Assembler::fdiva(int i) { 3246 emit_farith(0xDC, 0xF8, i); 3247 } 3248 3249 
// Note: The Intel manual (Pentium Processor User's Manual, Vol.3, 1994)
// is erroneous for some of the floating-point instructions below.

void Assembler::fdivp(int i) {
  emit_farith(0xDE, 0xF8, i); // ST(0) <- ST(0) / ST(1) and pop (Intel manual wrong)
}

// FDIVR ST(0), ST(i): reversed divide, ST(0) <- ST(i) / ST(0)
void Assembler::fdivr(int i) {
  emit_farith(0xD8, 0xF8, i);
}

// FDIVR m64fp  (DC /7)
void Assembler::fdivr_d(Address src) {
  InstructionMark im(this);
  emit_byte(0xDC);
  emit_operand32(rdi, src);
}

// FDIVR m32fp  (D8 /7)
void Assembler::fdivr_s(Address src) {
  InstructionMark im(this);
  emit_byte(0xD8);
  emit_operand32(rdi, src);
}

// FDIVR ST(i), ST(0)  ("accumulate" form)
void Assembler::fdivra(int i) {
  emit_farith(0xDC, 0xF0, i);
}

void Assembler::fdivrp(int i) {
  emit_farith(0xDE, 0xF0, i); // ST(0) <- ST(1) / ST(0) and pop (Intel manual wrong)
}

// FFREE ST(i): mark register as empty
void Assembler::ffree(int i) {
  emit_farith(0xDD, 0xC0, i);
}

// FILD m64int  (DF /5)
void Assembler::fild_d(Address adr) {
  InstructionMark im(this);
  emit_byte(0xDF);
  emit_operand32(rbp, adr);
}

// FILD m32int  (DB /0)
void Assembler::fild_s(Address adr) {
  InstructionMark im(this);
  emit_byte(0xDB);
  emit_operand32(rax, adr);
}

// FINCSTP: increment x87 stack-top pointer  (D9 F7)
void Assembler::fincstp() {
  emit_byte(0xD9);
  emit_byte(0xF7);
}

// FINIT: 9B (FWAIT) + DB E3 (FNINIT) — the waiting form
void Assembler::finit() {
  emit_byte(0x9B);
  emit_byte(0xDB);
  emit_byte(0xE3);
}

// FIST m32int  (DB /2): store ST(0) as int32 without popping
void Assembler::fist_s(Address adr) {
  InstructionMark im(this);
  emit_byte(0xDB);
  emit_operand32(rdx, adr);
}

// FISTP m64int  (DF /7)
void Assembler::fistp_d(Address adr) {
  InstructionMark im(this);
  emit_byte(0xDF);
  emit_operand32(rdi, adr);
}

// FISTP m32int  (DB /3)
void Assembler::fistp_s(Address adr) {
  InstructionMark im(this);
  emit_byte(0xDB);
  emit_operand32(rbx, adr);
}

// FLD1: push +1.0  (D9 E8)
void Assembler::fld1() {
  emit_byte(0xD9);
  emit_byte(0xE8);
}

// FLD m64fp  (DD /0)
void Assembler::fld_d(Address adr) {
  InstructionMark im(this);
  emit_byte(0xDD);
  emit_operand32(rax, adr);
}

// FLD m32fp  (D9 /0)
void Assembler::fld_s(Address adr) {
  InstructionMark im(this);
  emit_byte(0xD9);
  emit_operand32(rax, adr);
}


// FLD ST(index): push a copy of ST(index)
void Assembler::fld_s(int index) {
  emit_farith(0xD9, 0xC0, index);
}

// FLD m80fp (extended precision)  (DB /5)
void Assembler::fld_x(Address adr) {
  InstructionMark im(this);
  emit_byte(0xDB);
  emit_operand32(rbp, adr);
}

// FLDCW m16: load x87 control word  (D9 /5)
void Assembler::fldcw(Address src) {
  InstructionMark im(this);
  emit_byte(0xd9);
  emit_operand32(rbp, src);
}

// FLDENV m: load x87 environment  (D9 /4)
void Assembler::fldenv(Address src) {
  InstructionMark im(this);
  emit_byte(0xD9);
  emit_operand32(rsp, src);
}

// FLDLG2: push log10(2)  (D9 EC)
void Assembler::fldlg2() {
  emit_byte(0xD9);
  emit_byte(0xEC);
}

// FLDLN2: push ln(2)  (D9 ED)
void Assembler::fldln2() {
  emit_byte(0xD9);
  emit_byte(0xED);
}

// FLDZ: push +0.0  (D9 EE)
void Assembler::fldz() {
  emit_byte(0xD9);
  emit_byte(0xEE);
}

// Natural log of ST(0): ln(2) * log2(x) via FYL2X
void Assembler::flog() {
  fldln2();
  fxch();
  fyl2x();
}

// Base-10 log of ST(0): log10(2) * log2(x) via FYL2X
void Assembler::flog10() {
  fldlg2();
  fxch();
  fyl2x();
}

// FMUL ST(0), ST(i)
void Assembler::fmul(int i) {
  emit_farith(0xD8, 0xC8, i);
}

// FMUL m64fp  (DC /1)
void Assembler::fmul_d(Address src) {
  InstructionMark im(this);
  emit_byte(0xDC);
  emit_operand32(rcx, src);
}

// FMUL m32fp  (D8 /1)
void Assembler::fmul_s(Address src) {
  InstructionMark im(this);
  emit_byte(0xD8);
  emit_operand32(rcx, src);
}

// FMUL ST(i), ST(0)  ("accumulate" form)
void Assembler::fmula(int i) {
  emit_farith(0xDC, 0xC8, i);
}

// FMULP ST(i), ST(0) and pop
void Assembler::fmulp(int i) {
  emit_farith(0xDE, 0xC8, i);
}

// FNSAVE m: save x87 state without wait  (DD /6)
void Assembler::fnsave(Address dst) {
  InstructionMark im(this);
  emit_byte(0xDD);
  emit_operand32(rsi, dst);
}

// NOTE(review): emits 9B (FWAIT) before D9 /7, i.e. the waiting FSTCW form
// despite the "fnstcw" (no-wait) name — confirm this is intentional.
void Assembler::fnstcw(Address src) {
  InstructionMark im(this);
  emit_byte(0x9B);
  emit_byte(0xD9);
  emit_operand32(rdi, src);
}

// FNSTSW AX: store x87 status word into AX  (DF E0)
void Assembler::fnstsw_ax() {
  emit_byte(0xdF);
  emit_byte(0xE0);
}

// FPREM: partial remainder (truncating)  (D9 F8)
void Assembler::fprem() {
  emit_byte(0xD9);
  emit_byte(0xF8);
}

// FPREM1: IEEE partial remainder  (D9 F5)
void Assembler::fprem1() {
  emit_byte(0xD9);
  emit_byte(0xF5);
}

// FRSTOR m: restore x87 state  (DD /4)
void Assembler::frstor(Address src) {
  InstructionMark im(this);
  emit_byte(0xDD);
  emit_operand32(rsp, src);
}

// FSIN: ST(0) <- sin(ST(0))  (D9 FE)
void Assembler::fsin() {
  emit_byte(0xD9);
  emit_byte(0xFE);
}

// FSQRT: ST(0) <- sqrt(ST(0))  (D9 FA)
void Assembler::fsqrt() {
  emit_byte(0xD9);
  emit_byte(0xFA);
}

// FST m64fp  (DD /2): store without popping
void Assembler::fst_d(Address adr) {
  InstructionMark im(this);
  emit_byte(0xDD);
  emit_operand32(rdx, adr);
}

// FST m32fp  (D9 /2)
void Assembler::fst_s(Address adr) {
  InstructionMark im(this);
  emit_byte(0xD9);
  emit_operand32(rdx, adr);
}

// FSTP m64fp  (DD /3): store and pop
void Assembler::fstp_d(Address adr) {
  InstructionMark im(this);
  emit_byte(0xDD);
  emit_operand32(rbx, adr);
}

// FSTP ST(index): copy ST(0) into ST(index) and pop
void Assembler::fstp_d(int index) {
  emit_farith(0xDD, 0xD8, index);
}

// FSTP m32fp  (D9 /3)
void Assembler::fstp_s(Address adr) {
  InstructionMark im(this);
  emit_byte(0xD9);
  emit_operand32(rbx, adr);
}

// FSTP m80fp (extended precision)  (DB /7)
void Assembler::fstp_x(Address adr) {
  InstructionMark im(this);
  emit_byte(0xDB);
  emit_operand32(rdi, adr);
}

// FSUB ST(0), ST(i)
void Assembler::fsub(int i) {
  emit_farith(0xD8, 0xE0, i);
}

// FSUB m64fp  (DC /4)
void Assembler::fsub_d(Address src) {
  InstructionMark im(this);
  emit_byte(0xDC);
  emit_operand32(rsp, src);
}

// FSUB m32fp  (D8 /4)
void Assembler::fsub_s(Address src) {
  InstructionMark im(this);
  emit_byte(0xD8);
  emit_operand32(rsp, src);
}

// FSUB ST(i), ST(0)  ("accumulate" form — note DC /5 reg encoding)
void Assembler::fsuba(int i) {
  emit_farith(0xDC, 0xE8, i);
}

void Assembler::fsubp(int i) {
  emit_farith(0xDE, 0xE8, i); // ST(0) <- ST(0) - ST(1) and pop (Intel manual wrong)
}

// FSUBR ST(0), ST(i): reversed subtract, ST(0) <- ST(i) - ST(0)
void Assembler::fsubr(int i) {
  emit_farith(0xD8, 0xE8, i);
}

// FSUBR m64fp  (DC /5)
void Assembler::fsubr_d(Address src) {
  InstructionMark im(this);
  emit_byte(0xDC);
  emit_operand32(rbp, src);
}

// FSUBR m32fp  (D8 /5)
void Assembler::fsubr_s(Address src) {
  InstructionMark im(this);
  emit_byte(0xD8);
  emit_operand32(rbp, src);
}

// FSUBR ST(i), ST(0)  ("accumulate" form)
void Assembler::fsubra(int i) {
  emit_farith(0xDC, 0xE0, i);
}

void Assembler::fsubrp(int i) {
  emit_farith(0xDE, 0xE0, i); // ST(0) <- ST(1) - ST(0) and pop (Intel manual wrong)
}

// FPTAN (D9 F2) pushes a 1.0 after computing tan; the FSTP ST(0) (DD D8)
// pops that 1.0 so only the tangent remains on the stack.
void Assembler::ftan() {
  emit_byte(0xD9);
  emit_byte(0xF2);
  emit_byte(0xDD);
  emit_byte(0xD8);
}

// FTST: compare ST(0) with 0.0  (D9 E4)
void Assembler::ftst() {
  emit_byte(0xD9);
  emit_byte(0xE4);
}

// FUCOMI ST(0), ST(i): unordered compare setting EFLAGS directly
void Assembler::fucomi(int i) {
  // make sure the instruction is supported (introduced for P6, together with cmov)
  guarantee(VM_Version::supports_cmov(), "illegal instruction");
  emit_farith(0xDB, 0xE8, i);
}

// FUCOMIP: as fucomi, then pop
void Assembler::fucomip(int i) {
  // make sure the instruction is supported (introduced for P6, together with cmov)
  guarantee(VM_Version::supports_cmov(), "illegal instruction");
  emit_farith(0xDF, 0xE8, i);
}

// FWAIT / WAIT  (9B)
void Assembler::fwait() {
  emit_byte(0x9B);
}

// FXCH ST(i): exchange ST(0) and ST(i)
void Assembler::fxch(int i) {
  emit_farith(0xD9, 0xC8, i);
}

// FYL2X: ST(1) <- ST(1) * log2(ST(0)), pop  (D9 F1)
void Assembler::fyl2x() {
  emit_byte(0xD9);
  emit_byte(0xF1);
}

// FRNDINT: round ST(0) to integer per control word  (D9 FC)
void Assembler::frndint() {
  emit_byte(0xD9);
  emit_byte(0xFC);
}

// F2XM1: ST(0) <- 2^ST(0) - 1  (D9 F0)
void Assembler::f2xm1() {
  emit_byte(0xD9);
  emit_byte(0xF0);
}

// FLDL2E: push log2(e)  (D9 EA)
void Assembler::fldl2e() {
  emit_byte(0xD9);
  emit_byte(0xEA);
}

// SSE SIMD prefix byte values corresponding to VexSimdPrefix encoding.
static int simd_pre[4] = { 0, 0x66, 0xF3, 0xF2 };
// SSE opcode second byte values (first is 0x0F) corresponding to VexOpcode encoding.
static int simd_opc[4] = { 0, 0, 0x38, 0x3A };

// Generate SSE legacy REX prefix and SIMD opcode based on VEX encoding.
void Assembler::rex_prefix(Address adr, XMMRegister xreg, VexSimdPrefix pre, VexOpcode opc, bool rex_w) {
  if (pre > 0) {
    // Legacy mandatory prefix (66/F3/F2) must precede the REX byte.
    emit_byte(simd_pre[pre]);
  }
  if (rex_w) {
    prefixq(adr, xreg);
  } else {
    prefix(adr, xreg);
  }
  if (opc > 0) {
    // Escape byte(s): 0F, optionally followed by 38 or 3A.
    emit_byte(0x0F);
    int opc2 = simd_opc[opc];
    if (opc2 > 0) {
      emit_byte(opc2);
    }
  }
}

// Register-register variant of the above; returns the ModRM reg/rm encoding
// (with high bits already folded into the REX prefix).
int Assembler::rex_prefix_and_encode(int dst_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, bool rex_w) {
  if (pre > 0) {
    emit_byte(simd_pre[pre]);
  }
  int encode = (rex_w) ? prefixq_and_encode(dst_enc, src_enc) :
                         prefix_and_encode(dst_enc, src_enc);
  if (opc > 0) {
    emit_byte(0x0F);
    int opc2 = simd_opc[opc];
    if (opc2 > 0) {
      emit_byte(opc2);
    }
  }
  return encode;
}


// Emit an AVX VEX prefix. Uses the compact 2-byte form (C5) when possible;
// falls back to the 3-byte form (C4) when B/X/W or a 0F 38 / 0F 3A opcode
// map must be encoded. Note that R/X/B and nds bits are stored inverted.
void Assembler::vex_prefix(bool vex_r, bool vex_b, bool vex_x, bool vex_w, int nds_enc, VexSimdPrefix pre, VexOpcode opc, bool vector256) {
  if (vex_b || vex_x || vex_w || (opc == VEX_OPCODE_0F_38) || (opc == VEX_OPCODE_0F_3A)) {
    prefix(VEX_3bytes);

    int byte1 = (vex_r ? VEX_R : 0) | (vex_x ? VEX_X : 0) | (vex_b ? VEX_B : 0);
    byte1 = (~byte1) & 0xE0;  // R/X/B are 1s-complement in the encoding
    byte1 |= opc;             // low bits select the opcode map
    a_byte(byte1);

    int byte2 = ((~nds_enc) & 0xf) << 3;  // vvvv: inverted non-destructive source
    byte2 |= (vex_w ? VEX_W : 0) | (vector256 ? 4 : 0) | pre;  // W | L | pp
    emit_byte(byte2);
  } else {
    prefix(VEX_2bytes);

    int byte1 = vex_r ? VEX_R : 0;
    byte1 = (~byte1) & 0x80;             // inverted R
    byte1 |= ((~nds_enc) & 0xf) << 3;    // inverted vvvv
    byte1 |= (vector256 ? 4 : 0) | pre;  // L | pp
    emit_byte(byte1);
  }
}

// VEX prefix for a memory operand: derive R/X/B from the xmm register and
// the address's base/index extension needs.
void Assembler::vex_prefix(Address adr, int nds_enc, int xreg_enc, VexSimdPrefix pre, VexOpcode opc, bool vex_w, bool vector256){
  bool vex_r = (xreg_enc >= 8);
  bool vex_b = adr.base_needs_rex();
  bool vex_x = adr.index_needs_rex();
  vex_prefix(vex_r, vex_b, vex_x, vex_w, nds_enc, pre, opc, vector256);
}

// VEX prefix for register-register; returns the ModRM reg/rm encoding
// (low 3 bits of each — high bits live in the VEX prefix).
int Assembler::vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, bool vex_w, bool vector256) {
  bool vex_r = (dst_enc >= 8);
  bool vex_b = (src_enc >= 8);
  bool vex_x = false;
  vex_prefix(vex_r, vex_b, vex_x, vex_w, nds_enc, pre, opc, vector256);
  return (((dst_enc & 7) << 3) | (src_enc & 7));
}


// Emit either a VEX prefix (AVX enabled) or a legacy SSE prefix sequence for
// an xmm/memory instruction.
void Assembler::simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr, VexSimdPrefix pre, VexOpcode opc, bool rex_w, bool vector256) {
  if (UseAVX > 0) {
    int xreg_enc = xreg->encoding();
    int nds_enc = nds->is_valid() ? nds->encoding() : 0;
    vex_prefix(adr, nds_enc, xreg_enc, pre, opc, rex_w, vector256);
  } else {
    // SSE has no 3rd operand; nds must coincide with xreg or be absent.
    assert((nds == xreg) || (nds == xnoreg), "wrong sse encoding");
    rex_prefix(adr, xreg, pre, opc, rex_w);
  }
}

// Register-register variant; returns the ModRM reg/rm encoding.
int Assembler::simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src, VexSimdPrefix pre, VexOpcode opc, bool rex_w, bool vector256) {
  int dst_enc = dst->encoding();
  int src_enc = src->encoding();
  if (UseAVX > 0) {
    int nds_enc = nds->is_valid() ? nds->encoding() : 0;
    return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, rex_w, vector256);
  } else {
    assert((nds == dst) || (nds == src) || (nds == xnoreg), "wrong sse encoding");
    return rex_prefix_and_encode(dst_enc, src_enc, pre, opc, rex_w);
  }
}

#ifndef _LP64

// 32-bit INC r32: single-byte 40+r form (only valid outside 64-bit mode,
// where 40-4F are REX prefixes).
void Assembler::incl(Register dst) {
  // Don't use it directly. Use MacroAssembler::incrementl() instead.
  emit_byte(0x40 | dst->encoding());
}

void Assembler::lea(Register dst, Address src) {
  leal(dst, src);
}

// MOV m32, imm32 with relocation  (C7 /0)
void Assembler::mov_literal32(Address dst, int32_t imm32, RelocationHolder const& rspec) {
  InstructionMark im(this);
  emit_byte(0xC7);
  emit_operand(rax, dst);
  emit_data((int)imm32, rspec, 0);
}

// MOV r32, imm32 with relocation  (B8+r)
void Assembler::mov_literal32(Register dst, int32_t imm32, RelocationHolder const& rspec) {
  InstructionMark im(this);
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xB8 | encode);
  emit_data((int)imm32, rspec, 0);
}

void Assembler::popa() { // 32bit
  emit_byte(0x61);
}

// PUSH imm32 with relocation  (68)
void Assembler::push_literal32(int32_t imm32, RelocationHolder const& rspec) {
  InstructionMark im(this);
  emit_byte(0x68);
  emit_data(imm32, rspec, 0);
}

void Assembler::pusha() { // 32bit
  emit_byte(0x60);
}

// SETNE r8  (0F 95)
void Assembler::set_byte_if_not_zero(Register dst) {
  emit_byte(0x0F);
  emit_byte(0x95);
  emit_byte(0xE0 | dst->encoding());
}

// SHLD dst, src, CL  (0F A5): src supplies the reg field, dst the r/m field.
void Assembler::shldl(Register dst, Register src) {
  emit_byte(0x0F);
  emit_byte(0xA5);
  emit_byte(0xC0 | src->encoding() << 3 | dst->encoding());
}

// SHRD dst, src, CL  (0F AD)
void Assembler::shrdl(Register dst, Register src) {
  emit_byte(0x0F);
  emit_byte(0xAD);
  emit_byte(0xC0 | src->encoding() << 3 | dst->encoding());
}

#else // LP64

// SETNE r8; byteinst=true forces a REX for SPL/BPL/SIL/DIL encodings.
void Assembler::set_byte_if_not_zero(Register dst) {
  int enc = prefix_and_encode(dst->encoding(), true);
  emit_byte(0x0F);
  emit_byte(0x95);
  emit_byte(0xE0 | enc);
}

// 64bit only pieces of the assembler
// This should only be used by 64bit instructions that can use rip-relative
// it cannot be used by instructions that want an immediate value.
3766 3767 bool Assembler::reachable(AddressLiteral adr) { 3768 int64_t disp; 3769 // None will force a 64bit literal to the code stream. Likely a placeholder 3770 // for something that will be patched later and we need to certain it will 3771 // always be reachable. 3772 if (adr.reloc() == relocInfo::none) { 3773 return false; 3774 } 3775 if (adr.reloc() == relocInfo::internal_word_type) { 3776 // This should be rip relative and easily reachable. 3777 return true; 3778 } 3779 if (adr.reloc() == relocInfo::virtual_call_type || 3780 adr.reloc() == relocInfo::opt_virtual_call_type || 3781 adr.reloc() == relocInfo::static_call_type || 3782 adr.reloc() == relocInfo::static_stub_type ) { 3783 // This should be rip relative within the code cache and easily 3784 // reachable until we get huge code caches. (At which point 3785 // ic code is going to have issues). 3786 return true; 3787 } 3788 if (adr.reloc() != relocInfo::external_word_type && 3789 adr.reloc() != relocInfo::poll_return_type && // these are really external_word but need special 3790 adr.reloc() != relocInfo::poll_type && // relocs to identify them 3791 adr.reloc() != relocInfo::runtime_call_type ) { 3792 return false; 3793 } 3794 3795 // Stress the correction code 3796 if (ForceUnreachable) { 3797 // Must be runtimecall reloc, see if it is in the codecache 3798 // Flipping stuff in the codecache to be unreachable causes issues 3799 // with things like inline caches where the additional instructions 3800 // are not handled. 3801 if (CodeCache::find_blob(adr._target) == NULL) { 3802 return false; 3803 } 3804 } 3805 // For external_word_type/runtime_call_type if it is reachable from where we 3806 // are now (possibly a temp buffer) and where we might end up 3807 // anywhere in the codeCache then we are always reachable. 3808 // This would have to change if we ever save/restore shared code 3809 // to be more pessimistic. 
3810 disp = (int64_t)adr._target - ((int64_t)CodeCache::low_bound() + sizeof(int)); 3811 if (!is_simm32(disp)) return false; 3812 disp = (int64_t)adr._target - ((int64_t)CodeCache::high_bound() + sizeof(int)); 3813 if (!is_simm32(disp)) return false; 3814 3815 disp = (int64_t)adr._target - ((int64_t)_code_pos + sizeof(int)); 3816 3817 // Because rip relative is a disp + address_of_next_instruction and we 3818 // don't know the value of address_of_next_instruction we apply a fudge factor 3819 // to make sure we will be ok no matter the size of the instruction we get placed into. 3820 // We don't have to fudge the checks above here because they are already worst case. 3821 3822 // 12 == override/rex byte, opcode byte, rm byte, sib byte, a 4-byte disp , 4-byte literal 3823 // + 4 because better safe than sorry. 3824 const int fudge = 12 + 4; 3825 if (disp < 0) { 3826 disp -= fudge; 3827 } else { 3828 disp += fudge; 3829 } 3830 return is_simm32(disp); 3831 } 3832 3833 // Check if the polling page is not reachable from the code cache using rip-relative 3834 // addressing. 
// True if the polling page cannot be reached rip-relatively (disp32) from
// every possible position in the code cache (or ForceUnreachable is set).
bool Assembler::is_polling_page_far() {
  intptr_t addr = (intptr_t)os::get_polling_page();
  return ForceUnreachable ||
         !is_simm32(addr - (intptr_t)CodeCache::low_bound()) ||
         !is_simm32(addr - (intptr_t)CodeCache::high_bound());
}

// Emit a 64-bit data word, wrapping a bare reloc type into a RelocationHolder.
void Assembler::emit_data64(jlong data,
                            relocInfo::relocType rtype,
                            int format) {
  if (rtype == relocInfo::none) {
    emit_long64(data);
  } else {
    emit_data64(data, Relocation::spec_simple(rtype), format);
  }
}

// Emit a relocated 64-bit data word; must be called inside an InstructionMark.
void Assembler::emit_data64(jlong data,
                            RelocationHolder const& rspec,
                            int format) {
  assert(imm_operand == 0, "default format must be immediate in this file");
  assert(imm_operand == format, "must be immediate");
  assert(inst_mark() != NULL, "must be inside InstructionMark");
  // Do not use AbstractAssembler::relocate, which is not intended for
  // embedded words. Instead, relocate to the enclosing instruction.
  code_section()->relocate(inst_mark(), rspec, format);
#ifdef ASSERT
  check_relocation(rspec, format);
#endif
  emit_long64(data);
}

// The prefix*/prefix*_and_encode family below emits the REX prefix needed for
// a given operand combination and returns the register encoding(s) with the
// high (extension) bit stripped, since that bit travels in the REX byte.

// Single register; byteinst forces a bare REX for encodings 4-7 so that
// SPL/BPL/SIL/DIL are addressed instead of AH/CH/DH/BH.
int Assembler::prefix_and_encode(int reg_enc, bool byteinst) {
  if (reg_enc >= 8) {
    prefix(REX_B);
    reg_enc -= 8;
  } else if (byteinst && reg_enc >= 4) {
    prefix(REX);
  }
  return reg_enc;
}

// Single register, 64-bit operand size (REX.W always emitted).
int Assembler::prefixq_and_encode(int reg_enc) {
  if (reg_enc < 8) {
    prefix(REX_W);
  } else {
    prefix(REX_WB);
    reg_enc -= 8;
  }
  return reg_enc;
}

// Two registers; returns the ModRM reg/rm byte contents (dst in reg field).
int Assembler::prefix_and_encode(int dst_enc, int src_enc, bool byteinst) {
  if (dst_enc < 8) {
    if (src_enc >= 8) {
      prefix(REX_B);
      src_enc -= 8;
    } else if (byteinst && src_enc >= 4) {
      prefix(REX);
    }
  } else {
    if (src_enc < 8) {
      prefix(REX_R);
    } else {
      prefix(REX_RB);
      src_enc -= 8;
    }
    dst_enc -= 8;
  }
  return dst_enc << 3 | src_enc;
}

// Two registers, 64-bit operand size.
int Assembler::prefixq_and_encode(int dst_enc, int src_enc) {
  if (dst_enc < 8) {
    if (src_enc < 8) {
      prefix(REX_W);
    } else {
      prefix(REX_WB);
      src_enc -= 8;
    }
  } else {
    if (src_enc < 8) {
      prefix(REX_WR);
    } else {
      prefix(REX_WRB);
      src_enc -= 8;
    }
    dst_enc -= 8;
  }
  return dst_enc << 3 | src_enc;
}

// REX for a lone register operand (opcode-embedded register).
void Assembler::prefix(Register reg) {
  if (reg->encoding() >= 8) {
    prefix(REX_B);
  }
}

// REX for a memory operand with no register operand.
void Assembler::prefix(Address adr) {
  if (adr.base_needs_rex()) {
    if (adr.index_needs_rex()) {
      prefix(REX_XB);
    } else {
      prefix(REX_B);
    }
  } else {
    if (adr.index_needs_rex()) {
      prefix(REX_X);
    }
  }
}

// As above but with REX.W (64-bit operand size); W must be emitted even when
// no extension bits are needed.
void Assembler::prefixq(Address adr) {
  if (adr.base_needs_rex()) {
    if (adr.index_needs_rex()) {
      prefix(REX_WXB);
    } else {
      prefix(REX_WB);
    }
  } else {
    if (adr.index_needs_rex()) {
      prefix(REX_WX);
    } else {
      prefix(REX_W);
    }
  }
}


// REX for memory + register operands (R from reg, X/B from the address).
void Assembler::prefix(Address adr, Register reg, bool byteinst) {
  if (reg->encoding() < 8) {
    if (adr.base_needs_rex()) {
      if (adr.index_needs_rex()) {
        prefix(REX_XB);
      } else {
        prefix(REX_B);
      }
    } else {
      if (adr.index_needs_rex()) {
        prefix(REX_X);
      } else if (byteinst && reg->encoding() >= 4 ) {
        prefix(REX);
      }
    }
  } else {
    if (adr.base_needs_rex()) {
      if (adr.index_needs_rex()) {
        prefix(REX_RXB);
      } else {
        prefix(REX_RB);
      }
    } else {
      if (adr.index_needs_rex()) {
        prefix(REX_RX);
      } else {
        prefix(REX_R);
      }
    }
  }
}

// REX.W for memory + register operands.
void Assembler::prefixq(Address adr, Register src) {
  if (src->encoding() < 8) {
    if (adr.base_needs_rex()) {
      if (adr.index_needs_rex()) {
        prefix(REX_WXB);
      } else {
        prefix(REX_WB);
      }
    } else {
      if (adr.index_needs_rex()) {
        prefix(REX_WX);
      } else {
        prefix(REX_W);
      }
    }
  } else {
    if (adr.base_needs_rex()) {
      if (adr.index_needs_rex()) {
        prefix(REX_WRXB);
      } else {
        prefix(REX_WRB);
      }
    } else {
      if (adr.index_needs_rex()) {
        prefix(REX_WRX);
      } else {
        prefix(REX_WR);
      }
    }
  }
}

// REX for memory + xmm register operands.
void Assembler::prefix(Address adr, XMMRegister reg) {
  if (reg->encoding() < 8) {
    if (adr.base_needs_rex()) {
      if (adr.index_needs_rex()) {
        prefix(REX_XB);
      } else {
        prefix(REX_B);
      }
    } else {
      if (adr.index_needs_rex()) {
        prefix(REX_X);
      }
    }
  } else {
    if (adr.base_needs_rex()) {
      if (adr.index_needs_rex()) {
        prefix(REX_RXB);
      } else {
        prefix(REX_RB);
      }
    } else {
      if (adr.index_needs_rex()) {
        prefix(REX_RX);
      } else {
        prefix(REX_R);
      }
    }
  }
}

// REX.W for memory + xmm register operands.
void Assembler::prefixq(Address adr, XMMRegister src) {
  if (src->encoding() < 8) {
    if (adr.base_needs_rex()) {
      if (adr.index_needs_rex()) {
        prefix(REX_WXB);
      } else {
        prefix(REX_WB);
      }
    } else {
      if (adr.index_needs_rex()) {
        prefix(REX_WX);
      } else {
        prefix(REX_W);
      }
    }
  } else {
    if (adr.base_needs_rex()) {
      if (adr.index_needs_rex()) {
        prefix(REX_WRXB);
      } else {
        prefix(REX_WRB);
      }
    } else {
      if (adr.index_needs_rex()) {
        prefix(REX_WRX);
      } else {
        prefix(REX_WR);
      }
    }
  }
}

// ADC r64, imm32  (81 /2)
void Assembler::adcq(Register dst, int32_t imm32) {
  (void) prefixq_and_encode(dst->encoding());
  emit_arith(0x81, 0xD0, dst, imm32);
}

// ADC r64, m64  (13)
void Assembler::adcq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x13);
  emit_operand(dst, src);
}

// ADC r64, r64  (13)
void Assembler::adcq(Register dst, Register src) {
  // NOTE(review): "(int)" discards the result like the "(void)" used by
  // sibling methods — presumably a typo for (void); harmless either way.
  (int) prefixq_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x13, 0xC0, dst, src);
}

// ADD m64, imm32  (81 /0; emit_arith_operand may pick the sign-extended imm8 form)
void Assembler::addq(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefixq(dst);
  emit_arith_operand(0x81, rax, dst, imm32);
}

// ADD m64, r64  (01)
void Assembler::addq(Address dst, Register src) {
  InstructionMark im(this);
  prefixq(dst, src);
  emit_byte(0x01);
  emit_operand(src, dst);
}

// ADD r64, imm32  (81 /0)
void Assembler::addq(Register dst, int32_t imm32) {
  (void) prefixq_and_encode(dst->encoding());
  emit_arith(0x81, 0xC0, dst, imm32);
}

// ADD r64, m64  (03)
void Assembler::addq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x03);
  emit_operand(dst, src);
}

// ADD r64, r64  (03)
void Assembler::addq(Register dst, Register src) {
  (void) prefixq_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x03, 0xC0, dst, src);
}

// AND m64, imm32  (81 /4; the trailing 4 tells emit_operand that 4 immediate
// bytes follow the operand, so rip-relative displacements stay correct)
void Assembler::andq(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefixq(dst);
  emit_byte(0x81);
  emit_operand(rsp, dst, 4);
  emit_long(imm32);
}

// AND r64, imm32  (81 /4)
void Assembler::andq(Register dst, int32_t imm32) {
  (void) prefixq_and_encode(dst->encoding());
  emit_arith(0x81, 0xE0, dst, imm32);
}

// AND r64, m64  (23)
void Assembler::andq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x23);
  emit_operand(dst, src);
}

// AND r64, r64  (23)
void Assembler::andq(Register dst, Register src) {
  // NOTE(review): "(int)" — see adcq(Register, Register).
  (int) prefixq_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x23, 0xC0, dst, src);
}

// BSF r64, r64  (0F BC)
void Assembler::bsfq(Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xBC);
  emit_byte(0xC0 | encode);
}

// BSR r64, r64  (0F BD) — same opcode bytes as LZCNT minus the F3 prefix,
// hence the assert: with LZCNT support this encoding would be ambiguous.
void Assembler::bsrq(Register dst, Register src) {
  assert(!VM_Version::supports_lzcnt(), "encoding is treated as LZCNT");
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xBD);
  emit_byte(0xC0 | encode);
}

// BSWAP r64  (0F C8+r)
void Assembler::bswapq(Register reg) {
  int encode = prefixq_and_encode(reg->encoding());
  emit_byte(0x0F);
  emit_byte(0xC8 | encode);
}

// CQO: sign-extend RAX into RDX:RAX  (REX.W 99)
void Assembler::cdqq() {
  prefix(REX_W);
  emit_byte(0x99);
}

// CLFLUSH m  (0F AE /7)
void Assembler::clflush(Address adr) {
  prefix(adr);
  emit_byte(0x0F);
  emit_byte(0xAE);
  emit_operand(rdi, adr);
}

// CMOVcc r64, r64  (0F 40+cc)
void Assembler::cmovq(Condition cc, Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x40 | cc);
  emit_byte(0xC0 | encode);
}

// CMOVcc r64, m64
void Assembler::cmovq(Condition cc, Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x0F);
  emit_byte(0x40 | cc);
  emit_operand(dst, src);
}

// CMP m64, imm32  (81 /7)
void Assembler::cmpq(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefixq(dst);
  emit_byte(0x81);
  emit_operand(rdi, dst, 4);
  emit_long(imm32);
}

// CMP r64, imm32  (81 /7)
void Assembler::cmpq(Register dst, int32_t imm32) {
  (void) prefixq_and_encode(dst->encoding());
  emit_arith(0x81, 0xF8, dst, imm32);
}

// NOTE(review): 0x3B is CMP r64, r/m64 with src in the reg field, so this
// actually compares src against the memory operand (src - mem), i.e. the
// reverse of the 0x39 direction the (dst, src) argument order suggests.
// Fine for equality tests; confirm callers don't rely on ordered flags.
void Assembler::cmpq(Address dst, Register src) {
  InstructionMark im(this);
  prefixq(dst, src);
  emit_byte(0x3B);
  emit_operand(src, dst);
}

// CMP r64, r64  (3B)
void Assembler::cmpq(Register dst, Register src) {
  (void) prefixq_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x3B, 0xC0, dst, src);
}

// CMP r64, m64  (3B)
void Assembler::cmpq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x3B);
  emit_operand(dst, src);
}

// CMPXCHG m64, r64  (0F B1); needs a LOCK prefix (emitted by callers) to be atomic
void Assembler::cmpxchgq(Register reg, Address adr) {
  InstructionMark im(this);
  prefixq(adr, reg);
  emit_byte(0x0F);
  emit_byte(0xB1);
  emit_operand(reg, adr);
}

// CVTSI2SD xmm, r64  (F2 REX.W 0F 2A)
void Assembler::cvtsi2sdq(XMMRegister dst, Register src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  int encode = simd_prefix_and_encode_q(dst, dst, src, VEX_SIMD_F2);
  emit_byte(0x2A);
  emit_byte(0xC0 | encode);
}

// CVTSI2SD xmm, m64
void Assembler::cvtsi2sdq(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  simd_prefix_q(dst, dst, src, VEX_SIMD_F2);
  emit_byte(0x2A);
  emit_operand(dst, src);
}

// CVTSI2SS xmm, r64  (F3 REX.W 0F 2A)
void Assembler::cvtsi2ssq(XMMRegister dst, Register src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  int encode = simd_prefix_and_encode_q(dst, dst, src, VEX_SIMD_F3);
  emit_byte(0x2A);
  emit_byte(0xC0 | encode);
}

// CVTSI2SS xmm, m64
void Assembler::cvtsi2ssq(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  simd_prefix_q(dst, dst, src, VEX_SIMD_F3);
  emit_byte(0x2A);
  emit_operand(dst, src);
}

// CVTTSD2SI r64, xmm  (F2 REX.W 0F 2C) — truncating convert
void Assembler::cvttsd2siq(Register dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_F2);
  emit_byte(0x2C);
  emit_byte(0xC0 | encode);
}

// CVTTSS2SI r64, xmm  (F3 REX.W 0F 2C)
void Assembler::cvttss2siq(Register dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_F3);
  emit_byte(0x2C);
  emit_byte(0xC0 | encode);
}

// DEC r32  (FF /1)
void Assembler::decl(Register dst) {
  // Don't use it directly. Use MacroAssembler::decrementl() instead.
  // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xFF);
  emit_byte(0xC8 | encode);
}

// DEC r64  (REX.W FF /1)
void Assembler::decq(Register dst) {
  // Don't use it directly. Use MacroAssembler::decrementq() instead.
  // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
  int encode = prefixq_and_encode(dst->encoding());
  emit_byte(0xFF);
  emit_byte(0xC8 | encode);
}

// DEC m64  (FF /1)
void Assembler::decq(Address dst) {
  // Don't use it directly. Use MacroAssembler::decrementq() instead.
  InstructionMark im(this);
  prefixq(dst);
  emit_byte(0xFF);
  emit_operand(rcx, dst);
}

// FXRSTOR m512  (0F AE /1)
void Assembler::fxrstor(Address src) {
  prefixq(src);
  emit_byte(0x0F);
  emit_byte(0xAE);
  emit_operand(as_Register(1), src);
}

// FXSAVE m512  (0F AE /0)
void Assembler::fxsave(Address dst) {
  prefixq(dst);
  emit_byte(0x0F);
  emit_byte(0xAE);
  emit_operand(as_Register(0), dst);
}

// IDIV r64  (F7 /7): RDX:RAX / src -> RAX (quot), RDX (rem)
void Assembler::idivq(Register src) {
  int encode = prefixq_and_encode(src->encoding());
  emit_byte(0xF7);
  emit_byte(0xF8 | encode);
}

// IMUL r64, r64  (0F AF)
void Assembler::imulq(Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xAF);
  emit_byte(0xC0 | encode);
}

// IMUL r64, r64, imm  (6B with sign-extended imm8, else 69 with imm32)
void Assembler::imulq(Register dst, Register src, int value) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  if (is8bit(value)) {
    emit_byte(0x6B);
    emit_byte(0xC0 | encode);
    emit_byte(value & 0xFF);
  } else {
    emit_byte(0x69);
    emit_byte(0xC0 | encode);
    emit_long(value);
  }
}

// INC r32  (FF /0)
void Assembler::incl(Register dst) {
  // Don't use it directly. Use MacroAssembler::incrementl() instead.
  // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xFF);
  emit_byte(0xC0 | encode);
}

// INC r64  (REX.W FF /0)
void Assembler::incq(Register dst) {
  // Don't use it directly. Use MacroAssembler::incrementq() instead.
  // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
  int encode = prefixq_and_encode(dst->encoding());
  emit_byte(0xFF);
  emit_byte(0xC0 | encode);
}

// INC m64  (FF /0)
void Assembler::incq(Address dst) {
  // Don't use it directly. Use MacroAssembler::incrementq() instead.
  InstructionMark im(this);
  prefixq(dst);
  emit_byte(0xFF);
  emit_operand(rax, dst);
}

void Assembler::lea(Register dst, Address src) {
  leaq(dst, src);
}

// LEA r64, m  (8D)
void Assembler::leaq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x8D);
  emit_operand(dst, src);
}

// MOV r64, imm64  (REX.W B8+r with full 8-byte immediate)
void Assembler::mov64(Register dst, int64_t imm64) {
  InstructionMark im(this);
  int encode = prefixq_and_encode(dst->encoding());
  emit_byte(0xB8 | encode);
  emit_long64(imm64);
}

// MOV r64, imm64 with relocation
void Assembler::mov_literal64(Register dst, intptr_t imm64, RelocationHolder const& rspec) {
  InstructionMark im(this);
  int encode = prefixq_and_encode(dst->encoding());
  emit_byte(0xB8 | encode);
  emit_data64(imm64, rspec);
}

// MOV r32, narrow-oop imm32 (compressed oop; relocated with narrow_oop format)
void Assembler::mov_narrow_oop(Register dst, int32_t imm32, RelocationHolder const& rspec) {
  InstructionMark im(this);
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xB8 | encode);
  emit_data((int)imm32, rspec, narrow_oop_operand);
}

// MOV m32, narrow-oop imm32  (C7 /0)
void Assembler::mov_narrow_oop(Address dst, int32_t imm32, RelocationHolder const& rspec) {
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0xC7);
  emit_operand(rax, dst, 4);
  emit_data((int)imm32, rspec, narrow_oop_operand);
}

// CMP r32, narrow-oop imm32  (81 /7)
void Assembler::cmp_narrow_oop(Register src1, int32_t imm32, RelocationHolder const& rspec) {
  InstructionMark im(this);
  int encode = prefix_and_encode(src1->encoding());
  emit_byte(0x81);
  emit_byte(0xF8 | encode);
  emit_data((int)imm32, rspec, narrow_oop_operand);
}
4429 4430 void Assembler::cmp_narrow_oop(Address src1, int32_t imm32, RelocationHolder const& rspec) { 4431 InstructionMark im(this); 4432 prefix(src1); 4433 emit_byte(0x81); 4434 emit_operand(rax, src1, 4); 4435 emit_data((int)imm32, rspec, narrow_oop_operand); 4436 } 4437 4438 void Assembler::lzcntq(Register dst, Register src) { 4439 assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR"); 4440 emit_byte(0xF3); 4441 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 4442 emit_byte(0x0F); 4443 emit_byte(0xBD); 4444 emit_byte(0xC0 | encode); 4445 } 4446 4447 void Assembler::movdq(XMMRegister dst, Register src) { 4448 // table D-1 says MMX/SSE2 4449 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 4450 int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_66); 4451 emit_byte(0x6E); 4452 emit_byte(0xC0 | encode); 4453 } 4454 4455 void Assembler::movdq(Register dst, XMMRegister src) { 4456 // table D-1 says MMX/SSE2 4457 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 4458 // swap src/dst to get correct prefix 4459 int encode = simd_prefix_and_encode_q(src, dst, VEX_SIMD_66); 4460 emit_byte(0x7E); 4461 emit_byte(0xC0 | encode); 4462 } 4463 4464 void Assembler::movq(Register dst, Register src) { 4465 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 4466 emit_byte(0x8B); 4467 emit_byte(0xC0 | encode); 4468 } 4469 4470 void Assembler::movq(Register dst, Address src) { 4471 InstructionMark im(this); 4472 prefixq(src, dst); 4473 emit_byte(0x8B); 4474 emit_operand(dst, src); 4475 } 4476 4477 void Assembler::movq(Address dst, Register src) { 4478 InstructionMark im(this); 4479 prefixq(dst, src); 4480 emit_byte(0x89); 4481 emit_operand(src, dst); 4482 } 4483 4484 void Assembler::movsbq(Register dst, Address src) { 4485 InstructionMark im(this); 4486 prefixq(src, dst); 4487 emit_byte(0x0F); 4488 emit_byte(0xBE); 4489 emit_operand(dst, src); 4490 } 4491 4492 void Assembler::movsbq(Register dst, Register src) { 4493 int encode 
= prefixq_and_encode(dst->encoding(), src->encoding()); 4494 emit_byte(0x0F); 4495 emit_byte(0xBE); 4496 emit_byte(0xC0 | encode); 4497 } 4498 4499 void Assembler::movslq(Register dst, int32_t imm32) { 4500 // dbx shows movslq(rcx, 3) as movq $0x0000000049000000,(%rbx) 4501 // and movslq(r8, 3); as movl $0x0000000048000000,(%rbx) 4502 // as a result we shouldn't use until tested at runtime... 4503 ShouldNotReachHere(); 4504 InstructionMark im(this); 4505 int encode = prefixq_and_encode(dst->encoding()); 4506 emit_byte(0xC7 | encode); 4507 emit_long(imm32); 4508 } 4509 4510 void Assembler::movslq(Address dst, int32_t imm32) { 4511 assert(is_simm32(imm32), "lost bits"); 4512 InstructionMark im(this); 4513 prefixq(dst); 4514 emit_byte(0xC7); 4515 emit_operand(rax, dst, 4); 4516 emit_long(imm32); 4517 } 4518 4519 void Assembler::movslq(Register dst, Address src) { 4520 InstructionMark im(this); 4521 prefixq(src, dst); 4522 emit_byte(0x63); 4523 emit_operand(dst, src); 4524 } 4525 4526 void Assembler::movslq(Register dst, Register src) { 4527 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 4528 emit_byte(0x63); 4529 emit_byte(0xC0 | encode); 4530 } 4531 4532 void Assembler::movswq(Register dst, Address src) { 4533 InstructionMark im(this); 4534 prefixq(src, dst); 4535 emit_byte(0x0F); 4536 emit_byte(0xBF); 4537 emit_operand(dst, src); 4538 } 4539 4540 void Assembler::movswq(Register dst, Register src) { 4541 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 4542 emit_byte(0x0F); 4543 emit_byte(0xBF); 4544 emit_byte(0xC0 | encode); 4545 } 4546 4547 void Assembler::movzbq(Register dst, Address src) { 4548 InstructionMark im(this); 4549 prefixq(src, dst); 4550 emit_byte(0x0F); 4551 emit_byte(0xB6); 4552 emit_operand(dst, src); 4553 } 4554 4555 void Assembler::movzbq(Register dst, Register src) { 4556 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 4557 emit_byte(0x0F); 4558 emit_byte(0xB6); 4559 emit_byte(0xC0 | 
encode); 4560 } 4561 4562 void Assembler::movzwq(Register dst, Address src) { 4563 InstructionMark im(this); 4564 prefixq(src, dst); 4565 emit_byte(0x0F); 4566 emit_byte(0xB7); 4567 emit_operand(dst, src); 4568 } 4569 4570 void Assembler::movzwq(Register dst, Register src) { 4571 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 4572 emit_byte(0x0F); 4573 emit_byte(0xB7); 4574 emit_byte(0xC0 | encode); 4575 } 4576 4577 void Assembler::negq(Register dst) { 4578 int encode = prefixq_and_encode(dst->encoding()); 4579 emit_byte(0xF7); 4580 emit_byte(0xD8 | encode); 4581 } 4582 4583 void Assembler::notq(Register dst) { 4584 int encode = prefixq_and_encode(dst->encoding()); 4585 emit_byte(0xF7); 4586 emit_byte(0xD0 | encode); 4587 } 4588 4589 void Assembler::orq(Address dst, int32_t imm32) { 4590 InstructionMark im(this); 4591 prefixq(dst); 4592 emit_byte(0x81); 4593 emit_operand(rcx, dst, 4); 4594 emit_long(imm32); 4595 } 4596 4597 void Assembler::orq(Register dst, int32_t imm32) { 4598 (void) prefixq_and_encode(dst->encoding()); 4599 emit_arith(0x81, 0xC8, dst, imm32); 4600 } 4601 4602 void Assembler::orq(Register dst, Address src) { 4603 InstructionMark im(this); 4604 prefixq(src, dst); 4605 emit_byte(0x0B); 4606 emit_operand(dst, src); 4607 } 4608 4609 void Assembler::orq(Register dst, Register src) { 4610 (void) prefixq_and_encode(dst->encoding(), src->encoding()); 4611 emit_arith(0x0B, 0xC0, dst, src); 4612 } 4613 4614 void Assembler::popa() { // 64bit 4615 movq(r15, Address(rsp, 0)); 4616 movq(r14, Address(rsp, wordSize)); 4617 movq(r13, Address(rsp, 2 * wordSize)); 4618 movq(r12, Address(rsp, 3 * wordSize)); 4619 movq(r11, Address(rsp, 4 * wordSize)); 4620 movq(r10, Address(rsp, 5 * wordSize)); 4621 movq(r9, Address(rsp, 6 * wordSize)); 4622 movq(r8, Address(rsp, 7 * wordSize)); 4623 movq(rdi, Address(rsp, 8 * wordSize)); 4624 movq(rsi, Address(rsp, 9 * wordSize)); 4625 movq(rbp, Address(rsp, 10 * wordSize)); 4626 // skip rsp 4627 movq(rbx, 
Address(rsp, 12 * wordSize)); 4628 movq(rdx, Address(rsp, 13 * wordSize)); 4629 movq(rcx, Address(rsp, 14 * wordSize)); 4630 movq(rax, Address(rsp, 15 * wordSize)); 4631 4632 addq(rsp, 16 * wordSize); 4633 } 4634 4635 void Assembler::popcntq(Register dst, Address src) { 4636 assert(VM_Version::supports_popcnt(), "must support"); 4637 InstructionMark im(this); 4638 emit_byte(0xF3); 4639 prefixq(src, dst); 4640 emit_byte(0x0F); 4641 emit_byte(0xB8); 4642 emit_operand(dst, src); 4643 } 4644 4645 void Assembler::popcntq(Register dst, Register src) { 4646 assert(VM_Version::supports_popcnt(), "must support"); 4647 emit_byte(0xF3); 4648 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 4649 emit_byte(0x0F); 4650 emit_byte(0xB8); 4651 emit_byte(0xC0 | encode); 4652 } 4653 4654 void Assembler::popq(Address dst) { 4655 InstructionMark im(this); 4656 prefixq(dst); 4657 emit_byte(0x8F); 4658 emit_operand(rax, dst); 4659 } 4660 4661 void Assembler::pusha() { // 64bit 4662 // we have to store original rsp. ABI says that 128 bytes 4663 // below rsp are local scratch. 
4664 movq(Address(rsp, -5 * wordSize), rsp); 4665 4666 subq(rsp, 16 * wordSize); 4667 4668 movq(Address(rsp, 15 * wordSize), rax); 4669 movq(Address(rsp, 14 * wordSize), rcx); 4670 movq(Address(rsp, 13 * wordSize), rdx); 4671 movq(Address(rsp, 12 * wordSize), rbx); 4672 // skip rsp 4673 movq(Address(rsp, 10 * wordSize), rbp); 4674 movq(Address(rsp, 9 * wordSize), rsi); 4675 movq(Address(rsp, 8 * wordSize), rdi); 4676 movq(Address(rsp, 7 * wordSize), r8); 4677 movq(Address(rsp, 6 * wordSize), r9); 4678 movq(Address(rsp, 5 * wordSize), r10); 4679 movq(Address(rsp, 4 * wordSize), r11); 4680 movq(Address(rsp, 3 * wordSize), r12); 4681 movq(Address(rsp, 2 * wordSize), r13); 4682 movq(Address(rsp, wordSize), r14); 4683 movq(Address(rsp, 0), r15); 4684 } 4685 4686 void Assembler::pushq(Address src) { 4687 InstructionMark im(this); 4688 prefixq(src); 4689 emit_byte(0xFF); 4690 emit_operand(rsi, src); 4691 } 4692 4693 void Assembler::rclq(Register dst, int imm8) { 4694 assert(isShiftCount(imm8 >> 1), "illegal shift count"); 4695 int encode = prefixq_and_encode(dst->encoding()); 4696 if (imm8 == 1) { 4697 emit_byte(0xD1); 4698 emit_byte(0xD0 | encode); 4699 } else { 4700 emit_byte(0xC1); 4701 emit_byte(0xD0 | encode); 4702 emit_byte(imm8); 4703 } 4704 } 4705 void Assembler::sarq(Register dst, int imm8) { 4706 assert(isShiftCount(imm8 >> 1), "illegal shift count"); 4707 int encode = prefixq_and_encode(dst->encoding()); 4708 if (imm8 == 1) { 4709 emit_byte(0xD1); 4710 emit_byte(0xF8 | encode); 4711 } else { 4712 emit_byte(0xC1); 4713 emit_byte(0xF8 | encode); 4714 emit_byte(imm8); 4715 } 4716 } 4717 4718 void Assembler::sarq(Register dst) { 4719 int encode = prefixq_and_encode(dst->encoding()); 4720 emit_byte(0xD3); 4721 emit_byte(0xF8 | encode); 4722 } 4723 4724 void Assembler::sbbq(Address dst, int32_t imm32) { 4725 InstructionMark im(this); 4726 prefixq(dst); 4727 emit_arith_operand(0x81, rbx, dst, imm32); 4728 } 4729 4730 void Assembler::sbbq(Register dst, int32_t imm32) { 
4731 (void) prefixq_and_encode(dst->encoding()); 4732 emit_arith(0x81, 0xD8, dst, imm32); 4733 } 4734 4735 void Assembler::sbbq(Register dst, Address src) { 4736 InstructionMark im(this); 4737 prefixq(src, dst); 4738 emit_byte(0x1B); 4739 emit_operand(dst, src); 4740 } 4741 4742 void Assembler::sbbq(Register dst, Register src) { 4743 (void) prefixq_and_encode(dst->encoding(), src->encoding()); 4744 emit_arith(0x1B, 0xC0, dst, src); 4745 } 4746 4747 void Assembler::shlq(Register dst, int imm8) { 4748 assert(isShiftCount(imm8 >> 1), "illegal shift count"); 4749 int encode = prefixq_and_encode(dst->encoding()); 4750 if (imm8 == 1) { 4751 emit_byte(0xD1); 4752 emit_byte(0xE0 | encode); 4753 } else { 4754 emit_byte(0xC1); 4755 emit_byte(0xE0 | encode); 4756 emit_byte(imm8); 4757 } 4758 } 4759 4760 void Assembler::shlq(Register dst) { 4761 int encode = prefixq_and_encode(dst->encoding()); 4762 emit_byte(0xD3); 4763 emit_byte(0xE0 | encode); 4764 } 4765 4766 void Assembler::shrq(Register dst, int imm8) { 4767 assert(isShiftCount(imm8 >> 1), "illegal shift count"); 4768 int encode = prefixq_and_encode(dst->encoding()); 4769 emit_byte(0xC1); 4770 emit_byte(0xE8 | encode); 4771 emit_byte(imm8); 4772 } 4773 4774 void Assembler::shrq(Register dst) { 4775 int encode = prefixq_and_encode(dst->encoding()); 4776 emit_byte(0xD3); 4777 emit_byte(0xE8 | encode); 4778 } 4779 4780 void Assembler::subq(Address dst, int32_t imm32) { 4781 InstructionMark im(this); 4782 prefixq(dst); 4783 emit_arith_operand(0x81, rbp, dst, imm32); 4784 } 4785 4786 void Assembler::subq(Address dst, Register src) { 4787 InstructionMark im(this); 4788 prefixq(dst, src); 4789 emit_byte(0x29); 4790 emit_operand(src, dst); 4791 } 4792 4793 void Assembler::subq(Register dst, int32_t imm32) { 4794 (void) prefixq_and_encode(dst->encoding()); 4795 emit_arith(0x81, 0xE8, dst, imm32); 4796 } 4797 4798 // Force generation of a 4 byte immediate value even if it fits into 8bit 4799 void Assembler::subq_imm32(Register 
dst, int32_t imm32) { 4800 (void) prefixq_and_encode(dst->encoding()); 4801 emit_arith_imm32(0x81, 0xE8, dst, imm32); 4802 } 4803 4804 void Assembler::subq(Register dst, Address src) { 4805 InstructionMark im(this); 4806 prefixq(src, dst); 4807 emit_byte(0x2B); 4808 emit_operand(dst, src); 4809 } 4810 4811 void Assembler::subq(Register dst, Register src) { 4812 (void) prefixq_and_encode(dst->encoding(), src->encoding()); 4813 emit_arith(0x2B, 0xC0, dst, src); 4814 } 4815 4816 void Assembler::testq(Register dst, int32_t imm32) { 4817 // not using emit_arith because test 4818 // doesn't support sign-extension of 4819 // 8bit operands 4820 int encode = dst->encoding(); 4821 if (encode == 0) { 4822 prefix(REX_W); 4823 emit_byte(0xA9); 4824 } else { 4825 encode = prefixq_and_encode(encode); 4826 emit_byte(0xF7); 4827 emit_byte(0xC0 | encode); 4828 } 4829 emit_long(imm32); 4830 } 4831 4832 void Assembler::testq(Register dst, Register src) { 4833 (void) prefixq_and_encode(dst->encoding(), src->encoding()); 4834 emit_arith(0x85, 0xC0, dst, src); 4835 } 4836 4837 void Assembler::xaddq(Address dst, Register src) { 4838 InstructionMark im(this); 4839 prefixq(dst, src); 4840 emit_byte(0x0F); 4841 emit_byte(0xC1); 4842 emit_operand(src, dst); 4843 } 4844 4845 void Assembler::xchgq(Register dst, Address src) { 4846 InstructionMark im(this); 4847 prefixq(src, dst); 4848 emit_byte(0x87); 4849 emit_operand(dst, src); 4850 } 4851 4852 void Assembler::xchgq(Register dst, Register src) { 4853 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 4854 emit_byte(0x87); 4855 emit_byte(0xc0 | encode); 4856 } 4857 4858 void Assembler::xorq(Register dst, Register src) { 4859 (void) prefixq_and_encode(dst->encoding(), src->encoding()); 4860 emit_arith(0x33, 0xC0, dst, src); 4861 } 4862 4863 void Assembler::xorq(Register dst, Address src) { 4864 InstructionMark im(this); 4865 prefixq(src, dst); 4866 emit_byte(0x33); 4867 emit_operand(dst, src); 4868 } 4869 4870 #endif // !LP64 
// Maps each Assembler::Condition (which is also the low nibble of the
// corresponding Jcc/SETcc opcode) to its logical negation.  Used to
// invert the sense of a conditional branch.
static Assembler::Condition reverse[] = {
    Assembler::noOverflow     /* overflow      = 0x0 */ ,
    Assembler::overflow       /* noOverflow    = 0x1 */ ,
    Assembler::aboveEqual     /* carrySet      = 0x2, below         = 0x2 */ ,
    Assembler::below          /* aboveEqual    = 0x3, carryClear    = 0x3 */ ,
    Assembler::notZero        /* zero          = 0x4, equal         = 0x4 */ ,
    Assembler::zero           /* notZero       = 0x5, notEqual      = 0x5 */ ,
    Assembler::above          /* belowEqual    = 0x6 */ ,
    Assembler::belowEqual     /* above         = 0x7 */ ,
    Assembler::positive       /* negative      = 0x8 */ ,
    Assembler::negative       /* positive      = 0x9 */ ,
    Assembler::noParity       /* parity        = 0xa */ ,
    Assembler::parity         /* noParity      = 0xb */ ,
    Assembler::greaterEqual   /* less          = 0xc */ ,
    Assembler::less           /* greaterEqual  = 0xd */ ,
    Assembler::greater        /* lessEqual     = 0xe */ ,
    Assembler::lessEqual      /* greater       = 0xf, */

};


// Implementation of MacroAssembler

// First all the versions that have distinct versions depending on 32/64 bit
// Unless the difference is trivial (1 line or so).
#ifndef _LP64

// 32bit versions

Address MacroAssembler::as_Address(AddressLiteral adr) {
  return Address(adr.target(), adr.rspec());
}

Address MacroAssembler::as_Address(ArrayAddress adr) {
  return Address::make_array(adr);
}

// Emit the biased-locking fast path for obj_reg: jump to 'done' if the lock
// is (or becomes) biased toward the current thread, to *slow_case when the
// runtime must revoke/rebias, and fall through to cas_label for the normal
// CAS-based locking.  Returns the code offset of the instruction that may
// take an implicit null check on obj_reg (-1 if the caller already loaded
// the mark word into swap_reg).
int MacroAssembler::biased_locking_enter(Register lock_reg,
                                         Register obj_reg,
                                         Register swap_reg,
                                         Register tmp_reg,
                                         bool swap_reg_contains_mark,
                                         Label& done,
                                         Label* slow_case,
                                         BiasedLockingCounters* counters) {
  assert(UseBiasedLocking, "why call this otherwise?");
  assert(swap_reg == rax, "swap_reg must be rax, for cmpxchg");
  assert_different_registers(lock_reg, obj_reg, swap_reg);

  if (PrintBiasedLockingStatistics && counters == NULL)
    counters = BiasedLocking::counters();

  // On 32-bit we may have no spare register: reuse lock_reg as the scratch
  // register and push/pop it around every use.
  bool need_tmp_reg = false;
  if (tmp_reg == noreg) {
    need_tmp_reg = true;
    tmp_reg = lock_reg;
  } else {
    assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg);
  }
  assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
  Address mark_addr      (obj_reg, oopDesc::mark_offset_in_bytes());
  Address klass_addr     (obj_reg, oopDesc::klass_offset_in_bytes());
  Address saved_mark_addr(lock_reg, 0);

  // Biased locking
  // See whether the lock is currently biased toward our thread and
  // whether the epoch is still valid
  // Note that the runtime guarantees sufficient alignment of JavaThread
  // pointers to allow age to be placed into low bits
  // First check to see whether biasing is even enabled for this object
  Label cas_label;
  int null_check_offset = -1;
  if (!swap_reg_contains_mark) {
    null_check_offset = offset();
    movl(swap_reg, mark_addr);
  }
  if (need_tmp_reg) {
    push(tmp_reg);
  }
  movl(tmp_reg, swap_reg);
  andl(tmp_reg, markOopDesc::biased_lock_mask_in_place);
  cmpl(tmp_reg, markOopDesc::biased_lock_pattern);
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  jcc(Assembler::notEqual, cas_label);
  // The bias pattern is present in the object's header. Need to check
  // whether the bias owner and the epoch are both still current.
  // Note that because there is no current thread register on x86 we
  // need to store off the mark word we read out of the object to
  // avoid reloading it and needing to recheck invariants below. This
  // store is unfortunate but it makes the overall code shorter and
  // simpler.
  movl(saved_mark_addr, swap_reg);
  if (need_tmp_reg) {
    push(tmp_reg);
  }
  get_thread(tmp_reg);
  xorl(swap_reg, tmp_reg);
  if (swap_reg_contains_mark) {
    null_check_offset = offset();
  }
  movl(tmp_reg, klass_addr);
  xorl(swap_reg, Address(tmp_reg, Klass::prototype_header_offset()));
  andl(swap_reg, ~((int) markOopDesc::age_mask_in_place));
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  if (counters != NULL) {
    cond_inc32(Assembler::zero,
               ExternalAddress((address)counters->biased_lock_entry_count_addr()));
  }
  jcc(Assembler::equal, done);

  Label try_revoke_bias;
  Label try_rebias;

  // At this point we know that the header has the bias pattern and
  // that we are not the bias owner in the current epoch. We need to
  // figure out more details about the state of the header in order to
  // know what operations can be legally performed on the object's
  // header.

  // If the low three bits in the xor result aren't clear, that means
  // the prototype header is no longer biased and we have to revoke
  // the bias on this object.
  testl(swap_reg, markOopDesc::biased_lock_mask_in_place);
  jcc(Assembler::notZero, try_revoke_bias);

  // Biasing is still enabled for this data type. See whether the
  // epoch of the current bias is still valid, meaning that the epoch
  // bits of the mark word are equal to the epoch bits of the
  // prototype header. (Note that the prototype header's epoch bits
  // only change at a safepoint.) If not, attempt to rebias the object
  // toward the current thread. Note that we must be absolutely sure
  // that the current epoch is invalid in order to do this because
  // otherwise the manipulations it performs on the mark word are
  // illegal.
  testl(swap_reg, markOopDesc::epoch_mask_in_place);
  jcc(Assembler::notZero, try_rebias);

  // The epoch of the current bias is still valid but we know nothing
  // about the owner; it might be set or it might be clear. Try to
  // acquire the bias of the object using an atomic operation. If this
  // fails we will go in to the runtime to revoke the object's bias.
  // Note that we first construct the presumed unbiased header so we
  // don't accidentally blow away another thread's valid bias.
  movl(swap_reg, saved_mark_addr);
  andl(swap_reg,
       markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);
  if (need_tmp_reg) {
    push(tmp_reg);
  }
  get_thread(tmp_reg);
  orl(tmp_reg, swap_reg);
  if (os::is_MP()) {
    lock();
  }
  cmpxchgptr(tmp_reg, Address(obj_reg, 0));
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  // If the biasing toward our thread failed, this means that
  // another thread succeeded in biasing it toward itself and we
  // need to revoke that bias. The revocation will occur in the
  // interpreter runtime in the slow case.
  if (counters != NULL) {
    cond_inc32(Assembler::zero,
               ExternalAddress((address)counters->anonymously_biased_lock_entry_count_addr()));
  }
  if (slow_case != NULL) {
    jcc(Assembler::notZero, *slow_case);
  }
  jmp(done);

  bind(try_rebias);
  // At this point we know the epoch has expired, meaning that the
  // current "bias owner", if any, is actually invalid. Under these
  // circumstances _only_, we are allowed to use the current header's
  // value as the comparison value when doing the cas to acquire the
  // bias in the current epoch. In other words, we allow transfer of
  // the bias from one thread to another directly in this situation.
  //
  // FIXME: due to a lack of registers we currently blow away the age
  // bits in this situation. Should attempt to preserve them.
  if (need_tmp_reg) {
    push(tmp_reg);
  }
  get_thread(tmp_reg);
  movl(swap_reg, klass_addr);
  orl(tmp_reg, Address(swap_reg, Klass::prototype_header_offset()));
  movl(swap_reg, saved_mark_addr);
  if (os::is_MP()) {
    lock();
  }
  cmpxchgptr(tmp_reg, Address(obj_reg, 0));
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  // If the biasing toward our thread failed, then another thread
  // succeeded in biasing it toward itself and we need to revoke that
  // bias. The revocation will occur in the runtime in the slow case.
  if (counters != NULL) {
    cond_inc32(Assembler::zero,
               ExternalAddress((address)counters->rebiased_lock_entry_count_addr()));
  }
  if (slow_case != NULL) {
    jcc(Assembler::notZero, *slow_case);
  }
  jmp(done);

  bind(try_revoke_bias);
  // The prototype mark in the klass doesn't have the bias bit set any
  // more, indicating that objects of this data type are not supposed
  // to be biased any more. We are going to try to reset the mark of
  // this object to the prototype value and fall through to the
  // CAS-based locking scheme. Note that if our CAS fails, it means
  // that another thread raced us for the privilege of revoking the
  // bias of this particular object, so it's okay to continue in the
  // normal locking code.
  //
  // FIXME: due to a lack of registers we currently blow away the age
  // bits in this situation. Should attempt to preserve them.
  movl(swap_reg, saved_mark_addr);
  if (need_tmp_reg) {
    push(tmp_reg);
  }
  movl(tmp_reg, klass_addr);
  movl(tmp_reg, Address(tmp_reg, Klass::prototype_header_offset()));
  if (os::is_MP()) {
    lock();
  }
  cmpxchgptr(tmp_reg, Address(obj_reg, 0));
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  // Fall through to the normal CAS-based lock, because no matter what
  // the result of the above CAS, some thread must have succeeded in
  // removing the bias bit from the object's header.
  if (counters != NULL) {
    cond_inc32(Assembler::zero,
               ExternalAddress((address)counters->revoked_lock_entry_count_addr()));
  }

  bind(cas_label);

  return null_check_offset;
}

// Call a leaf (no safepoint, no oop map) runtime routine and pop the
// stack-passed C arguments afterwards.
void MacroAssembler::call_VM_leaf_base(address entry_point,
                                       int number_of_arguments) {
  call(RuntimeAddress(entry_point));
  increment(rsp, number_of_arguments * wordSize);
}

void MacroAssembler::cmpoop(Address src1, jobject obj) {
  cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate());
}

void MacroAssembler::cmpoop(Register src1, jobject obj) {
  cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate());
}

// Sign-extend lo into hi (e.g. to set up rdx:rax before idiv).
void MacroAssembler::extend_sign(Register hi, Register lo) {
  // According to Intel Doc. AP-526, "Integer Divide", p.18.
  if (VM_Version::is_P6() && hi == rdx && lo == rax) {
    cdql();
  } else {
    movl(hi, lo);
    sarl(hi, 31);
  }
}

// Branch to L if FPU flag C2 is set (rax is clobbered and restored via tmp).
void MacroAssembler::jC2(Register tmp, Label& L) {
  // set parity bit if FPU flag C2 is set (via rax)
  save_rax(tmp);
  fwait(); fnstsw_ax();
  sahf();
  restore_rax(tmp);
  // branch
  jcc(Assembler::parity, L);
}

// Branch to L if FPU flag C2 is clear.
void MacroAssembler::jnC2(Register tmp, Label& L) {
  // set parity bit if FPU flag C2 is set (via rax)
  save_rax(tmp);
  fwait(); fnstsw_ax();
  sahf();
  restore_rax(tmp);
  // branch
  jcc(Assembler::noParity, L);
}

// 32bit can do a case table jump in one instruction but we no longer allow the base
// to be installed in the Address class
void MacroAssembler::jump(ArrayAddress entry) {
  jmp(as_Address(entry));
}

// Note: y_lo will be destroyed
void MacroAssembler::lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo) {
  // Long compare for Java (semantics as described in JVM spec.)
  // Result in x_hi: -1, 0, or +1.
  Label high, low, done;

  cmpl(x_hi, y_hi);
  jcc(Assembler::less, low);
  jcc(Assembler::greater, high);
  // x_hi is the return register
  xorl(x_hi, x_hi);
  cmpl(x_lo, y_lo);           // high halves equal: unsigned compare of low halves
  jcc(Assembler::below, low);
  jcc(Assembler::equal, done);

  bind(high);
  xorl(x_hi, x_hi);
  increment(x_hi);
  jmp(done);

  bind(low);
  xorl(x_hi, x_hi);
  decrementl(x_hi);

  bind(done);
}

void MacroAssembler::lea(Register dst, AddressLiteral src) {
  mov_literal32(dst, (int32_t)src.target(), src.rspec());
}

void MacroAssembler::lea(Address dst, AddressLiteral adr) {
  // leal(dst, as_Address(adr));
  // see note in movl as to why we must use a move
  mov_literal32(dst, (int32_t) adr.target(), adr.rspec());
}

void MacroAssembler::leave() {
  mov(rsp, rbp);
  pop(rbp);
}

void MacroAssembler::lmul(int x_rsp_offset, int y_rsp_offset) {
  // Multiplication of two Java long values stored on the stack
  // as illustrated below. Result is in rdx:rax.
  //
  // rsp ---> [  ??  ] \               \
  //            ....    | y_rsp_offset  |
  //          [ y_lo ] /  (in bytes)    | x_rsp_offset
  //          [ y_hi ]                  | (in bytes)
  //            ....                    |
  //          [ x_lo ]                 /
  //          [ x_hi ]
  //            ....
  //
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
  Address x_hi(rsp, x_rsp_offset + wordSize); Address x_lo(rsp, x_rsp_offset);
  Address y_hi(rsp, y_rsp_offset + wordSize); Address y_lo(rsp, y_rsp_offset);
  Label quick;
  // load x_hi, y_hi and check if quick
  // multiplication is possible
  movl(rbx, x_hi);
  movl(rcx, y_hi);
  movl(rax, rbx);
  orl(rbx, rcx);                                 // rbx, = 0 <=> x_hi = 0 and y_hi = 0
  jcc(Assembler::zero, quick);                   // if rbx, = 0 do quick multiply
  // do full multiplication
  // 1st step
  mull(y_lo);                                    // x_hi * y_lo
  movl(rbx, rax);                                // save lo(x_hi * y_lo) in rbx,
  // 2nd step
  movl(rax, x_lo);
  mull(rcx);                                     // x_lo * y_hi
  addl(rbx, rax);                                // add lo(x_lo * y_hi) to rbx,
  // 3rd step
  bind(quick);                                   // note: rbx, = 0 if quick multiply!
  movl(rax, x_lo);
  mull(y_lo);                                    // x_lo * y_lo
  addl(rdx, rbx);                                // correct hi(x_lo * y_lo)
}

// Two's-complement negate of the 64-bit value hi:lo.
void MacroAssembler::lneg(Register hi, Register lo) {
  negl(lo);
  adcl(hi, 0);
  negl(hi);
}

void MacroAssembler::lshl(Register hi, Register lo) {
  // Java shift left long support (semantics as described in JVM spec., p.305)
  // (basic idea for shift counts s >= n: x << s == (x << n) << (s - n))
  // shift value is in rcx !
  assert(hi != rcx, "must not use rcx");
  assert(lo != rcx, "must not use rcx");
  const Register s = rcx;                        // shift count
  const int      n = BitsPerWord;
  Label L;
  andl(s, 0x3f);                                 // s := s & 0x3f (s < 0x40)
  cmpl(s, n);                                    // if (s < n)
  jcc(Assembler::less, L);                       // else (s >= n)
  movl(hi, lo);                                  // x := x << n
  xorl(lo, lo);
  // Note: subl(s, n) is not needed since the Intel shift instructions work rcx mod n!
  bind(L);                                       // s (mod n) < n
  shldl(hi, lo);                                 // x := x << s
  shll(lo);
}


void MacroAssembler::lshr(Register hi, Register lo, bool sign_extension) {
  // Java shift right long support (semantics as described in JVM spec., p.306 & p.310)
  // (basic idea for shift counts s >= n: x >> s == (x >> n) >> (s - n))
  assert(hi != rcx, "must not use rcx");
  assert(lo != rcx, "must not use rcx");
  const Register s = rcx;                        // shift count
  const int      n = BitsPerWord;
  Label L;
  andl(s, 0x3f);                                 // s := s & 0x3f (s < 0x40)
  cmpl(s, n);                                    // if (s < n)
  jcc(Assembler::less, L);                       // else (s >= n)
  movl(lo, hi);                                  // x := x >> n
  if (sign_extension) sarl(hi, 31);
  else                xorl(hi, hi);
  // Note: subl(s, n) is not needed since the Intel shift instructions work rcx mod n!
  bind(L);                                       // s (mod n) < n
  shrdl(lo, hi);                                 // x := x >> s
  if (sign_extension) sarl(hi);
  else                shrl(hi);
}

void MacroAssembler::movoop(Register dst, jobject obj) {
  mov_literal32(dst, (int32_t)obj, oop_Relocation::spec_for_immediate());
}

void MacroAssembler::movoop(Address dst, jobject obj) {
  mov_literal32(dst, (int32_t)obj, oop_Relocation::spec_for_immediate());
}

// Load either the (patchable) address itself (lval) or the word it points at.
void MacroAssembler::movptr(Register dst, AddressLiteral src) {
  if (src.is_lval()) {
    mov_literal32(dst, (intptr_t)src.target(), src.rspec());
  } else {
    movl(dst, as_Address(src));
  }
}

void MacroAssembler::movptr(ArrayAddress dst, Register src) {
  movl(as_Address(dst), src);
}

void MacroAssembler::movptr(Register dst, ArrayAddress src) {
  movl(dst, as_Address(src));
}

// src should NEVER be a real pointer. Use AddressLiteral for true pointers
void MacroAssembler::movptr(Address dst, intptr_t src) {
  movl(dst, src);
}


void MacroAssembler::pop_callee_saved_registers() {
  pop(rcx);
  pop(rdx);
  pop(rdi);
  pop(rsi);
}

// Pop the top-of-stack double saved by push_fTOS() back onto the FPU stack.
void MacroAssembler::pop_fTOS() {
  fld_d(Address(rsp, 0));
  addl(rsp, 2 * wordSize);
}

void MacroAssembler::push_callee_saved_registers() {
  push(rsi);
  push(rdi);
  push(rdx);
  push(rcx);
}

// Spill the FPU top-of-stack double to the native stack.
void MacroAssembler::push_fTOS() {
  subl(rsp, 2 * wordSize);
  fstp_d(Address(rsp, 0));
}


void MacroAssembler::pushoop(jobject obj) {
  push_literal32((int32_t)obj, oop_Relocation::spec_for_immediate());
}


void MacroAssembler::pushptr(AddressLiteral src) {
  if (src.is_lval()) {
    push_literal32((int32_t)src.target(), src.rspec());
  } else {
    pushl(as_Address(src));
  }
}

// dst := 1 if ZF is clear, else 0.
void MacroAssembler::set_word_if_not_zero(Register dst) {
  xorl(dst, dst);
  set_byte_if_not_zero(dst);
}

// 32-bit C calling convention passes all arguments on the stack.
static void pass_arg0(MacroAssembler* masm, Register arg) {
  masm->push(arg);
}

static void pass_arg1(MacroAssembler* masm, Register arg) {
  masm->push(arg);
}

static void pass_arg2(MacroAssembler* masm, Register arg) {
  masm->push(arg);
}

static void pass_arg3(MacroAssembler* masm, Register arg) {
  masm->push(arg);
}

#ifndef PRODUCT
extern "C" void findpc(intptr_t x);
#endif

// Debug trap target reached from MacroAssembler::stop(): the register
// arguments are the values pusha() saved, plus the faked eip and the message.
void MacroAssembler::debug32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip, char* msg) {
  // In order to get locks to work, we need to fake a in_VM state
  JavaThread* thread = JavaThread::current();
  JavaThreadState saved_state = thread->thread_state();
  thread->set_thread_state(_thread_in_vm);
  if (ShowMessageBoxOnError) {
    // NOTE(review): these inner declarations shadow the identically named
    // 'thread'/'saved_state' above and redo the state transition -- looks
    // redundant; verify whether the inner copies can be removed.
    JavaThread* thread = JavaThread::current();
    JavaThreadState saved_state = thread->thread_state();
    thread->set_thread_state(_thread_in_vm);
    if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
      ttyLocker ttyl;
      BytecodeCounter::print();
    }
    // To see where a verify_oop failed, get $ebx+40/X for this frame.
    // This is the value of eip which points to where verify_oop will return.
    if (os::message_box(msg, "Execution stopped, print registers?")) {
      ttyLocker ttyl;
      tty->print_cr("eip = 0x%08x", eip);
#ifndef PRODUCT
      if ((WizardMode || Verbose) && PrintMiscellaneous) {
        tty->cr();
        findpc(eip);
        tty->cr();
      }
#endif
      tty->print_cr("rax = 0x%08x", rax);
      tty->print_cr("rbx = 0x%08x", rbx);
      tty->print_cr("rcx = 0x%08x", rcx);
      tty->print_cr("rdx = 0x%08x", rdx);
      tty->print_cr("rdi = 0x%08x", rdi);
      tty->print_cr("rsi = 0x%08x", rsi);
      tty->print_cr("rbp = 0x%08x", rbp);
      tty->print_cr("rsp = 0x%08x", rsp);
      BREAKPOINT;
      assert(false, "start up GDB");
    }
  } else {
    ttyLocker ttyl;
    ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg);
    assert(false, err_msg("DEBUG MESSAGE: %s", msg));
  }
  ThreadStateTransition::transition(thread, _thread_in_vm, saved_state);
}

// Emit code that halts the VM with a message: pushes msg, a fake return
// address, and all registers (matching debug32's signature), then traps.
void MacroAssembler::stop(const char* msg) {
  ExternalAddress message((address)msg);
  // push address of message
  pushptr(message.addr());
  { Label L; call(L, relocInfo::none); bind(L); }     // push eip
  pusha();                                            // push registers
  call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug32)));
  hlt();
}

// Emit code that prints a warning (via the runtime 'warning' routine) and
// continues; full CPU state is preserved around the call.
void MacroAssembler::warn(const char* msg) {
  push_CPU_state();

  ExternalAddress message((address) msg);
  // push address of message
  pushptr(message.addr());

  call(RuntimeAddress(CAST_FROM_FN_PTR(address, warning)));
  addl(rsp, wordSize);       // discard argument
  pop_CPU_state();
}

#else // _LP64

// 64 bit versions

Address MacroAssembler::as_Address(AddressLiteral adr) {
  // amd64 always does this as a pc-rel
  // we can be absolute or disp based on the instruction type
  // jmp/call are displacements others are absolute
  assert(!adr.is_lval(), "must be rval");
  assert(reachable(adr), "must be");
  return Address((int32_t)(intptr_t)(adr.target() - pc()), adr.target(), adr.reloc());

}

Address MacroAssembler::as_Address(ArrayAddress adr) {
  AddressLiteral base = adr.base();
  lea(rscratch1, base);
  Address index = adr.index();
  assert(index._disp == 0, "must not have disp"); // maybe it can?
  Address array(rscratch1, index._index, index._scale, index._disp);
  return array;
}

// 64-bit biased-locking fast path; same contract as the 32-bit version above,
// but r15_thread is available so no tmp_reg juggling is needed.
int MacroAssembler::biased_locking_enter(Register lock_reg,
                                         Register obj_reg,
                                         Register swap_reg,
                                         Register tmp_reg,
                                         bool swap_reg_contains_mark,
                                         Label& done,
                                         Label* slow_case,
                                         BiasedLockingCounters* counters) {
  assert(UseBiasedLocking, "why call this otherwise?");
  assert(swap_reg == rax, "swap_reg must be rax for cmpxchgq");
  assert(tmp_reg != noreg, "tmp_reg must be supplied");
  assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg);
  assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
  Address mark_addr      (obj_reg, oopDesc::mark_offset_in_bytes());
  Address saved_mark_addr(lock_reg, 0);

  if (PrintBiasedLockingStatistics && counters == NULL)
    counters = BiasedLocking::counters();

  // Biased locking
  // See whether the lock is currently biased toward our thread and
  // whether the epoch is still valid
  // Note that the runtime guarantees sufficient alignment of JavaThread
  // pointers to allow age to be placed into low bits
  // First check to see whether biasing is even enabled for this object
  Label cas_label;
  int null_check_offset = -1;
  if (!swap_reg_contains_mark) {
    null_check_offset = offset();
    movq(swap_reg, mark_addr);
  }
  movq(tmp_reg, swap_reg);
  andq(tmp_reg, markOopDesc::biased_lock_mask_in_place);
  cmpq(tmp_reg, markOopDesc::biased_lock_pattern);
  jcc(Assembler::notEqual, cas_label);
  // The bias pattern is present in the object's header. Need to check
  // whether the bias owner and the epoch are both still current.
  load_prototype_header(tmp_reg, obj_reg);
  orq(tmp_reg, r15_thread);
  xorq(tmp_reg, swap_reg);
  andq(tmp_reg, ~((int) markOopDesc::age_mask_in_place));
  if (counters != NULL) {
    // NOTE(review): the 32-bit counterpart bumps biased_lock_entry_count at
    // this (bias already ours) site and anonymously_biased at the CAS-acquire
    // site; verify this should not be biased_lock_entry_count_addr() here.
    cond_inc32(Assembler::zero,
               ExternalAddress((address) counters->anonymously_biased_lock_entry_count_addr()));
  }
  jcc(Assembler::equal, done);

  Label try_revoke_bias;
  Label try_rebias;

  // At this point we know that the header has the bias pattern and
  // that we are not the bias owner in the current epoch. We need to
  // figure out more details about the state of the header in order to
  // know what operations can be legally performed on the object's
  // header.

  // If the low three bits in the xor result aren't clear, that means
  // the prototype header is no longer biased and we have to revoke
  // the bias on this object.
  testq(tmp_reg, markOopDesc::biased_lock_mask_in_place);
  jcc(Assembler::notZero, try_revoke_bias);

  // Biasing is still enabled for this data type. See whether the
  // epoch of the current bias is still valid, meaning that the epoch
  // bits of the mark word are equal to the epoch bits of the
  // prototype header. (Note that the prototype header's epoch bits
  // only change at a safepoint.) If not, attempt to rebias the object
  // toward the current thread.
Note that we must be absolutely sure 5550 // that the current epoch is invalid in order to do this because 5551 // otherwise the manipulations it performs on the mark word are 5552 // illegal. 5553 testq(tmp_reg, markOopDesc::epoch_mask_in_place); 5554 jcc(Assembler::notZero, try_rebias); 5555 5556 // The epoch of the current bias is still valid but we know nothing 5557 // about the owner; it might be set or it might be clear. Try to 5558 // acquire the bias of the object using an atomic operation. If this 5559 // fails we will go in to the runtime to revoke the object's bias. 5560 // Note that we first construct the presumed unbiased header so we 5561 // don't accidentally blow away another thread's valid bias. 5562 andq(swap_reg, 5563 markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place); 5564 movq(tmp_reg, swap_reg); 5565 orq(tmp_reg, r15_thread); 5566 if (os::is_MP()) { 5567 lock(); 5568 } 5569 cmpxchgq(tmp_reg, Address(obj_reg, 0)); 5570 // If the biasing toward our thread failed, this means that 5571 // another thread succeeded in biasing it toward itself and we 5572 // need to revoke that bias. The revocation will occur in the 5573 // interpreter runtime in the slow case. 5574 if (counters != NULL) { 5575 cond_inc32(Assembler::zero, 5576 ExternalAddress((address) counters->anonymously_biased_lock_entry_count_addr())); 5577 } 5578 if (slow_case != NULL) { 5579 jcc(Assembler::notZero, *slow_case); 5580 } 5581 jmp(done); 5582 5583 bind(try_rebias); 5584 // At this point we know the epoch has expired, meaning that the 5585 // current "bias owner", if any, is actually invalid. Under these 5586 // circumstances _only_, we are allowed to use the current header's 5587 // value as the comparison value when doing the cas to acquire the 5588 // bias in the current epoch. In other words, we allow transfer of 5589 // the bias from one thread to another directly in this situation. 
5590 // 5591 // FIXME: due to a lack of registers we currently blow away the age 5592 // bits in this situation. Should attempt to preserve them. 5593 load_prototype_header(tmp_reg, obj_reg); 5594 orq(tmp_reg, r15_thread); 5595 if (os::is_MP()) { 5596 lock(); 5597 } 5598 cmpxchgq(tmp_reg, Address(obj_reg, 0)); 5599 // If the biasing toward our thread failed, then another thread 5600 // succeeded in biasing it toward itself and we need to revoke that 5601 // bias. The revocation will occur in the runtime in the slow case. 5602 if (counters != NULL) { 5603 cond_inc32(Assembler::zero, 5604 ExternalAddress((address) counters->rebiased_lock_entry_count_addr())); 5605 } 5606 if (slow_case != NULL) { 5607 jcc(Assembler::notZero, *slow_case); 5608 } 5609 jmp(done); 5610 5611 bind(try_revoke_bias); 5612 // The prototype mark in the klass doesn't have the bias bit set any 5613 // more, indicating that objects of this data type are not supposed 5614 // to be biased any more. We are going to try to reset the mark of 5615 // this object to the prototype value and fall through to the 5616 // CAS-based locking scheme. Note that if our CAS fails, it means 5617 // that another thread raced us for the privilege of revoking the 5618 // bias of this particular object, so it's okay to continue in the 5619 // normal locking code. 5620 // 5621 // FIXME: due to a lack of registers we currently blow away the age 5622 // bits in this situation. Should attempt to preserve them. 5623 load_prototype_header(tmp_reg, obj_reg); 5624 if (os::is_MP()) { 5625 lock(); 5626 } 5627 cmpxchgq(tmp_reg, Address(obj_reg, 0)); 5628 // Fall through to the normal CAS-based lock, because no matter what 5629 // the result of the above CAS, some thread must have succeeded in 5630 // removing the bias bit from the object's header. 
5631 if (counters != NULL) { 5632 cond_inc32(Assembler::zero, 5633 ExternalAddress((address) counters->revoked_lock_entry_count_addr())); 5634 } 5635 5636 bind(cas_label); 5637 5638 return null_check_offset; 5639 } 5640 5641 void MacroAssembler::call_VM_leaf_base(address entry_point, int num_args) { 5642 Label L, E; 5643 5644 #ifdef _WIN64 5645 // Windows always allocates space for it's register args 5646 assert(num_args <= 4, "only register arguments supported"); 5647 subq(rsp, frame::arg_reg_save_area_bytes); 5648 #endif 5649 5650 // Align stack if necessary 5651 testl(rsp, 15); 5652 jcc(Assembler::zero, L); 5653 5654 subq(rsp, 8); 5655 { 5656 call(RuntimeAddress(entry_point)); 5657 } 5658 addq(rsp, 8); 5659 jmp(E); 5660 5661 bind(L); 5662 { 5663 call(RuntimeAddress(entry_point)); 5664 } 5665 5666 bind(E); 5667 5668 #ifdef _WIN64 5669 // restore stack pointer 5670 addq(rsp, frame::arg_reg_save_area_bytes); 5671 #endif 5672 5673 } 5674 5675 void MacroAssembler::cmp64(Register src1, AddressLiteral src2) { 5676 assert(!src2.is_lval(), "should use cmpptr"); 5677 5678 if (reachable(src2)) { 5679 cmpq(src1, as_Address(src2)); 5680 } else { 5681 lea(rscratch1, src2); 5682 Assembler::cmpq(src1, Address(rscratch1, 0)); 5683 } 5684 } 5685 5686 int MacroAssembler::corrected_idivq(Register reg) { 5687 // Full implementation of Java ldiv and lrem; checks for special 5688 // case as described in JVM spec., p.243 & p.271. The function 5689 // returns the (pc) offset of the idivl instruction - may be needed 5690 // for implicit exceptions. 
5691 // 5692 // normal case special case 5693 // 5694 // input : rax: dividend min_long 5695 // reg: divisor (may not be eax/edx) -1 5696 // 5697 // output: rax: quotient (= rax idiv reg) min_long 5698 // rdx: remainder (= rax irem reg) 0 5699 assert(reg != rax && reg != rdx, "reg cannot be rax or rdx register"); 5700 static const int64_t min_long = 0x8000000000000000; 5701 Label normal_case, special_case; 5702 5703 // check for special case 5704 cmp64(rax, ExternalAddress((address) &min_long)); 5705 jcc(Assembler::notEqual, normal_case); 5706 xorl(rdx, rdx); // prepare rdx for possible special case (where 5707 // remainder = 0) 5708 cmpq(reg, -1); 5709 jcc(Assembler::equal, special_case); 5710 5711 // handle normal case 5712 bind(normal_case); 5713 cdqq(); 5714 int idivq_offset = offset(); 5715 idivq(reg); 5716 5717 // normal and special case exit 5718 bind(special_case); 5719 5720 return idivq_offset; 5721 } 5722 5723 void MacroAssembler::decrementq(Register reg, int value) { 5724 if (value == min_jint) { subq(reg, value); return; } 5725 if (value < 0) { incrementq(reg, -value); return; } 5726 if (value == 0) { ; return; } 5727 if (value == 1 && UseIncDec) { decq(reg) ; return; } 5728 /* else */ { subq(reg, value) ; return; } 5729 } 5730 5731 void MacroAssembler::decrementq(Address dst, int value) { 5732 if (value == min_jint) { subq(dst, value); return; } 5733 if (value < 0) { incrementq(dst, -value); return; } 5734 if (value == 0) { ; return; } 5735 if (value == 1 && UseIncDec) { decq(dst) ; return; } 5736 /* else */ { subq(dst, value) ; return; } 5737 } 5738 5739 void MacroAssembler::incrementq(Register reg, int value) { 5740 if (value == min_jint) { addq(reg, value); return; } 5741 if (value < 0) { decrementq(reg, -value); return; } 5742 if (value == 0) { ; return; } 5743 if (value == 1 && UseIncDec) { incq(reg) ; return; } 5744 /* else */ { addq(reg, value) ; return; } 5745 } 5746 5747 void MacroAssembler::incrementq(Address dst, int value) { 5748 if (value 
== min_jint) { addq(dst, value); return; } 5749 if (value < 0) { decrementq(dst, -value); return; } 5750 if (value == 0) { ; return; } 5751 if (value == 1 && UseIncDec) { incq(dst) ; return; } 5752 /* else */ { addq(dst, value) ; return; } 5753 } 5754 5755 // 32bit can do a case table jump in one instruction but we no longer allow the base 5756 // to be installed in the Address class 5757 void MacroAssembler::jump(ArrayAddress entry) { 5758 lea(rscratch1, entry.base()); 5759 Address dispatch = entry.index(); 5760 assert(dispatch._base == noreg, "must be"); 5761 dispatch._base = rscratch1; 5762 jmp(dispatch); 5763 } 5764 5765 void MacroAssembler::lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo) { 5766 ShouldNotReachHere(); // 64bit doesn't use two regs 5767 cmpq(x_lo, y_lo); 5768 } 5769 5770 void MacroAssembler::lea(Register dst, AddressLiteral src) { 5771 mov_literal64(dst, (intptr_t)src.target(), src.rspec()); 5772 } 5773 5774 void MacroAssembler::lea(Address dst, AddressLiteral adr) { 5775 mov_literal64(rscratch1, (intptr_t)adr.target(), adr.rspec()); 5776 movptr(dst, rscratch1); 5777 } 5778 5779 void MacroAssembler::leave() { 5780 // %%% is this really better? Why not on 32bit too? 
5781 emit_byte(0xC9); // LEAVE 5782 } 5783 5784 void MacroAssembler::lneg(Register hi, Register lo) { 5785 ShouldNotReachHere(); // 64bit doesn't use two regs 5786 negq(lo); 5787 } 5788 5789 void MacroAssembler::movoop(Register dst, jobject obj) { 5790 mov_literal64(dst, (intptr_t)obj, oop_Relocation::spec_for_immediate()); 5791 } 5792 5793 void MacroAssembler::movoop(Address dst, jobject obj) { 5794 mov_literal64(rscratch1, (intptr_t)obj, oop_Relocation::spec_for_immediate()); 5795 movq(dst, rscratch1); 5796 } 5797 5798 void MacroAssembler::movptr(Register dst, AddressLiteral src) { 5799 if (src.is_lval()) { 5800 mov_literal64(dst, (intptr_t)src.target(), src.rspec()); 5801 } else { 5802 if (reachable(src)) { 5803 movq(dst, as_Address(src)); 5804 } else { 5805 lea(rscratch1, src); 5806 movq(dst, Address(rscratch1,0)); 5807 } 5808 } 5809 } 5810 5811 void MacroAssembler::movptr(ArrayAddress dst, Register src) { 5812 movq(as_Address(dst), src); 5813 } 5814 5815 void MacroAssembler::movptr(Register dst, ArrayAddress src) { 5816 movq(dst, as_Address(src)); 5817 } 5818 5819 // src should NEVER be a real pointer. 
// Use AddressLiteral for true pointers
void MacroAssembler::movptr(Address dst, intptr_t src) {
  // 64-bit immediates cannot be stored directly; stage through rscratch1.
  mov64(rscratch1, src);
  movq(dst, rscratch1);
}

// These are mostly for initializing NULL
void MacroAssembler::movptr(Address dst, int32_t src) {
  movslq(dst, src);
}

void MacroAssembler::movptr(Register dst, int32_t src) {
  mov64(dst, (intptr_t)src);
}

// Push an oop constant (staged through rscratch1, with oop relocation).
void MacroAssembler::pushoop(jobject obj) {
  movoop(rscratch1, obj);
  push(rscratch1);
}

// Push either the literal address itself (lval) or the value stored at it.
void MacroAssembler::pushptr(AddressLiteral src) {
  lea(rscratch1, src);
  if (src.is_lval()) {
    push(rscratch1);
  } else {
    pushq(Address(rscratch1, 0));
  }
}

// Clear the thread's last-Java-frame anchor fields (64-bit: thread is
// always r15_thread).
void MacroAssembler::reset_last_Java_frame(bool clear_fp,
                                           bool clear_pc) {
  // we must set sp to zero to clear frame
  movptr(Address(r15_thread, JavaThread::last_Java_sp_offset()), NULL_WORD);
  // must clear fp, so that compiled frames are not confused; it is
  // possible that we need it only for debugging
  if (clear_fp) {
    movptr(Address(r15_thread, JavaThread::last_Java_fp_offset()), NULL_WORD);
  }

  if (clear_pc) {
    movptr(Address(r15_thread, JavaThread::last_Java_pc_offset()), NULL_WORD);
  }
}

// Record the last Java frame in the thread anchor. sp defaults to rsp when
// invalid; fp and pc are optional. sp is written last, after fp/pc.
void MacroAssembler::set_last_Java_frame(Register last_java_sp,
                                         Register last_java_fp,
                                         address  last_java_pc) {
  // determine last_java_sp register
  if (!last_java_sp->is_valid()) {
    last_java_sp = rsp;
  }

  // last_java_fp is optional
  if (last_java_fp->is_valid()) {
    movptr(Address(r15_thread, JavaThread::last_Java_fp_offset()),
           last_java_fp);
  }

  // last_java_pc is optional
  if (last_java_pc != NULL) {
    Address java_pc(r15_thread,
                    JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset());
    lea(rscratch1, InternalAddress(last_java_pc));
    movptr(java_pc, rscratch1);
  }

  movptr(Address(r15_thread, JavaThread::last_Java_sp_offset()), last_java_sp);
}

// 64-bit calling convention: leaf-call arguments travel in c_rarg0..c_rarg3;
// each pass_argN only moves when the value is not already in place.
static void pass_arg0(MacroAssembler* masm, Register arg) {
  if (c_rarg0 != arg ) {
    masm->mov(c_rarg0, arg);
  }
}

static void pass_arg1(MacroAssembler* masm, Register arg) {
  if (c_rarg1 != arg ) {
    masm->mov(c_rarg1, arg);
  }
}

static void pass_arg2(MacroAssembler* masm, Register arg) {
  if (c_rarg2 != arg ) {
    masm->mov(c_rarg2, arg);
  }
}

static void pass_arg3(MacroAssembler* masm, Register arg) {
  if (c_rarg3 != arg ) {
    masm->mov(c_rarg3, arg);
  }
}

// Emit code that halts with a fatal message: args are msg, the current rip,
// and a pointer to the pusha() register spill area; calls debug64 below.
void MacroAssembler::stop(const char* msg) {
  address rip = pc();
  pusha(); // get regs on stack
  lea(c_rarg0, ExternalAddress((address) msg));
  lea(c_rarg1, InternalAddress(rip));
  movq(c_rarg2, rsp); // pass pointer to regs array
  andq(rsp, -16); // align stack as required by ABI
  call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug64)));
  hlt();
}

// Emit code that prints a warning via the VM's warning() and continues;
// rsp is saved/realigned and full CPU state preserved around the call.
void MacroAssembler::warn(const char* msg) {
  push(rsp);
  andq(rsp, -16);     // align stack as required by push_CPU_state and call

  push_CPU_state();   // keeps alignment at 16 bytes
  lea(c_rarg0, ExternalAddress((address) msg));
  call_VM_leaf(CAST_FROM_FN_PTR(address, warning), c_rarg0);
  pop_CPU_state();
  pop(rsp);
}

#ifndef PRODUCT
extern "C" void findpc(intptr_t x);
#endif

// Runtime target of MacroAssembler::stop() (64-bit). regs[] points at the
// pusha() spill area; the regs[15]..regs[0] indices below reflect the push
// order used by stop() above.
void MacroAssembler::debug64(char* msg, int64_t pc, int64_t regs[]) {
  // In order to get locks to work, we need to fake a in_VM state
  if (ShowMessageBoxOnError ) {
    JavaThread* thread = JavaThread::current();
    JavaThreadState saved_state = thread->thread_state();
    thread->set_thread_state(_thread_in_vm);
#ifndef PRODUCT
    if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
      ttyLocker ttyl;
      BytecodeCounter::print();
    }
#endif
    // To see where a verify_oop failed, get $ebx+40/X for this frame.
    // XXX correct this offset for amd64
    // This is the value of eip which points to where verify_oop will return.
    if (os::message_box(msg, "Execution stopped, print registers?")) {
      ttyLocker ttyl;
      tty->print_cr("rip = 0x%016lx", pc);
#ifndef PRODUCT
      tty->cr();
      findpc(pc);
      tty->cr();
#endif
      tty->print_cr("rax = 0x%016lx", regs[15]);
      tty->print_cr("rbx = 0x%016lx", regs[12]);
      tty->print_cr("rcx = 0x%016lx", regs[14]);
      tty->print_cr("rdx = 0x%016lx", regs[13]);
      tty->print_cr("rdi = 0x%016lx", regs[8]);
      tty->print_cr("rsi = 0x%016lx", regs[9]);
      tty->print_cr("rbp = 0x%016lx", regs[10]);
      tty->print_cr("rsp = 0x%016lx", regs[11]);
      tty->print_cr("r8 = 0x%016lx", regs[7]);
      tty->print_cr("r9 = 0x%016lx", regs[6]);
      tty->print_cr("r10 = 0x%016lx", regs[5]);
      tty->print_cr("r11 = 0x%016lx", regs[4]);
      tty->print_cr("r12 = 0x%016lx", regs[3]);
      tty->print_cr("r13 = 0x%016lx", regs[2]);
      tty->print_cr("r14 = 0x%016lx", regs[1]);
      tty->print_cr("r15 = 0x%016lx", regs[0]);
      BREAKPOINT;
    }
    ThreadStateTransition::transition(thread, _thread_in_vm, saved_state);
  } else {
    ttyLocker ttyl;
    ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n",
                    msg);
    assert(false, err_msg("DEBUG MESSAGE: %s", msg));
  }
}

#endif // _LP64

// Now versions that are common to 32/64 bit

void MacroAssembler::addptr(Register dst, int32_t imm32) {
  LP64_ONLY(addq(dst, imm32)) NOT_LP64(addl(dst, imm32));
}

void MacroAssembler::addptr(Register dst, Register src) {
  LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src));
}

void MacroAssembler::addptr(Address dst, Register src) {
  LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src));
}

// SSE scalar-double add from a literal address, indirecting through
// rscratch1 when the address is not rip-relative reachable.
void MacroAssembler::addsd(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    Assembler::addsd(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::addsd(dst, Address(rscratch1, 0));
  }
}

// SSE scalar-float add from a literal address; same reachability dance.
void MacroAssembler::addss(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    addss(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    addss(dst, Address(rscratch1, 0));
  }
}

// Pad the code buffer with nops up to the requested alignment.
void MacroAssembler::align(int modulus) {
  if (offset() % modulus != 0) {
    nop(modulus - (offset() % modulus));
  }
}

void MacroAssembler::andpd(XMMRegister dst, AddressLiteral src) {
  // Used in sign-masking with aligned address.
  assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");
  if (reachable(src)) {
    Assembler::andpd(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::andpd(dst, Address(rscratch1, 0));
  }
}

void MacroAssembler::andps(XMMRegister dst, AddressLiteral src) {
  // Used in sign-masking with aligned address.
  assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");
  if (reachable(src)) {
    Assembler::andps(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::andps(dst, Address(rscratch1, 0));
  }
}

void MacroAssembler::andptr(Register dst, int32_t imm32) {
  LP64_ONLY(andq(dst, imm32)) NOT_LP64(andl(dst, imm32));
}

// Atomically increment a 32-bit counter in memory; flags are saved/restored
// around the locked increment.
void MacroAssembler::atomic_incl(AddressLiteral counter_addr) {
  pushf();
  if (os::is_MP())
    lock();
  incrementl(counter_addr);
  popf();
}

// Writes to stack successive pages until offset reached to check for
// stack overflow + shadow pages. This clobbers tmp.
6064 void MacroAssembler::bang_stack_size(Register size, Register tmp) { 6065 movptr(tmp, rsp); 6066 // Bang stack for total size given plus shadow page size. 6067 // Bang one page at a time because large size can bang beyond yellow and 6068 // red zones. 6069 Label loop; 6070 bind(loop); 6071 movl(Address(tmp, (-os::vm_page_size())), size ); 6072 subptr(tmp, os::vm_page_size()); 6073 subl(size, os::vm_page_size()); 6074 jcc(Assembler::greater, loop); 6075 6076 // Bang down shadow pages too. 6077 // The -1 because we already subtracted 1 page. 6078 for (int i = 0; i< StackShadowPages-1; i++) { 6079 // this could be any sized move but this is can be a debugging crumb 6080 // so the bigger the better. 6081 movptr(Address(tmp, (-i*os::vm_page_size())), size ); 6082 } 6083 } 6084 6085 void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) { 6086 assert(UseBiasedLocking, "why call this otherwise?"); 6087 6088 // Check for biased locking unlock case, which is a no-op 6089 // Note: we do not have to check the thread ID for two reasons. 6090 // First, the interpreter checks for IllegalMonitorStateException at 6091 // a higher level. Second, if the bias was revoked while we held the 6092 // lock, the object could not be rebiased toward another thread, so 6093 // the bias bit would be clear. 6094 movptr(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); 6095 andptr(temp_reg, markOopDesc::biased_lock_mask_in_place); 6096 cmpptr(temp_reg, markOopDesc::biased_lock_pattern); 6097 jcc(Assembler::equal, done); 6098 } 6099 6100 void MacroAssembler::c2bool(Register x) { 6101 // implements x == 0 ? 0 : 1 6102 // note: must only look at least-significant byte of x 6103 // since C-style booleans are stored in one byte 6104 // only! 
(was bug) 6105 andl(x, 0xFF); 6106 setb(Assembler::notZero, x); 6107 } 6108 6109 // Wouldn't need if AddressLiteral version had new name 6110 void MacroAssembler::call(Label& L, relocInfo::relocType rtype) { 6111 Assembler::call(L, rtype); 6112 } 6113 6114 void MacroAssembler::call(Register entry) { 6115 Assembler::call(entry); 6116 } 6117 6118 void MacroAssembler::call(AddressLiteral entry) { 6119 if (reachable(entry)) { 6120 Assembler::call_literal(entry.target(), entry.rspec()); 6121 } else { 6122 lea(rscratch1, entry); 6123 Assembler::call(rscratch1); 6124 } 6125 } 6126 6127 // Implementation of call_VM versions 6128 6129 void MacroAssembler::call_VM(Register oop_result, 6130 address entry_point, 6131 bool check_exceptions) { 6132 Label C, E; 6133 call(C, relocInfo::none); 6134 jmp(E); 6135 6136 bind(C); 6137 call_VM_helper(oop_result, entry_point, 0, check_exceptions); 6138 ret(0); 6139 6140 bind(E); 6141 } 6142 6143 void MacroAssembler::call_VM(Register oop_result, 6144 address entry_point, 6145 Register arg_1, 6146 bool check_exceptions) { 6147 Label C, E; 6148 call(C, relocInfo::none); 6149 jmp(E); 6150 6151 bind(C); 6152 pass_arg1(this, arg_1); 6153 call_VM_helper(oop_result, entry_point, 1, check_exceptions); 6154 ret(0); 6155 6156 bind(E); 6157 } 6158 6159 void MacroAssembler::call_VM(Register oop_result, 6160 address entry_point, 6161 Register arg_1, 6162 Register arg_2, 6163 bool check_exceptions) { 6164 Label C, E; 6165 call(C, relocInfo::none); 6166 jmp(E); 6167 6168 bind(C); 6169 6170 LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg")); 6171 6172 pass_arg2(this, arg_2); 6173 pass_arg1(this, arg_1); 6174 call_VM_helper(oop_result, entry_point, 2, check_exceptions); 6175 ret(0); 6176 6177 bind(E); 6178 } 6179 6180 void MacroAssembler::call_VM(Register oop_result, 6181 address entry_point, 6182 Register arg_1, 6183 Register arg_2, 6184 Register arg_3, 6185 bool check_exceptions) { 6186 Label C, E; 6187 call(C, relocInfo::none); 6188 jmp(E); 6189 6190 
bind(C); 6191 6192 LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg")); 6193 LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg")); 6194 pass_arg3(this, arg_3); 6195 6196 LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg")); 6197 pass_arg2(this, arg_2); 6198 6199 pass_arg1(this, arg_1); 6200 call_VM_helper(oop_result, entry_point, 3, check_exceptions); 6201 ret(0); 6202 6203 bind(E); 6204 } 6205 6206 void MacroAssembler::call_VM(Register oop_result, 6207 Register last_java_sp, 6208 address entry_point, 6209 int number_of_arguments, 6210 bool check_exceptions) { 6211 Register thread = LP64_ONLY(r15_thread) NOT_LP64(noreg); 6212 call_VM_base(oop_result, thread, last_java_sp, entry_point, number_of_arguments, check_exceptions); 6213 } 6214 6215 void MacroAssembler::call_VM(Register oop_result, 6216 Register last_java_sp, 6217 address entry_point, 6218 Register arg_1, 6219 bool check_exceptions) { 6220 pass_arg1(this, arg_1); 6221 call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions); 6222 } 6223 6224 void MacroAssembler::call_VM(Register oop_result, 6225 Register last_java_sp, 6226 address entry_point, 6227 Register arg_1, 6228 Register arg_2, 6229 bool check_exceptions) { 6230 6231 LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg")); 6232 pass_arg2(this, arg_2); 6233 pass_arg1(this, arg_1); 6234 call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions); 6235 } 6236 6237 void MacroAssembler::call_VM(Register oop_result, 6238 Register last_java_sp, 6239 address entry_point, 6240 Register arg_1, 6241 Register arg_2, 6242 Register arg_3, 6243 bool check_exceptions) { 6244 LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg")); 6245 LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg")); 6246 pass_arg3(this, arg_3); 6247 LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg")); 6248 pass_arg2(this, arg_2); 6249 pass_arg1(this, arg_1); 6250 call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions); 6251 } 6252 6253 void MacroAssembler::super_call_VM(Register 
oop_result, 6254 Register last_java_sp, 6255 address entry_point, 6256 int number_of_arguments, 6257 bool check_exceptions) { 6258 Register thread = LP64_ONLY(r15_thread) NOT_LP64(noreg); 6259 MacroAssembler::call_VM_base(oop_result, thread, last_java_sp, entry_point, number_of_arguments, check_exceptions); 6260 } 6261 6262 void MacroAssembler::super_call_VM(Register oop_result, 6263 Register last_java_sp, 6264 address entry_point, 6265 Register arg_1, 6266 bool check_exceptions) { 6267 pass_arg1(this, arg_1); 6268 super_call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions); 6269 } 6270 6271 void MacroAssembler::super_call_VM(Register oop_result, 6272 Register last_java_sp, 6273 address entry_point, 6274 Register arg_1, 6275 Register arg_2, 6276 bool check_exceptions) { 6277 6278 LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg")); 6279 pass_arg2(this, arg_2); 6280 pass_arg1(this, arg_1); 6281 super_call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions); 6282 } 6283 6284 void MacroAssembler::super_call_VM(Register oop_result, 6285 Register last_java_sp, 6286 address entry_point, 6287 Register arg_1, 6288 Register arg_2, 6289 Register arg_3, 6290 bool check_exceptions) { 6291 LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg")); 6292 LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg")); 6293 pass_arg3(this, arg_3); 6294 LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg")); 6295 pass_arg2(this, arg_2); 6296 pass_arg1(this, arg_1); 6297 super_call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions); 6298 } 6299 6300 void MacroAssembler::call_VM_base(Register oop_result, 6301 Register java_thread, 6302 Register last_java_sp, 6303 address entry_point, 6304 int number_of_arguments, 6305 bool check_exceptions) { 6306 // determine java_thread register 6307 if (!java_thread->is_valid()) { 6308 #ifdef _LP64 6309 java_thread = r15_thread; 6310 #else 6311 java_thread = rdi; 6312 get_thread(java_thread); 6313 #endif // LP64 6314 } 6315 // 
determine last_java_sp register 6316 if (!last_java_sp->is_valid()) { 6317 last_java_sp = rsp; 6318 } 6319 // debugging support 6320 assert(number_of_arguments >= 0 , "cannot have negative number of arguments"); 6321 LP64_ONLY(assert(java_thread == r15_thread, "unexpected register")); 6322 #ifdef ASSERT 6323 // TraceBytecodes does not use r12 but saves it over the call, so don't verify 6324 // r12 is the heapbase. 6325 LP64_ONLY(if (UseCompressedOops && !TraceBytecodes) verify_heapbase("call_VM_base");) 6326 #endif // ASSERT 6327 6328 assert(java_thread != oop_result , "cannot use the same register for java_thread & oop_result"); 6329 assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp"); 6330 6331 // push java thread (becomes first argument of C function) 6332 6333 NOT_LP64(push(java_thread); number_of_arguments++); 6334 LP64_ONLY(mov(c_rarg0, r15_thread)); 6335 6336 // set last Java frame before call 6337 assert(last_java_sp != rbp, "can't use ebp/rbp"); 6338 6339 // Only interpreter should have to set fp 6340 set_last_Java_frame(java_thread, last_java_sp, rbp, NULL); 6341 6342 // do the call, remove parameters 6343 MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments); 6344 6345 // restore the thread (cannot use the pushed argument since arguments 6346 // may be overwritten by C code generated by an optimizing compiler); 6347 // however can use the register value directly if it is callee saved. 
6348 if (LP64_ONLY(true ||) java_thread == rdi || java_thread == rsi) { 6349 // rdi & rsi (also r15) are callee saved -> nothing to do 6350 #ifdef ASSERT 6351 guarantee(java_thread != rax, "change this code"); 6352 push(rax); 6353 { Label L; 6354 get_thread(rax); 6355 cmpptr(java_thread, rax); 6356 jcc(Assembler::equal, L); 6357 stop("MacroAssembler::call_VM_base: rdi not callee saved?"); 6358 bind(L); 6359 } 6360 pop(rax); 6361 #endif 6362 } else { 6363 get_thread(java_thread); 6364 } 6365 // reset last Java frame 6366 // Only interpreter should have to clear fp 6367 reset_last_Java_frame(java_thread, true, false); 6368 6369 #ifndef CC_INTERP 6370 // C++ interp handles this in the interpreter 6371 check_and_handle_popframe(java_thread); 6372 check_and_handle_earlyret(java_thread); 6373 #endif /* CC_INTERP */ 6374 6375 if (check_exceptions) { 6376 // check for pending exceptions (java_thread is set upon return) 6377 cmpptr(Address(java_thread, Thread::pending_exception_offset()), (int32_t) NULL_WORD); 6378 #ifndef _LP64 6379 jump_cc(Assembler::notEqual, 6380 RuntimeAddress(StubRoutines::forward_exception_entry())); 6381 #else 6382 // This used to conditionally jump to forward_exception however it is 6383 // possible if we relocate that the branch will not reach. 
  // (continuation of MacroAssembler::call_VM_base, whose entry is above this
  // chunk) ...so we must jump
  // around so we can always reach

  Label ok;
  jcc(Assembler::equal, ok);
  jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
  bind(ok);
#endif // LP64
  }

  // get oop result if there is one and reset the value in the thread
  if (oop_result->is_valid()) {
    movptr(oop_result, Address(java_thread, JavaThread::vm_result_offset()));
    movptr(Address(java_thread, JavaThread::vm_result_offset()), NULL_WORD);
    verify_oop(oop_result, "broken oop in call_VM_base");
  }
}

void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) {

  // Calculate the value for last_Java_sp
  // somewhat subtle. call_VM does an intermediate call
  // which places a return address on the stack just under the
  // stack pointer as the user finished with it. This allows
  // use to retrieve last_Java_pc from last_Java_sp[-1].
  // On 32bit we then have to push additional args on the stack to accomplish
  // the actual requested call. On 64bit call_VM only can use register args
  // so the only extra space is the return address that call_VM created.
  // This hopefully explains the calculations here.

#ifdef _LP64
  // We've pushed one address, correct last_Java_sp
  lea(rax, Address(rsp, wordSize));
#else
  lea(rax, Address(rsp, (1 + number_of_arguments) * wordSize));
#endif // LP64

  call_VM_base(oop_result, noreg, rax, entry_point, number_of_arguments, check_exceptions);

}

// Leaf calls: call straight into the runtime without setting up a last Java
// frame.  Arguments are installed with the pass_argN helpers (registers on
// 64-bit, stack pushes on 32-bit).
void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) {
  call_VM_leaf_base(entry_point, number_of_arguments);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) {
  pass_arg0(this, arg_0);
  call_VM_leaf(entry_point, 1);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {

  // On 64-bit, installing arg_1 into c_rarg1 must not clobber arg_0's source.
  LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
  pass_arg1(this, arg_1);
  pass_arg0(this, arg_0);
  call_VM_leaf(entry_point, 2);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) {
  // Arguments are installed last-to-first; each assert checks that a source
  // register not yet consumed is not about to be overwritten.
  LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg"));
  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);
  LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
  pass_arg1(this, arg_1);
  pass_arg0(this, arg_0);
  call_VM_leaf(entry_point, 3);
}

// super_call_VM_leaf: as call_VM_leaf, but calls the MacroAssembler
// implementation of call_VM_leaf_base directly (bypassing any override).
void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0) {
  pass_arg0(this, arg_0);
  MacroAssembler::call_VM_leaf_base(entry_point, 1);
}

void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {

  LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
  pass_arg1(this, arg_1);
  pass_arg0(this, arg_0);
  MacroAssembler::call_VM_leaf_base(entry_point, 2);
}

void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) {
  LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg"));
  LP64_ONLY(assert(arg_1 !=
c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);
  LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
  pass_arg1(this, arg_1);
  pass_arg0(this, arg_0);
  MacroAssembler::call_VM_leaf_base(entry_point, 3);
}

void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3) {
  // Arguments are installed last-to-first; each assert checks that a source
  // register not yet consumed is not about to be overwritten.
  LP64_ONLY(assert(arg_0 != c_rarg3, "smashed arg"));
  LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"));
  LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"));
  pass_arg3(this, arg_3);
  LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg"));
  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);
  LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
  pass_arg1(this, arg_1);
  pass_arg0(this, arg_0);
  MacroAssembler::call_VM_leaf_base(entry_point, 4);
}

// Default no-op; interpreters that support early return override this hook.
void MacroAssembler::check_and_handle_earlyret(Register java_thread) {
}

// Default no-op; interpreters that support pop-frame override this hook.
void MacroAssembler::check_and_handle_popframe(Register java_thread) {
}

void MacroAssembler::cmp32(AddressLiteral src1, int32_t imm) {
  if (reachable(src1)) {
    cmpl(as_Address(src1), imm);
  } else {
    // Target is out of 32-bit displacement range: go through rscratch1.
    lea(rscratch1, src1);
    cmpl(Address(rscratch1, 0), imm);
  }
}

void MacroAssembler::cmp32(Register src1, AddressLiteral src2) {
  assert(!src2.is_lval(), "use cmpptr");
  if (reachable(src2)) {
    cmpl(src1, as_Address(src2));
  } else {
    lea(rscratch1, src2);
    cmpl(src1, Address(rscratch1, 0));
  }
}

void MacroAssembler::cmp32(Register src1, int32_t imm) {
  Assembler::cmpl(src1, imm);
}

void MacroAssembler::cmp32(Register src1, Address src2) {
  Assembler::cmpl(src1, src2);
}

// Compare two doubles and produce -1/0/+1 in dst (Java dcmp semantics).
// ucomisd sets PF when the operands are unordered (NaN); the parity jump
// routes NaN to -1 or +1 depending on unordered_is_less.
void MacroAssembler::cmpsd2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less) {
  ucomisd(opr1, opr2);

  Label L;
  if (unordered_is_less) {
    movl(dst, -1);
    jcc(Assembler::parity, L);
    jcc(Assembler::below , L);
    movl(dst, 0);
    jcc(Assembler::equal , L);
    increment(dst);
  } else { // unordered is greater
    movl(dst, 1);
    jcc(Assembler::parity, L);
    jcc(Assembler::above , L);
    movl(dst, 0);
    jcc(Assembler::equal , L);
    decrementl(dst);
  }
  bind(L);
}

// Compare two floats and produce -1/0/+1 in dst (Java fcmp semantics);
// same structure as cmpsd2int above.
void MacroAssembler::cmpss2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less) {
  ucomiss(opr1, opr2);

  Label L;
  if (unordered_is_less) {
    movl(dst, -1);
    jcc(Assembler::parity, L);
    jcc(Assembler::below , L);
    movl(dst, 0);
    jcc(Assembler::equal , L);
    increment(dst);
  } else { // unordered is greater
    movl(dst, 1);
    jcc(Assembler::parity, L);
    jcc(Assembler::above , L);
    movl(dst, 0);
    jcc(Assembler::equal , L);
    decrementl(dst);
  }
  bind(L);
}


void MacroAssembler::cmp8(AddressLiteral src1, int imm) {
  if (reachable(src1)) {
    cmpb(as_Address(src1), imm);
  } else {
    lea(rscratch1, src1);
    cmpb(Address(rscratch1, 0), imm);
  }
}

void MacroAssembler::cmpptr(Register src1, AddressLiteral src2) {
#ifdef _LP64
  if (src2.is_lval()) {
    // Compare against the literal address itself, not the value at it.
    movptr(rscratch1, src2);
    Assembler::cmpq(src1, rscratch1);
  } else if (reachable(src2)) {
    cmpq(src1, as_Address(src2));
  } else {
    lea(rscratch1, src2);
    Assembler::cmpq(src1, Address(rscratch1, 0));
  }
#else
  if (src2.is_lval()) {
    cmp_literal32(src1, (int32_t) src2.target(), src2.rspec());
  } else {
    cmpl(src1, as_Address(src2));
  }
#endif // _LP64
}

void MacroAssembler::cmpptr(Address src1, AddressLiteral src2) {
  assert(src2.is_lval(), "not a mem-mem compare");
#ifdef _LP64
  // moves src2's literal address
  movptr(rscratch1, src2);
  Assembler::cmpq(src1, rscratch1);
#else
  cmp_literal32(src1,
(int32_t) src2.target(), src2.rspec());
#endif // _LP64
}

// Atomic compare-and-exchange of a pointer at an absolute address; the lock
// prefix is only emitted on multiprocessor machines.
void MacroAssembler::locked_cmpxchgptr(Register reg, AddressLiteral adr) {
  if (reachable(adr)) {
    if (os::is_MP())
      lock();
    cmpxchgptr(reg, as_Address(adr));
  } else {
    lea(rscratch1, adr);
    if (os::is_MP())
      lock();
    cmpxchgptr(reg, Address(rscratch1, 0));
  }
}

void MacroAssembler::cmpxchgptr(Register reg, Address adr) {
  LP64_ONLY(cmpxchgq(reg, adr)) NOT_LP64(cmpxchgl(reg, adr));
}

void MacroAssembler::comisd(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    Assembler::comisd(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::comisd(dst, Address(rscratch1, 0));
  }
}

void MacroAssembler::comiss(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    Assembler::comiss(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::comiss(dst, Address(rscratch1, 0));
  }
}


// Increment a memory counter only when `cond` holds: jump around the
// increment on the negated condition.
void MacroAssembler::cond_inc32(Condition cond, AddressLiteral counter_addr) {
  Condition negated_cond = negate_condition(cond);
  Label L;
  jcc(negated_cond, L);
  atomic_incl(counter_addr);
  bind(L);
}

int MacroAssembler::corrected_idivl(Register reg) {
  // Full implementation of Java idiv and irem; checks for
  // special case as described in JVM spec., p.243 & p.271.
  // The function returns the (pc) offset of the idivl
  // instruction - may be needed for implicit exceptions.
  //
  //         normal case                           special case
  //
  // input : rax,: dividend                         min_int
  //         reg: divisor   (may not be rax,/rdx)  -1
  //
  // output: rax,: quotient  (= rax, idiv reg)       min_int
  //         rdx: remainder (= rax, irem reg)       0
  assert(reg != rax && reg != rdx, "reg cannot be rax, or rdx register");
  const int min_int = 0x80000000;
  Label normal_case, special_case;

  // check for special case
  cmpl(rax, min_int);
  jcc(Assembler::notEqual, normal_case);
  xorl(rdx, rdx); // prepare rdx for possible special case (where remainder = 0)
  cmpl(reg, -1);
  jcc(Assembler::equal, special_case);

  // handle normal case
  bind(normal_case);
  cdql();
  int idivl_offset = offset();
  idivl(reg);

  // normal and special case exit
  bind(special_case);

  return idivl_offset;
}



// Subtract `value` from reg, choosing the shortest encoding; decrement by a
// negative amount is delegated to incrementl.  min_jint is handled first
// because it cannot be negated.
void MacroAssembler::decrementl(Register reg, int value) {
  if (value == min_jint) {subl(reg, value) ; return; }
  if (value <  0) { incrementl(reg, -value); return; }
  if (value == 0) {                        ; return; }
  if (value == 1 && UseIncDec) { decl(reg) ; return; }
  /* else */      { subl(reg, value)       ; return; }
}

void MacroAssembler::decrementl(Address dst, int value) {
  if (value == min_jint) {subl(dst, value) ; return; }
  if (value <  0) { incrementl(dst, -value); return; }
  if (value == 0) {                        ; return; }
  if (value == 1 && UseIncDec) { decl(dst) ; return; }
  /* else */      { subl(dst, value)       ; return; }
}

// Signed division by 2^shift_value with round-toward-zero: negative inputs
// are biased by (2^shift - 1) before the arithmetic shift.
void MacroAssembler::division_with_shift (Register reg, int shift_value) {
  assert (shift_value > 0, "illegal shift value");
  Label _is_positive;
  testl (reg, reg);
  jcc (Assembler::positive, _is_positive);
  int offset = (1 << shift_value) - 1 ;

  if (offset == 1) {
    incrementl(reg);
  } else {
    addl(reg, offset);
  }

  bind (_is_positive);
  sarl(reg, shift_value);
}

void
MacroAssembler::divsd(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    Assembler::divsd(dst, as_Address(src));
  } else {
    // Out of displacement range: materialize the address in rscratch1.
    lea(rscratch1, src);
    Assembler::divsd(dst, Address(rscratch1, 0));
  }
}

void MacroAssembler::divss(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    Assembler::divss(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::divss(dst, Address(rscratch1, 0));
  }
}

// !defined(COMPILER2) is because of stupid core builds
#if !defined(_LP64) || defined(COMPILER1) || !defined(COMPILER2)
// Clear the x87 stack, either all at once via EMMS or by freeing each of the
// eight registers individually.
void MacroAssembler::empty_FPU_stack() {
  if (VM_Version::supports_mmx()) {
    emms();
  } else {
    for (int i = 8; i-- > 0; ) ffree(i);
  }
}
#endif // !LP64 || C1 || !C2


// Defines obj, preserves var_size_in_bytes
// Lock-free bump-the-pointer allocation in eden via CAS on the shared top
// pointer; branches to slow_case when inline allocation is not possible.
void MacroAssembler::eden_allocate(Register obj,
                                   Register var_size_in_bytes,
                                   int con_size_in_bytes,
                                   Register t1,
                                   Label& slow_case) {
  assert(obj == rax, "obj must be in rax, for cmpxchg");
  assert_different_registers(obj, var_size_in_bytes, t1);
  if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) {
    jmp(slow_case);
  } else {
    Register end = t1;
    Label retry;
    bind(retry);
    ExternalAddress heap_top((address) Universe::heap()->top_addr());
    movptr(obj, heap_top);
    if (var_size_in_bytes == noreg) {
      lea(end, Address(obj, con_size_in_bytes));
    } else {
      lea(end, Address(obj, var_size_in_bytes, Address::times_1));
    }
    // if end < obj then we wrapped around => object too long => slow case
    cmpptr(end, obj);
    jcc(Assembler::below, slow_case);
    cmpptr(end, ExternalAddress((address) Universe::heap()->end_addr()));
    jcc(Assembler::above, slow_case);
    // Compare obj with the top addr, and if still equal, store the new top addr in
    // end at the address of the top addr pointer. Sets ZF if was equal, and clears
    // it otherwise. Use lock prefix for atomicity on MPs.
    locked_cmpxchgptr(end, heap_top);
    jcc(Assembler::notEqual, retry);
  }
}

// Standard frame prologue: save caller's rbp and establish the new frame.
void MacroAssembler::enter() {
  push(rbp);
  mov(rbp, rsp);
}

// A 5 byte nop that is safe for patching (see patch_verified_entry)
void MacroAssembler::fat_nop() {
  if (UseAddressNop) {
    addr_nop_5();
  } else {
    // Four redundant segment-override prefixes followed by a one-byte nop.
    emit_byte(0x26); // es:
    emit_byte(0x2e); // cs:
    emit_byte(0x64); // fs:
    emit_byte(0x65); // gs:
    emit_byte(0x90);
  }
}

void MacroAssembler::fcmp(Register tmp) {
  fcmp(tmp, 1, true, true);
}

// x87 compare of ST(0) against ST(index), optionally popping one or both
// operands, leaving the result in EFLAGS.
void MacroAssembler::fcmp(Register tmp, int index, bool pop_left, bool pop_right) {
  assert(!pop_right || pop_left, "usage error");
  if (VM_Version::supports_cmov()) {
    // fucomi/fucomip set EFLAGS directly.
    assert(tmp == noreg, "unneeded temp");
    if (pop_left) {
      fucomip(index);
    } else {
      fucomi(index);
    }
    if (pop_right) {
      fpop();
    }
  } else {
    assert(tmp != noreg, "need temp");
    if (pop_left) {
      if (pop_right) {
        fcompp();
      } else {
        fcomp(index);
      }
    } else {
      fcom(index);
    }
    // convert FPU condition into eflags condition via rax,
    save_rax(tmp);
    fwait(); fnstsw_ax();
    sahf();
    restore_rax(tmp);
  }
  // condition codes set as follows:
  //
  // CF (corresponds to C0) if x < y
  // PF (corresponds to C2) if unordered
  // ZF (corresponds to C3) if x = y
}

void MacroAssembler::fcmp2int(Register dst, bool unordered_is_less) {
  fcmp2int(dst, unordered_is_less, 1, true, true);
}

// x87 compare producing -1/0/+1 in dst; parity (unordered/NaN) is routed to
// -1 or +1 depending on unordered_is_less, mirroring cmpsd2int.
void MacroAssembler::fcmp2int(Register dst, bool unordered_is_less, int index, bool pop_left, bool pop_right) {
  fcmp(VM_Version::supports_cmov() ? noreg : dst, index, pop_left, pop_right);
  Label L;
  if (unordered_is_less) {
    movl(dst, -1);
    jcc(Assembler::parity, L);
    jcc(Assembler::below , L);
    movl(dst, 0);
    jcc(Assembler::equal , L);
    increment(dst);
  } else { // unordered is greater
    movl(dst, 1);
    jcc(Assembler::parity, L);
    jcc(Assembler::above , L);
    movl(dst, 0);
    jcc(Assembler::equal , L);
    decrementl(dst);
  }
  bind(L);
}

void MacroAssembler::fld_d(AddressLiteral src) {
  fld_d(as_Address(src));
}

void MacroAssembler::fld_s(AddressLiteral src) {
  fld_s(as_Address(src));
}

void MacroAssembler::fld_x(AddressLiteral src) {
  Assembler::fld_x(as_Address(src));
}

void MacroAssembler::fldcw(AddressLiteral src) {
  Assembler::fldcw(as_Address(src));
}

void MacroAssembler::pow_exp_core_encoding() {
  // kills rax, rcx, rdx
  subptr(rsp,sizeof(jdouble));
  // computes 2^X. Stack: X ...
  // f2xm1 computes 2^X-1 but only operates on -1<=X<=1. Get int(X) and
  // keep it on the thread's stack to compute 2^int(X) later
  // then compute 2^(X-int(X)) as (2^(X-int(X)-1+1)
  // final result is obtained with: 2^X = 2^int(X) * 2^(X-int(X))
  fld_s(0);                // Stack: X X ...
  frndint();               // Stack: int(X) X ...
  fsuba(1);                // Stack: int(X) X-int(X) ...
  fistp_s(Address(rsp,0)); // move int(X) as integer to thread's stack. Stack: X-int(X) ...
  f2xm1();                 // Stack: 2^(X-int(X))-1 ...
  fld1();                  // Stack: 1 2^(X-int(X))-1 ...
  faddp(1);                // Stack: 2^(X-int(X))
  // computes 2^(int(X)): add exponent bias (1023) to int(X), then
  // shift int(X)+1023 to exponent position.
  // Exponent is limited to 11 bits if int(X)+1023 does not fit in 11
  // bits, set result to NaN. 0x000 and 0x7FF are reserved exponent
  // values so detect them and set result to NaN.
  movl(rax,Address(rsp,0));
  movl(rcx, -2048); // 11 bit mask and valid NaN binary encoding
  addl(rax, 1023);
  movl(rdx,rax);
  shll(rax,20);
  // Check that 0 < int(X)+1023 < 2047. Otherwise set rax to NaN.
  addl(rdx,1);
  // Check that 1 < int(X)+1023+1 < 2048
  // in 3 steps:
  // 1- (int(X)+1023+1)&-2048 == 0 => 0 <= int(X)+1023+1 < 2048
  // 2- (int(X)+1023+1)&-2048 != 0
  // 3- (int(X)+1023+1)&-2048 != 1
  // Do 2- first because addl just updated the flags.
  cmov32(Assembler::equal,rax,rcx);
  cmpl(rdx,1);
  cmov32(Assembler::equal,rax,rcx);
  testl(rdx,rcx);
  cmov32(Assembler::notEqual,rax,rcx);
  // Assemble the double 2^int(X) on the stack and multiply it in.
  movl(Address(rsp,4),rax);
  movl(Address(rsp,0),0);
  fmul_d(Address(rsp,0)); // Stack: 2^X ...
  addptr(rsp,sizeof(jdouble));
}

void MacroAssembler::fast_pow() {
  // computes X^Y = 2^(Y * log2(X))
  // if fast computation is not possible, result is NaN. Requires
  // fallback from user of this macro.
  fyl2x();                 // Stack: (Y*log2(X)) ...
  pow_exp_core_encoding(); // Stack: exp(X) ...
}

void MacroAssembler::fast_exp() {
  // computes exp(X) = 2^(X * log2(e))
  // if fast computation is not possible, result is NaN. Requires
  // fallback from user of this macro.
  fldl2e();                // Stack: log2(e) X ...
  fmulp(1);                // Stack: (X*log2(e)) ...
  pow_exp_core_encoding(); // Stack: exp(X) ...
}

// Computes pow (Stack: X Y) or exp (Stack: X) via the fast x87 sequences
// above, falling back to the SharedRuntime implementation when the fast path
// yields NaN or Y is not an exact integer for negative X.
void MacroAssembler::pow_or_exp(bool is_exp, int num_fpu_regs_in_use) {
  // kills rax, rcx, rdx
  // pow and exp needs 2 extra registers on the fpu stack.
  Label slow_case, done;
  Register tmp = noreg;
  if (!VM_Version::supports_cmov()) {
    // fcmp needs a temporary so preserve rdx,
    tmp = rdx;
  }
  Register tmp2 = rdx;

  if (is_exp) {
    // Stack: X
    fld_s(0);                   // duplicate argument for runtime call. Stack: X X
    fast_exp();                 // Stack: exp(X) X
    fcmp(tmp, 0, false, false); // Stack: exp(X) X
    // exp(X) not equal to itself: exp(X) is NaN go to slow case.
    jcc(Assembler::parity, slow_case);
    // get rid of duplicate argument. Stack: exp(X)
    if (num_fpu_regs_in_use > 0) {
      fxch();
      fpop();
    } else {
      ffree(1);
    }
    jmp(done);
  } else {
    // Stack: X Y
    Label x_negative, y_odd;

    fldz();                     // Stack: 0 X Y
    fcmp(tmp, 1, true, false);  // Stack: X Y
    jcc(Assembler::above, x_negative);

    // X >= 0

    fld_s(1);                   // duplicate arguments for runtime call. Stack: Y X Y
    fld_s(1);                   // Stack: X Y X Y
    fast_pow();                 // Stack: X^Y X Y
    fcmp(tmp, 0, false, false); // Stack: X^Y X Y
    // X^Y not equal to itself: X^Y is NaN go to slow case.
    jcc(Assembler::parity, slow_case);
    // get rid of duplicate arguments. Stack: X^Y
    if (num_fpu_regs_in_use > 0) {
      fxch(); fpop();
      fxch(); fpop();
    } else {
      ffree(2);
      ffree(1);
    }
    jmp(done);

    // X <= 0
    bind(x_negative);

    fld_s(1);                   // Stack: Y X Y
    frndint();                  // Stack: int(Y) X Y
    fcmp(tmp, 2, false, false); // Stack: int(Y) X Y
    jcc(Assembler::notEqual, slow_case);

#ifdef _LP64
    subptr(rsp, 8);
#else
    subptr(rsp, 4);
#endif
    fistp_s(Address(rsp,0));    // Stack: X Y

    fld_s(1);                   // duplicate arguments for runtime call. Stack: Y X Y
    fld_s(1);                   // Stack: X Y X Y
    fabs();                     // Stack: abs(X) Y X Y
    fast_pow();                 // Stack: abs(X)^Y X Y
    fcmp(tmp, 0, false, false); // Stack: abs(X)^Y X Y
    // abs(X)^Y not equal to itself: abs(X)^Y is NaN go to slow case.

    pop(tmp2);
    jcc(Assembler::parity, slow_case);

    // test Y for integer indefinite value (int overflow)
    cmpl(tmp2, 0x80000000);
    jcc(Assembler::equal, slow_case);

    // get rid of duplicate arguments. Stack: X^Y
    if (num_fpu_regs_in_use > 0) {
      fxch(); fpop();
      fxch(); fpop();
    } else {
      ffree(2);
      ffree(1);
    }

    testl(tmp2, 1);
    jcc(Assembler::zero, done); // X <= 0, Y even: X^Y = abs(X)^Y
    // X <= 0, Y odd: X^Y = -abs(X)^Y

    fchs();                     // Stack: -abs(X)^Y Y
    jmp(done);
  }

  // slow case: runtime call
  bind(slow_case);

  fpop();                       // pop incorrect result or int(Y)

  fp_runtime_fallback(is_exp ? CAST_FROM_FN_PTR(address, SharedRuntime::dexp) : CAST_FROM_FN_PTR(address, SharedRuntime::dpow),
                      is_exp ? 1 : 2, num_fpu_regs_in_use);

  // Come here with result in F-TOS
  bind(done);
}

// Pop the x87 top-of-stack: free ST(0) and advance the stack pointer.
void MacroAssembler::fpop() {
  ffree();
  fincstp();
}

// Partial remainder: loop fprem until the C2 flag reports the reduction is
// complete, then discard ST(1).
void MacroAssembler::fremr(Register tmp) {
  save_rax(tmp);
  { Label L;
    bind(L);
    fprem();
    fwait(); fnstsw_ax();
#ifdef _LP64
    testl(rax, 0x400);
    jcc(Assembler::notEqual, L);
#else
    sahf();
    jcc(Assembler::parity, L);
#endif // _LP64
  }
  restore_rax(tmp);
  // Result is in ST0.
  // Note: fxch & fpop to get rid of ST1
  // (otherwise FPU stack could overflow eventually)
  fxch(1);
  fpop();
}


void MacroAssembler::incrementl(AddressLiteral dst) {
  if (reachable(dst)) {
    incrementl(as_Address(dst));
  } else {
    lea(rscratch1, dst);
    incrementl(Address(rscratch1, 0));
  }
}

void MacroAssembler::incrementl(ArrayAddress dst) {
  incrementl(as_Address(dst));
}

// Add `value` to reg, choosing the shortest encoding; increment by a
// negative amount is delegated to decrementl.  min_jint is handled first
// because it cannot be negated.
void MacroAssembler::incrementl(Register reg, int value) {
  if (value == min_jint) {addl(reg, value) ; return; }
  if (value <  0) { decrementl(reg, -value); return; }
  if (value == 0) {                        ; return; }
  if (value == 1 && UseIncDec) { incl(reg) ; return; }
  /* else */      { addl(reg, value)       ; return; }
}

void MacroAssembler::incrementl(Address dst, int value) {
  if (value == min_jint) {addl(dst, value) ; return; }
  if (value <  0) { decrementl(dst, -value); return; }
  if (value == 0) {                        ; return; }
  if (value == 1 && UseIncDec) { incl(dst) ; return; }
  /* else */      { addl(dst, value)       ; return; }
}

void MacroAssembler::jump(AddressLiteral dst) {
  if (reachable(dst)) {
    jmp_literal(dst.target(), dst.rspec());
  } else {
    lea(rscratch1, dst);
    jmp(rscratch1);
  }
}

// Conditional jump to an absolute target.  When the target is reachable the
// jcc is emitted directly (short or long form); otherwise the condition is
// reversed to branch around an indirect jump through rscratch1.
void MacroAssembler::jump_cc(Condition cc, AddressLiteral dst) {
  if (reachable(dst)) {
    InstructionMark im(this);
    relocate(dst.reloc());
    const int short_size = 2;
    const int long_size = 6;
    int offs = (intptr_t)dst.target() - ((intptr_t)_code_pos);
    if (dst.reloc() == relocInfo::none && is8bit(offs - short_size)) {
      // 0111 tttn #8-bit disp
      emit_byte(0x70 | cc);
      emit_byte((offs - short_size) & 0xFF);
    } else {
      // 0000 1111 1000 tttn #32-bit disp
      emit_byte(0x0F);
      emit_byte(0x80 | cc);
      emit_long(offs - long_size);
    }
  } else {
#ifdef ASSERT
    warning("reversing conditional branch");
#endif /* ASSERT */
    Label skip;
    jccb(reverse[cc], skip);
    lea(rscratch1, dst);
    Assembler::jmp(rscratch1);
    bind(skip);
  }
}

void MacroAssembler::ldmxcsr(AddressLiteral src) {
  if (reachable(src)) {
    Assembler::ldmxcsr(as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::ldmxcsr(Address(rscratch1, 0));
  }
}

// Sign-extending byte load; returns the (pc) offset of the load instruction
// (may be needed for implicit null checks).  Pre-P6 32-bit chips lack a fast
// movsx, so shift-based sign extension is used instead.
int MacroAssembler::load_signed_byte(Register dst, Address src) {
  int off;
  if (LP64_ONLY(true ||) VM_Version::is_P6()) {
    off = offset();
    movsbl(dst, src); // movsxb
  } else {
    off = load_unsigned_byte(dst, src);
    shll(dst, 24);
    sarl(dst, 24);
  }
  return off;
}

// Note: load_signed_short used to be called load_signed_word.
// Although the 'w' in x86 opcodes refers to the term "word" in the assembler
// manual, which means 16 bits, that usage is found nowhere in HotSpot code.
// The term "word" in HotSpot means a 32- or 64-bit machine word.
int MacroAssembler::load_signed_short(Register dst, Address src) {
  int off;
  if (LP64_ONLY(true ||) VM_Version::is_P6()) {
    // This is dubious to me since it seems safe to do a signed 16 => 64 bit
    // version but this is what 64bit has always done. This seems to imply
    // that users are only using 32bits worth.
    off = offset();
    movswl(dst, src); // movsxw
  } else {
    off = load_unsigned_short(dst, src);
    shll(dst, 16);
    sarl(dst, 16);
  }
  return off;
}

int MacroAssembler::load_unsigned_byte(Register dst, Address src) {
  // According to Intel Doc. AP-526, "Zero-Extension of Short", p.16,
  // and "3.9 Partial Register Penalties", p. 22).
  int off;
  if (LP64_ONLY(true || ) VM_Version::is_P6() || src.uses(dst)) {
    off = offset();
    movzbl(dst, src); // movzxb
  } else {
    // xor-then-partial-move avoids the partial register stall on old chips.
    xorl(dst, dst);
    off = offset();
    movb(dst, src);
  }
  return off;
}

// Note: load_unsigned_short used to be called load_unsigned_word.
int MacroAssembler::load_unsigned_short(Register dst, Address src) {
  // According to Intel Doc. AP-526, "Zero-Extension of Short", p.16,
  // and "3.9 Partial Register Penalties", p. 22).
  int off;
  if (LP64_ONLY(true ||) VM_Version::is_P6() || src.uses(dst)) {
    off = offset();
    movzwl(dst, src); // movzxw
  } else {
    xorl(dst, dst);
    off = offset();
    movw(dst, src);
  }
  return off;
}

// Load a 1/2/4/8-byte value; on 32-bit, an 8-byte load needs dst2 for the
// high word.
void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2) {
  switch (size_in_bytes) {
#ifndef _LP64
  case  8:
    assert(dst2 != noreg, "second dest register required");
    movl(dst,  src);
    movl(dst2, src.plus_disp(BytesPerInt));
    break;
#else
  case  8:  movq(dst, src); break;
#endif
  case  4:  movl(dst, src); break;
  case  2:  is_signed ? load_signed_short(dst, src) : load_unsigned_short(dst, src); break;
  case  1:  is_signed ?
load_signed_byte( dst, src) : load_unsigned_byte( dst, src); break;
  default:  ShouldNotReachHere();
  }
}

// Store a 1/2/4/8-byte value; on 32-bit, an 8-byte store needs src2 for the
// high word.
void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2) {
  switch (size_in_bytes) {
#ifndef _LP64
  case  8:
    assert(src2 != noreg, "second source register required");
    movl(dst,                        src);
    movl(dst.plus_disp(BytesPerInt), src2);
    break;
#else
  case  8:  movq(dst, src); break;
#endif
  case  4:  movl(dst, src); break;
  case  2:  movw(dst, src); break;
  case  1:  movb(dst, src); break;
  default:  ShouldNotReachHere();
  }
}

void MacroAssembler::mov32(AddressLiteral dst, Register src) {
  if (reachable(dst)) {
    movl(as_Address(dst), src);
  } else {
    lea(rscratch1, dst);
    movl(Address(rscratch1, 0), src);
  }
}

void MacroAssembler::mov32(Register dst, AddressLiteral src) {
  if (reachable(src)) {
    movl(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    movl(dst, Address(rscratch1, 0));
  }
}

// C++ bool manipulation
// These pick a move width matching the platform compiler's sizeof(bool).

void MacroAssembler::movbool(Register dst, Address src) {
  if(sizeof(bool) == 1)
    movb(dst, src);
  else if(sizeof(bool) == 2)
    movw(dst, src);
  else if(sizeof(bool) == 4)
    movl(dst, src);
  else
    // unsupported
    ShouldNotReachHere();
}

void MacroAssembler::movbool(Address dst, bool boolconst) {
  if(sizeof(bool) == 1)
    movb(dst, (int) boolconst);
  else if(sizeof(bool) == 2)
    movw(dst, (int) boolconst);
  else if(sizeof(bool) == 4)
    movl(dst, (int) boolconst);
  else
    // unsupported
    ShouldNotReachHere();
}

void MacroAssembler::movbool(Address dst, Register src) {
  if(sizeof(bool) == 1)
    movb(dst, src);
  else if(sizeof(bool) == 2)
    movw(dst, src);
  else if(sizeof(bool) == 4)
    movl(dst, src);
  else
    // unsupported
    ShouldNotReachHere();
}

void MacroAssembler::movbyte(ArrayAddress dst, int src) {
  movb(as_Address(dst), src);
}

void MacroAssembler::movdbl(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    // movsd zeroes the upper half of the register; movlpd leaves it intact.
    if (UseXmmLoadAndClearUpper) {
      movsd (dst, as_Address(src));
    } else {
      movlpd(dst, as_Address(src));
    }
  } else {
    lea(rscratch1, src);
    if (UseXmmLoadAndClearUpper) {
      movsd (dst, Address(rscratch1, 0));
    } else {
      movlpd(dst, Address(rscratch1, 0));
    }
  }
}

void MacroAssembler::movflt(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    movss(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    movss(dst, Address(rscratch1, 0));
  }
}

void MacroAssembler::movptr(Register dst, Register src) {
  LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
}

void MacroAssembler::movptr(Register dst, Address src) {
  LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
}

// src should NEVER be a real pointer. Use AddressLiteral for true pointers
void MacroAssembler::movptr(Register dst, intptr_t src) {
  LP64_ONLY(mov64(dst, src)) NOT_LP64(movl(dst, src));
}

void MacroAssembler::movptr(Address dst, Register src) {
  LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
}

void MacroAssembler::movsd(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    Assembler::movsd(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::movsd(dst, Address(rscratch1, 0));
  }
}

void MacroAssembler::movss(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    Assembler::movss(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::movss(dst, Address(rscratch1, 0));
  }
}

void MacroAssembler::mulsd(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    Assembler::mulsd(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::mulsd(dst, Address(rscratch1, 0));
  }
}

void MacroAssembler::mulss(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    Assembler::mulss(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::mulss(dst, Address(rscratch1, 0));
  }
}

void MacroAssembler::null_check(Register reg, int offset) {
  if (needs_explicit_null_check(offset)) {
    // provoke OS NULL exception if reg = NULL by
    // accessing M[reg] w/o changing any (non-CC) registers
    // NOTE: cmpl is plenty here to provoke a segv
    cmpptr(rax, Address(reg, 0));
    // Note: should probably use testl(rax, Address(reg, 0));
    //       may be shorter code (however, this version of
    //       testl needs to be implemented first)
  } else {
    // nothing to do, (later) access of M[reg + offset]
    // will provoke OS NULL exception if reg = NULL
  }
}

void MacroAssembler::os_breakpoint() {
  // instead of directly
emitting a breakpoint, call os:breakpoint for better debugability 7415 // (e.g., MSVC can't call ps() otherwise) 7416 call(RuntimeAddress(CAST_FROM_FN_PTR(address, os::breakpoint))); 7417 } 7418 7419 void MacroAssembler::pop_CPU_state() { 7420 pop_FPU_state(); 7421 pop_IU_state(); 7422 } 7423 7424 void MacroAssembler::pop_FPU_state() { 7425 NOT_LP64(frstor(Address(rsp, 0));) 7426 LP64_ONLY(fxrstor(Address(rsp, 0));) 7427 addptr(rsp, FPUStateSizeInWords * wordSize); 7428 } 7429 7430 void MacroAssembler::pop_IU_state() { 7431 popa(); 7432 LP64_ONLY(addq(rsp, 8)); 7433 popf(); 7434 } 7435 7436 // Save Integer and Float state 7437 // Warning: Stack must be 16 byte aligned (64bit) 7438 void MacroAssembler::push_CPU_state() { 7439 push_IU_state(); 7440 push_FPU_state(); 7441 } 7442 7443 void MacroAssembler::push_FPU_state() { 7444 subptr(rsp, FPUStateSizeInWords * wordSize); 7445 #ifndef _LP64 7446 fnsave(Address(rsp, 0)); 7447 fwait(); 7448 #else 7449 fxsave(Address(rsp, 0)); 7450 #endif // LP64 7451 } 7452 7453 void MacroAssembler::push_IU_state() { 7454 // Push flags first because pusha kills them 7455 pushf(); 7456 // Make sure rsp stays 16-byte aligned 7457 LP64_ONLY(subq(rsp, 8)); 7458 pusha(); 7459 } 7460 7461 void MacroAssembler::reset_last_Java_frame(Register java_thread, bool clear_fp, bool clear_pc) { 7462 // determine java_thread register 7463 if (!java_thread->is_valid()) { 7464 java_thread = rdi; 7465 get_thread(java_thread); 7466 } 7467 // we must set sp to zero to clear frame 7468 movptr(Address(java_thread, JavaThread::last_Java_sp_offset()), NULL_WORD); 7469 if (clear_fp) { 7470 movptr(Address(java_thread, JavaThread::last_Java_fp_offset()), NULL_WORD); 7471 } 7472 7473 if (clear_pc) 7474 movptr(Address(java_thread, JavaThread::last_Java_pc_offset()), NULL_WORD); 7475 7476 } 7477 7478 void MacroAssembler::restore_rax(Register tmp) { 7479 if (tmp == noreg) pop(rax); 7480 else if (tmp != rax) mov(rax, tmp); 7481 } 7482 7483 void 
MacroAssembler::round_to(Register reg, int modulus) { 7484 addptr(reg, modulus - 1); 7485 andptr(reg, -modulus); 7486 } 7487 7488 void MacroAssembler::save_rax(Register tmp) { 7489 if (tmp == noreg) push(rax); 7490 else if (tmp != rax) mov(tmp, rax); 7491 } 7492 7493 // Write serialization page so VM thread can do a pseudo remote membar. 7494 // We use the current thread pointer to calculate a thread specific 7495 // offset to write to within the page. This minimizes bus traffic 7496 // due to cache line collision. 7497 void MacroAssembler::serialize_memory(Register thread, Register tmp) { 7498 movl(tmp, thread); 7499 shrl(tmp, os::get_serialize_page_shift_count()); 7500 andl(tmp, (os::vm_page_size() - sizeof(int))); 7501 7502 Address index(noreg, tmp, Address::times_1); 7503 ExternalAddress page(os::get_memory_serialize_page()); 7504 7505 // Size of store must match masking code above 7506 movl(as_Address(ArrayAddress(page, index)), tmp); 7507 } 7508 7509 // Calls to C land 7510 // 7511 // When entering C land, the rbp, & rsp of the last Java frame have to be recorded 7512 // in the (thread-local) JavaThread object. When leaving C land, the last Java fp 7513 // has to be reset to 0. This is required to allow proper stack traversal. 
// Record the last Java frame (sp, and optionally fp/pc) in the JavaThread's
// frame anchor so the stack can be walked while executing in C land.
// sp is stored last; a non-zero sp marks the anchor as valid.
void MacroAssembler::set_last_Java_frame(Register java_thread,
                                         Register last_java_sp,
                                         Register last_java_fp,
                                         address  last_java_pc) {
  // determine java_thread register
  if (!java_thread->is_valid()) {
    java_thread = rdi;
    get_thread(java_thread);
  }
  // determine last_java_sp register
  if (!last_java_sp->is_valid()) {
    last_java_sp = rsp;
  }

  // last_java_fp is optional

  if (last_java_fp->is_valid()) {
    movptr(Address(java_thread, JavaThread::last_Java_fp_offset()), last_java_fp);
  }

  // last_java_pc is optional

  if (last_java_pc != NULL) {
    lea(Address(java_thread,
                JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()),
        InternalAddress(last_java_pc));

  }
  movptr(Address(java_thread, JavaThread::last_Java_sp_offset()), last_java_sp);
}

// Pointer-width shift left: shlq on 64-bit, shll on 32-bit.
void MacroAssembler::shlptr(Register dst, int imm8) {
  LP64_ONLY(shlq(dst, imm8)) NOT_LP64(shll(dst, imm8));
}

// Pointer-width logical shift right: shrq on 64-bit, shrl on 32-bit.
void MacroAssembler::shrptr(Register dst, int imm8) {
  LP64_ONLY(shrq(dst, imm8)) NOT_LP64(shrl(dst, imm8));
}

// Sign-extend the low byte of reg into the full 32-bit register.
// Uses movsx when available; otherwise the shl/sar fallback.
void MacroAssembler::sign_extend_byte(Register reg) {
  if (LP64_ONLY(true ||) (VM_Version::is_P6() && reg->has_byte_register())) {
    movsbl(reg, reg); // movsxb
  } else {
    shll(reg, 24);
    sarl(reg, 24);
  }
}

// Sign-extend the low 16 bits of reg into the full 32-bit register.
void MacroAssembler::sign_extend_short(Register reg) {
  if (LP64_ONLY(true ||) VM_Version::is_P6()) {
    movswl(reg, reg); // movsxw
  } else {
    shll(reg, 16);
    sarl(reg, 16);
  }
}

void MacroAssembler::testl(Register dst, AddressLiteral src) {
  assert(reachable(src), "Address should be reachable");
  testl(dst, as_Address(src));
}

// AddressLiteral wrappers: use the literal directly when reachable with a
// 32-bit displacement, otherwise go through rscratch1.

void MacroAssembler::sqrtsd(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    Assembler::sqrtsd(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::sqrtsd(dst, Address(rscratch1, 0));
  }
}

void MacroAssembler::sqrtss(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    Assembler::sqrtss(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::sqrtss(dst, Address(rscratch1, 0));
  }
}

void MacroAssembler::subsd(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    Assembler::subsd(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::subsd(dst, Address(rscratch1, 0));
  }
}

void MacroAssembler::subss(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    Assembler::subss(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::subss(dst, Address(rscratch1, 0));
  }
}

void MacroAssembler::ucomisd(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    Assembler::ucomisd(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::ucomisd(dst, Address(rscratch1, 0));
  }
}

void MacroAssembler::ucomiss(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    Assembler::ucomiss(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::ucomiss(dst, Address(rscratch1, 0));
  }
}

void MacroAssembler::xorpd(XMMRegister dst, AddressLiteral src) {
  // Used in sign-bit flipping with aligned address.
  assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");
  if (reachable(src)) {
    Assembler::xorpd(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::xorpd(dst, Address(rscratch1, 0));
  }
}

void MacroAssembler::xorps(XMMRegister dst, AddressLiteral src) {
  // Used in sign-bit flipping with aligned address.
  assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");
  if (reachable(src)) {
    Assembler::xorps(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::xorps(dst, Address(rscratch1, 0));
  }
}

// AVX 3-operands instructions

void MacroAssembler::vaddsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
  if (reachable(src)) {
    vaddsd(dst, nds, as_Address(src));
  } else {
    lea(rscratch1, src);
    vaddsd(dst, nds, Address(rscratch1, 0));
  }
}

void MacroAssembler::vaddss(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
  if (reachable(src)) {
    vaddss(dst, nds, as_Address(src));
  } else {
    lea(rscratch1, src);
    vaddss(dst, nds, Address(rscratch1, 0));
  }
}

void MacroAssembler::vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
  if (reachable(src)) {
    vandpd(dst, nds, as_Address(src));
  } else {
    lea(rscratch1, src);
    vandpd(dst, nds, Address(rscratch1, 0));
  }
}

void MacroAssembler::vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
  if (reachable(src)) {
    vandps(dst, nds, as_Address(src));
  } else {
    lea(rscratch1, src);
    vandps(dst, nds, Address(rscratch1, 0));
  }
}

void MacroAssembler::vdivsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
  if (reachable(src)) {
    vdivsd(dst, nds, as_Address(src));
  } else {
    lea(rscratch1, src);
    vdivsd(dst, nds, Address(rscratch1, 0));
  }
}

void MacroAssembler::vdivss(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
  if (reachable(src)) {
    vdivss(dst, nds, as_Address(src));
  } else {
    lea(rscratch1, src);
    vdivss(dst, nds, Address(rscratch1, 0));
  }
}

void MacroAssembler::vmulsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
  if (reachable(src)) {
    vmulsd(dst, nds, as_Address(src));
  } else {
    lea(rscratch1, src);
    vmulsd(dst, nds, Address(rscratch1, 0));
  }
}

void MacroAssembler::vmulss(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
  if (reachable(src)) {
    vmulss(dst, nds, as_Address(src));
  } else {
    lea(rscratch1, src);
    vmulss(dst, nds, Address(rscratch1, 0));
  }
}

void MacroAssembler::vsubsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
  if (reachable(src)) {
    vsubsd(dst, nds, as_Address(src));
  } else {
    lea(rscratch1, src);
    vsubsd(dst, nds, Address(rscratch1, 0));
  }
}

void MacroAssembler::vsubss(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
  if (reachable(src)) {
    vsubss(dst, nds, as_Address(src));
  } else {
    lea(rscratch1, src);
    vsubss(dst, nds, Address(rscratch1, 0));
  }
}

void MacroAssembler::vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
  if (reachable(src)) {
    vxorpd(dst, nds, as_Address(src));
  } else {
    lea(rscratch1, src);
    vxorpd(dst, nds, Address(rscratch1, 0));
  }
}

void MacroAssembler::vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
  if (reachable(src)) {
    vxorps(dst, nds, as_Address(src));
  } else {
    lea(rscratch1, src);
    vxorps(dst, nds, Address(rscratch1, 0));
  }
}


//////////////////////////////////////////////////////////////////////////////////
#ifndef SERIALGC

// G1 SATB pre-barrier: when marking is active, record the value about to be
// overwritten (pre_val, optionally loaded from [obj]) in the thread-local
// SATB buffer; falls into the runtime when the buffer is full.
void MacroAssembler::g1_write_barrier_pre(Register obj,
                                          Register pre_val,
                                          Register thread,
                                          Register tmp,
                                          bool tosca_live,
                                          bool expand_call) {

  // If expand_call is true then we expand the call_VM_leaf macro
  // directly to skip generating the check by
  // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp.

#ifdef _LP64
  assert(thread == r15_thread, "must be");
#endif // _LP64

  Label done;
  Label runtime;

  assert(pre_val != noreg, "check this code");

  if (obj != noreg) {
    assert_different_registers(obj, pre_val, tmp);
    assert(pre_val != rax, "check this code");
  }

  Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
                                       PtrQueue::byte_offset_of_active()));
  Address index(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
                                       PtrQueue::byte_offset_of_index()));
  Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
                                       PtrQueue::byte_offset_of_buf()));


  // Is marking active?
  if (in_bytes(PtrQueue::byte_width_of_active()) == 4) {
    cmpl(in_progress, 0);
  } else {
    assert(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption");
    cmpb(in_progress, 0);
  }
  jcc(Assembler::equal, done);

  // Do we need to load the previous value?
  if (obj != noreg) {
    load_heap_oop(pre_val, Address(obj, 0));
  }

  // Is the previous value null?
  cmpptr(pre_val, (int32_t) NULL_WORD);
  jcc(Assembler::equal, done);

  // Can we store original value in the thread's buffer?
  // Is index == 0?
  // (The index field is typed as size_t.)

  movptr(tmp, index);                   // tmp := *index_adr
  cmpptr(tmp, 0);                       // tmp == 0?
  jcc(Assembler::equal, runtime);       // If yes, goto runtime

  subptr(tmp, wordSize);                // tmp := tmp - wordSize
  movptr(index, tmp);                   // *index_adr := tmp
  addptr(tmp, buffer);                  // tmp := tmp + *buffer_adr

  // Record the previous value
  movptr(Address(tmp, 0), pre_val);
  jmp(done);

  bind(runtime);
  // save the live input values
  if(tosca_live) push(rax);

  if (obj != noreg && obj != rax)
    push(obj);

  if (pre_val != rax)
    push(pre_val);

  // Calling the runtime using the regular call_VM_leaf mechanism generates
  // code (generated by InterpreterMacroAssembler::call_VM_leaf_base)
  // that checks that the *(ebp+frame::interpreter_frame_last_sp) == NULL.
  //
  // If we are generating the pre-barrier without a frame (e.g. in the
  // intrinsified Reference.get() routine) then ebp might be pointing to
  // the caller frame and so this check will most likely fail at runtime.
  //
  // Expanding the call directly bypasses the generation of the check.
  // So when we do not have a full interpreter frame on the stack
  // expand_call should be passed true.

  NOT_LP64( push(thread); )

  if (expand_call) {
    LP64_ONLY( assert(pre_val != c_rarg1, "smashed arg"); )
    pass_arg1(this, thread);
    pass_arg0(this, pre_val);
    MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), 2);
  } else {
    call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), pre_val, thread);
  }

  NOT_LP64( pop(thread); )

  // restore the live input values
  if (pre_val != rax)
    pop(pre_val);

  if (obj != noreg && obj != rax)
    pop(obj);

  if(tosca_live) pop(rax);

  bind(done);
}

// G1 post-barrier: for a region-crossing store of a non-NULL oop whose card
// is still clean, dirty the card and enqueue it in the thread-local
// dirty-card queue; falls into the runtime when the queue is full.
void MacroAssembler::g1_write_barrier_post(Register store_addr,
                                           Register new_val,
                                           Register thread,
                                           Register tmp,
                                           Register tmp2) {
#ifdef _LP64
  assert(thread == r15_thread, "must be");
#endif // _LP64

  Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
                                       PtrQueue::byte_offset_of_index()));
  Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
                                       PtrQueue::byte_offset_of_buf()));

  BarrierSet* bs = Universe::heap()->barrier_set();
  CardTableModRefBS* ct = (CardTableModRefBS*)bs;
  Label done;
  Label runtime;

  // Does store cross heap regions?

  movptr(tmp, store_addr);
  xorptr(tmp, new_val);
  shrptr(tmp, HeapRegion::LogOfHRGrainBytes);
  jcc(Assembler::equal, done);

  // crosses regions, storing NULL?

  cmpptr(new_val, (int32_t) NULL_WORD);
  jcc(Assembler::equal, done);

  // storing region crossing non-NULL, is card already dirty?

  ExternalAddress cardtable((address) ct->byte_map_base);
  assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
#ifdef _LP64
  const Register card_addr = tmp;

  movq(card_addr, store_addr);
  shrq(card_addr, CardTableModRefBS::card_shift);

  lea(tmp2, cardtable);

  // get the address of the card
  addq(card_addr, tmp2);
#else
  const Register card_index = tmp;

  movl(card_index, store_addr);
  shrl(card_index, CardTableModRefBS::card_shift);

  Address index(noreg, card_index, Address::times_1);
  const Register card_addr = tmp;
  lea(card_addr, as_Address(ArrayAddress(cardtable, index)));
#endif
  cmpb(Address(card_addr, 0), 0);
  jcc(Assembler::equal, done);

  // storing a region crossing, non-NULL oop, card is clean.
  // dirty card and log.

  movb(Address(card_addr, 0), 0);

  cmpl(queue_index, 0);
  jcc(Assembler::equal, runtime);
  subl(queue_index, wordSize);
  movptr(tmp2, buffer);
#ifdef _LP64
  movslq(rscratch1, queue_index);
  addq(tmp2, rscratch1);
  movq(Address(tmp2, 0), card_addr);
#else
  addl(tmp2, queue_index);
  movl(Address(tmp2, 0), card_index);
#endif
  jmp(done);

  bind(runtime);
  // save the live input values
  push(store_addr);
  push(new_val);
#ifdef _LP64
  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, r15_thread);
#else
  push(thread);
  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, thread);
  pop(thread);
#endif
  pop(new_val);
  pop(store_addr);

  bind(done);
}

#endif // SERIALGC
//////////////////////////////////////////////////////////////////////////////////


void MacroAssembler::store_check(Register obj) {
  // Does a store check for the oop in register obj. The content of
  // register obj is destroyed afterwards.
  store_check_part_1(obj);
  store_check_part_2(obj);
}

void MacroAssembler::store_check(Register obj, Address dst) {
  store_check(obj);
}


// split the store check operation so that other instructions can be scheduled in between
void MacroAssembler::store_check_part_1(Register obj) {
  BarrierSet* bs = Universe::heap()->barrier_set();
  assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
  shrptr(obj, CardTableModRefBS::card_shift);
}

// Dirty the card for the (pre-shifted) address in obj; see store_check_part_1.
void MacroAssembler::store_check_part_2(Register obj) {
  BarrierSet* bs = Universe::heap()->barrier_set();
  assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
  CardTableModRefBS* ct = (CardTableModRefBS*)bs;
  assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");

  // The calculation for byte_map_base is as follows:
  // byte_map_base = _byte_map - (uintptr_t(low_bound) >> card_shift);
  // So this essentially converts an address to a displacement and
  // it will never need to be relocated. On 64bit however the value may be too
  // large for a 32bit displacement

  intptr_t disp = (intptr_t) ct->byte_map_base;
  if (is_simm32(disp)) {
    Address cardtable(noreg, obj, Address::times_1, disp);
    movb(cardtable, 0);
  } else {
    // By doing it as an ExternalAddress disp could be converted to a rip-relative
    // displacement and done in a single instruction given favorable mapping and
    // a smarter version of as_Address. Worst case it is two instructions which
    // is no worse off than loading disp into a register and doing as a simple
    // Address() as above.
    // We can't do as ExternalAddress as the only style since if disp == 0 we'll
    // assert since NULL isn't acceptable in a reloc (see 6644928). In any case
    // in some cases we'll get a single instruction version.

    ExternalAddress cardtable((address)disp);
    Address index(noreg, obj, Address::times_1);
    movb(as_Address(ArrayAddress(cardtable, index)), 0);
  }
}

// Pointer-width subtract of an immediate.
void MacroAssembler::subptr(Register dst, int32_t imm32) {
  LP64_ONLY(subq(dst, imm32)) NOT_LP64(subl(dst, imm32));
}

// Force generation of a 4 byte immediate value even if it fits into 8bit
void MacroAssembler::subptr_imm32(Register dst, int32_t imm32) {
  LP64_ONLY(subq_imm32(dst, imm32)) NOT_LP64(subl_imm32(dst, imm32));
}

// Pointer-width subtract of a register.
void MacroAssembler::subptr(Register dst, Register src) {
  LP64_ONLY(subq(dst, src)) NOT_LP64(subl(dst, src));
}

// C++ bool manipulation
void MacroAssembler::testbool(Register dst) {
  if(sizeof(bool) == 1)
    testb(dst, 0xff);
  else if(sizeof(bool) == 2) {
    // testw implementation needed for two byte bools
    ShouldNotReachHere();
  } else if(sizeof(bool) == 4)
    testl(dst, dst);
  else
    // unsupported
    ShouldNotReachHere();
}

void MacroAssembler::testptr(Register dst, Register src) {
  LP64_ONLY(testq(dst, src)) NOT_LP64(testl(dst, src));
}

// Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes.
8061 void MacroAssembler::tlab_allocate(Register obj, 8062 Register var_size_in_bytes, 8063 int con_size_in_bytes, 8064 Register t1, 8065 Register t2, 8066 Label& slow_case) { 8067 assert_different_registers(obj, t1, t2); 8068 assert_different_registers(obj, var_size_in_bytes, t1); 8069 Register end = t2; 8070 Register thread = NOT_LP64(t1) LP64_ONLY(r15_thread); 8071 8072 verify_tlab(); 8073 8074 NOT_LP64(get_thread(thread)); 8075 8076 movptr(obj, Address(thread, JavaThread::tlab_top_offset())); 8077 if (var_size_in_bytes == noreg) { 8078 lea(end, Address(obj, con_size_in_bytes)); 8079 } else { 8080 lea(end, Address(obj, var_size_in_bytes, Address::times_1)); 8081 } 8082 cmpptr(end, Address(thread, JavaThread::tlab_end_offset())); 8083 jcc(Assembler::above, slow_case); 8084 8085 // update the tlab top pointer 8086 movptr(Address(thread, JavaThread::tlab_top_offset()), end); 8087 8088 // recover var_size_in_bytes if necessary 8089 if (var_size_in_bytes == end) { 8090 subptr(var_size_in_bytes, obj); 8091 } 8092 verify_tlab(); 8093 } 8094 8095 // Preserves rbx, and rdx. 8096 Register MacroAssembler::tlab_refill(Label& retry, 8097 Label& try_eden, 8098 Label& slow_case) { 8099 Register top = rax; 8100 Register t1 = rcx; 8101 Register t2 = rsi; 8102 Register thread_reg = NOT_LP64(rdi) LP64_ONLY(r15_thread); 8103 assert_different_registers(top, thread_reg, t1, t2, /* preserve: */ rbx, rdx); 8104 Label do_refill, discard_tlab; 8105 8106 if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) { 8107 // No allocation in the shared eden. 
8108 jmp(slow_case); 8109 } 8110 8111 NOT_LP64(get_thread(thread_reg)); 8112 8113 movptr(top, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset()))); 8114 movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_end_offset()))); 8115 8116 // calculate amount of free space 8117 subptr(t1, top); 8118 shrptr(t1, LogHeapWordSize); 8119 8120 // Retain tlab and allocate object in shared space if 8121 // the amount free in the tlab is too large to discard. 8122 cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset()))); 8123 jcc(Assembler::lessEqual, discard_tlab); 8124 8125 // Retain 8126 // %%% yuck as movptr... 8127 movptr(t2, (int32_t) ThreadLocalAllocBuffer::refill_waste_limit_increment()); 8128 addptr(Address(thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset())), t2); 8129 if (TLABStats) { 8130 // increment number of slow_allocations 8131 addl(Address(thread_reg, in_bytes(JavaThread::tlab_slow_allocations_offset())), 1); 8132 } 8133 jmp(try_eden); 8134 8135 bind(discard_tlab); 8136 if (TLABStats) { 8137 // increment number of refills 8138 addl(Address(thread_reg, in_bytes(JavaThread::tlab_number_of_refills_offset())), 1); 8139 // accumulate wastage -- t1 is amount free in tlab 8140 addl(Address(thread_reg, in_bytes(JavaThread::tlab_fast_refill_waste_offset())), t1); 8141 } 8142 8143 // if tlab is currently allocated (top or end != null) then 8144 // fill [top, end + alignment_reserve) with array object 8145 testptr(top, top); 8146 jcc(Assembler::zero, do_refill); 8147 8148 // set up the mark word 8149 movptr(Address(top, oopDesc::mark_offset_in_bytes()), (intptr_t)markOopDesc::prototype()->copy_set_hash(0x2)); 8150 // set the length to the remaining space 8151 subptr(t1, typeArrayOopDesc::header_size(T_INT)); 8152 addptr(t1, (int32_t)ThreadLocalAllocBuffer::alignment_reserve()); 8153 shlptr(t1, log2_intptr(HeapWordSize/sizeof(jint))); 8154 movl(Address(top, arrayOopDesc::length_offset_in_bytes()), t1); 8155 // set 
klass to intArrayKlass 8156 // dubious reloc why not an oop reloc? 8157 movptr(t1, ExternalAddress((address)Universe::intArrayKlassObj_addr())); 8158 // store klass last. concurrent gcs assumes klass length is valid if 8159 // klass field is not null. 8160 store_klass(top, t1); 8161 8162 movptr(t1, top); 8163 subptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_start_offset()))); 8164 incr_allocated_bytes(thread_reg, t1, 0); 8165 8166 // refill the tlab with an eden allocation 8167 bind(do_refill); 8168 movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_size_offset()))); 8169 shlptr(t1, LogHeapWordSize); 8170 // allocate new tlab, address returned in top 8171 eden_allocate(top, t1, 0, t2, slow_case); 8172 8173 // Check that t1 was preserved in eden_allocate. 8174 #ifdef ASSERT 8175 if (UseTLAB) { 8176 Label ok; 8177 Register tsize = rsi; 8178 assert_different_registers(tsize, thread_reg, t1); 8179 push(tsize); 8180 movptr(tsize, Address(thread_reg, in_bytes(JavaThread::tlab_size_offset()))); 8181 shlptr(tsize, LogHeapWordSize); 8182 cmpptr(t1, tsize); 8183 jcc(Assembler::equal, ok); 8184 stop("assert(t1 != tlab size)"); 8185 should_not_reach_here(); 8186 8187 bind(ok); 8188 pop(tsize); 8189 } 8190 #endif 8191 movptr(Address(thread_reg, in_bytes(JavaThread::tlab_start_offset())), top); 8192 movptr(Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())), top); 8193 addptr(top, t1); 8194 subptr(top, (int32_t)ThreadLocalAllocBuffer::alignment_reserve_in_bytes()); 8195 movptr(Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())), top); 8196 verify_tlab(); 8197 jmp(retry); 8198 8199 return thread_reg; // for use by caller 8200 } 8201 8202 void MacroAssembler::incr_allocated_bytes(Register thread, 8203 Register var_size_in_bytes, 8204 int con_size_in_bytes, 8205 Register t1) { 8206 if (!thread->is_valid()) { 8207 #ifdef _LP64 8208 thread = r15_thread; 8209 #else 8210 assert(t1->is_valid(), "need temp reg"); 8211 thread = t1; 8212 get_thread(thread); 
8213 #endif 8214 } 8215 8216 #ifdef _LP64 8217 if (var_size_in_bytes->is_valid()) { 8218 addq(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), var_size_in_bytes); 8219 } else { 8220 addq(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), con_size_in_bytes); 8221 } 8222 #else 8223 if (var_size_in_bytes->is_valid()) { 8224 addl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), var_size_in_bytes); 8225 } else { 8226 addl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), con_size_in_bytes); 8227 } 8228 adcl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())+4), 0); 8229 #endif 8230 } 8231 8232 void MacroAssembler::fp_runtime_fallback(address runtime_entry, int nb_args, int num_fpu_regs_in_use) { 8233 pusha(); 8234 8235 // if we are coming from c1, xmm registers may be live 8236 if (UseSSE >= 1) { 8237 subptr(rsp, sizeof(jdouble)* LP64_ONLY(16) NOT_LP64(8)); 8238 } 8239 int off = 0; 8240 if (UseSSE == 1) { 8241 movflt(Address(rsp,off++*sizeof(jdouble)),xmm0); 8242 movflt(Address(rsp,off++*sizeof(jdouble)),xmm1); 8243 movflt(Address(rsp,off++*sizeof(jdouble)),xmm2); 8244 movflt(Address(rsp,off++*sizeof(jdouble)),xmm3); 8245 movflt(Address(rsp,off++*sizeof(jdouble)),xmm4); 8246 movflt(Address(rsp,off++*sizeof(jdouble)),xmm5); 8247 movflt(Address(rsp,off++*sizeof(jdouble)),xmm6); 8248 movflt(Address(rsp,off++*sizeof(jdouble)),xmm7); 8249 } else if (UseSSE >= 2) { 8250 movdbl(Address(rsp,off++*sizeof(jdouble)),xmm0); 8251 movdbl(Address(rsp,off++*sizeof(jdouble)),xmm1); 8252 movdbl(Address(rsp,off++*sizeof(jdouble)),xmm2); 8253 movdbl(Address(rsp,off++*sizeof(jdouble)),xmm3); 8254 movdbl(Address(rsp,off++*sizeof(jdouble)),xmm4); 8255 movdbl(Address(rsp,off++*sizeof(jdouble)),xmm5); 8256 movdbl(Address(rsp,off++*sizeof(jdouble)),xmm6); 8257 movdbl(Address(rsp,off++*sizeof(jdouble)),xmm7); 8258 #ifdef _LP64 8259 movdbl(Address(rsp,off++*sizeof(jdouble)),xmm8); 8260 
movdbl(Address(rsp,off++*sizeof(jdouble)),xmm9); 8261 movdbl(Address(rsp,off++*sizeof(jdouble)),xmm10); 8262 movdbl(Address(rsp,off++*sizeof(jdouble)),xmm11); 8263 movdbl(Address(rsp,off++*sizeof(jdouble)),xmm12); 8264 movdbl(Address(rsp,off++*sizeof(jdouble)),xmm13); 8265 movdbl(Address(rsp,off++*sizeof(jdouble)),xmm14); 8266 movdbl(Address(rsp,off++*sizeof(jdouble)),xmm15); 8267 #endif 8268 } 8269 8270 // Preserve registers across runtime call 8271 int incoming_argument_and_return_value_offset = -1; 8272 if (num_fpu_regs_in_use > 1) { 8273 // Must preserve all other FPU regs (could alternatively convert 8274 // SharedRuntime::dsin, dcos etc. into assembly routines known not to trash 8275 // FPU state, but can not trust C compiler) 8276 NEEDS_CLEANUP; 8277 // NOTE that in this case we also push the incoming argument(s) to 8278 // the stack and restore it later; we also use this stack slot to 8279 // hold the return value from dsin, dcos etc. 8280 for (int i = 0; i < num_fpu_regs_in_use; i++) { 8281 subptr(rsp, sizeof(jdouble)); 8282 fstp_d(Address(rsp, 0)); 8283 } 8284 incoming_argument_and_return_value_offset = sizeof(jdouble)*(num_fpu_regs_in_use-1); 8285 for (int i = nb_args-1; i >= 0; i--) { 8286 fld_d(Address(rsp, incoming_argument_and_return_value_offset-i*sizeof(jdouble))); 8287 } 8288 } 8289 8290 subptr(rsp, nb_args*sizeof(jdouble)); 8291 for (int i = 0; i < nb_args; i++) { 8292 fstp_d(Address(rsp, i*sizeof(jdouble))); 8293 } 8294 8295 #ifdef _LP64 8296 if (nb_args > 0) { 8297 movdbl(xmm0, Address(rsp, 0)); 8298 } 8299 if (nb_args > 1) { 8300 movdbl(xmm1, Address(rsp, sizeof(jdouble))); 8301 } 8302 assert(nb_args <= 2, "unsupported number of args"); 8303 #endif // _LP64 8304 8305 // NOTE: we must not use call_VM_leaf here because that requires a 8306 // complete interpreter frame in debug mode -- same bug as 4387334 8307 // MacroAssembler::call_VM_leaf_base is perfectly safe and will 8308 // do proper 64bit abi 8309 8310 NEEDS_CLEANUP; 8311 // Need to add 
  // stack banging before this runtime call if it needs to
  // be taken; however, there is no generic stack banging routine at
  // the MacroAssembler level

  MacroAssembler::call_VM_leaf_base(runtime_entry, 0);

#ifdef _LP64
  // On 64-bit the C ABI returns the double in xmm0; bounce it through the
  // stack so the result ends up on the x87 stack (F-TOS), which is what
  // callers of this fallback expect.
  movsd(Address(rsp, 0), xmm0);
  fld_d(Address(rsp, 0));
#endif // _LP64
  addptr(rsp, sizeof(jdouble) * nb_args);
  if (num_fpu_regs_in_use > 1) {
    // Must save return value to stack and then restore entire FPU
    // stack except incoming arguments
    fstp_d(Address(rsp, incoming_argument_and_return_value_offset));
    for (int i = 0; i < num_fpu_regs_in_use - nb_args; i++) {
      fld_d(Address(rsp, 0));
      addptr(rsp, sizeof(jdouble));
    }
    fld_d(Address(rsp, (nb_args-1)*sizeof(jdouble)));
    addptr(rsp, sizeof(jdouble) * nb_args);
  }

  // Restore the XMM registers spilled before the runtime call.
  // Offsets mirror the save sequence (sizeof(jdouble) per slot).
  off = 0;
  if (UseSSE == 1)  {
    movflt(xmm0, Address(rsp,off++*sizeof(jdouble)));
    movflt(xmm1, Address(rsp,off++*sizeof(jdouble)));
    movflt(xmm2, Address(rsp,off++*sizeof(jdouble)));
    movflt(xmm3, Address(rsp,off++*sizeof(jdouble)));
    movflt(xmm4, Address(rsp,off++*sizeof(jdouble)));
    movflt(xmm5, Address(rsp,off++*sizeof(jdouble)));
    movflt(xmm6, Address(rsp,off++*sizeof(jdouble)));
    movflt(xmm7, Address(rsp,off++*sizeof(jdouble)));
  } else if (UseSSE >= 2) {
    movdbl(xmm0, Address(rsp,off++*sizeof(jdouble)));
    movdbl(xmm1, Address(rsp,off++*sizeof(jdouble)));
    movdbl(xmm2, Address(rsp,off++*sizeof(jdouble)));
    movdbl(xmm3, Address(rsp,off++*sizeof(jdouble)));
    movdbl(xmm4, Address(rsp,off++*sizeof(jdouble)));
    movdbl(xmm5, Address(rsp,off++*sizeof(jdouble)));
    movdbl(xmm6, Address(rsp,off++*sizeof(jdouble)));
    movdbl(xmm7, Address(rsp,off++*sizeof(jdouble)));
#ifdef _LP64
    movdbl(xmm8,  Address(rsp,off++*sizeof(jdouble)));
    movdbl(xmm9,  Address(rsp,off++*sizeof(jdouble)));
    movdbl(xmm10, Address(rsp,off++*sizeof(jdouble)));
    movdbl(xmm11, Address(rsp,off++*sizeof(jdouble)));
    movdbl(xmm12, Address(rsp,off++*sizeof(jdouble)));
    movdbl(xmm13, Address(rsp,off++*sizeof(jdouble)));
    movdbl(xmm14, Address(rsp,off++*sizeof(jdouble)));
    movdbl(xmm15, Address(rsp,off++*sizeof(jdouble)));
#endif
  }
  if (UseSSE >= 1) {
    // Pop the XMM spill area: 16 regs on LP64, 8 on 32-bit.
    addptr(rsp, sizeof(jdouble)* LP64_ONLY(16) NOT_LP64(8));
  }
  popa();
}

// pi/4 — the cutoff below which fsin/fcos/ftan deliver full precision
// without argument reduction.
static const double pi_4 = 0.7853981633974483;

// Emit code that computes sin ('s'), cos ('c') or tan ('t') of the double
// currently on the x87 F-TOS.  Arguments with |x| <= pi/4 use the FPU
// instruction directly; anything larger (or an unreachable pi_4 constant)
// falls through to a SharedRuntime call via fp_runtime_fallback.
void MacroAssembler::trigfunc(char trig, int num_fpu_regs_in_use) {
  // A hand-coded argument reduction for values in fabs(pi/4, pi/2)
  // was attempted in this code; unfortunately it appears that the
  // switch to 80-bit precision and back causes this to be
  // unprofitable compared with simply performing a runtime call if
  // the argument is out of the (-pi/4, pi/4) range.

  Register tmp = noreg;
  if (!VM_Version::supports_cmov()) {
    // fcmp needs a temporary so preserve rbx,
    tmp = rbx;
    push(tmp);
  }

  Label slow_case, done;

  ExternalAddress pi4_adr = (address)&pi_4;
  if (reachable(pi4_adr)) {
    // x ?<= pi/4
    fld_d(pi4_adr);
    fld_s(1);                // Stack:  X  PI/4  X
    fabs();                  // Stack: |X| PI/4  X
    fcmp(tmp);
    jcc(Assembler::above, slow_case);

    // fastest case: -pi/4 <= x <= pi/4
    switch(trig) {
    case 's':
      fsin();
      break;
    case 'c':
      fcos();
      break;
    case 't':
      ftan();
      break;
    default:
      assert(false, "bad intrinsic");
      break;
    }
    jmp(done);
  }

  // slow case: runtime call
  bind(slow_case);

  switch(trig) {
  case 's':
    {
      fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dsin), 1, num_fpu_regs_in_use);
    }
    break;
  case 'c':
    {
      fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dcos), 1, num_fpu_regs_in_use);
    }
    break;
  case 't':
    {
      fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dtan), 1, num_fpu_regs_in_use);
    }
    break;
  default:
    assert(false, "bad intrinsic");
    break;
  }

  // Come here with result in F-TOS
  bind(done);

  if (tmp != noreg) {
    pop(tmp);
  }
}


// Look up the method for a megamorphic invokeinterface call.
// The target method is determined by <intf_klass, itable_index>.
// The receiver klass is in recv_klass.
// On success, the result will be in method_result, and execution falls through.
// On failure, execution transfers to the given label.
void MacroAssembler::lookup_interface_method(Register recv_klass,
                                             Register intf_klass,
                                             RegisterOrConstant itable_index,
                                             Register method_result,
                                             Register scan_temp,
                                             Label& L_no_such_interface) {
  assert_different_registers(recv_klass, intf_klass, method_result, scan_temp);
  assert(itable_index.is_constant() || itable_index.as_register() == method_result,
         "caller must use same register for non-constant itable index as for method");

  // Compute start of first itableOffsetEntry (which is at the end of the vtable)
  int vtable_base = instanceKlass::vtable_start_offset() * wordSize;
  int itentry_off = itableMethodEntry::method_offset_in_bytes();
  int scan_step   = itableOffsetEntry::size() * wordSize;
  int vte_size    = vtableEntry::size() * wordSize;
  Address::ScaleFactor times_vte_scale = Address::times_ptr;
  assert(vte_size == wordSize, "else adjust times_vte_scale");

  movl(scan_temp, Address(recv_klass, instanceKlass::vtable_length_offset() * wordSize));

  // %%% Could store the aligned, prescaled offset in the klassoop.
  lea(scan_temp, Address(recv_klass, scan_temp, times_vte_scale, vtable_base));
  if (HeapWordsPerLong > 1) {
    // Round up to align_object_offset boundary
    // see code for instanceKlass::start_of_itable!
    round_to(scan_temp, BytesPerLong);
  }

  // Adjust recv_klass by scaled itable_index, so we can free itable_index.
  assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below");
  lea(recv_klass, Address(recv_klass, itable_index, Address::times_ptr, itentry_off));

  // for (scan = klass->itable(); scan->interface() != NULL; scan += scan_step) {
  //   if (scan->interface() == intf) {
  //     result = (klass + scan->offset() + itable_index);
  //   }
  // }
  Label search, found_method;

  // The scan loop is peeled once: the first probe (peel == 1) branches to
  // found_method on a hit; subsequent iterations invert the test so a hit
  // falls through to found_method directly.
  for (int peel = 1; peel >= 0; peel--) {
    movptr(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes()));
    cmpptr(intf_klass, method_result);

    if (peel) {
      jccb(Assembler::equal, found_method);
    } else {
      jccb(Assembler::notEqual, search);
      // (invert the test to fall through to found_method...)
    }

    if (!peel)  break;

    bind(search);

    // Check that the previous entry is non-null.  A null entry means that
    // the receiver class doesn't implement the interface, and wasn't the
    // same as when the caller was compiled.
    testptr(method_result, method_result);
    jcc(Assembler::zero, L_no_such_interface);
    addptr(scan_temp, scan_step);
  }

  bind(found_method);

  // Got a hit.
  // scan_temp now holds the interface's offset entry; use it to index the
  // itableMethodEntry relative to the pre-adjusted recv_klass.
  movl(scan_temp, Address(scan_temp, itableOffsetEntry::offset_offset_in_bytes()));
  movptr(method_result, Address(recv_klass, scan_temp, Address::times_1));
}


// Full subtype check: fast path first, then the slow (linear-scan) path.
// Branches to L_success on a positive answer; on failure execution falls
// through (L_failure is bound at the end).
void MacroAssembler::check_klass_subtype(Register sub_klass,
                           Register super_klass,
                           Register temp_reg,
                           Label& L_success) {
  Label L_failure;
  check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg,        &L_success, &L_failure, NULL);
  check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, noreg, &L_success, NULL);
  bind(L_failure);
}


// Fast path of the subtype check: self-check plus the supertype display.
// Any of L_success / L_failure / L_slow_path may be NULL (at most one),
// meaning "fall through" for that outcome.
void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
                                                   Register super_klass,
                                                   Register temp_reg,
                                                   Label* L_success,
                                                   Label* L_failure,
                                                   Label* L_slow_path,
                                        RegisterOrConstant super_check_offset) {
  assert_different_registers(sub_klass, super_klass, temp_reg);
  // -1 (the constant_or_zero sentinel) means the caller did not supply the
  // super_check_offset, so it must be loaded from super_klass.
  bool must_load_sco = (super_check_offset.constant_or_zero() == -1);
  if (super_check_offset.is_register()) {
    assert_different_registers(sub_klass, super_klass,
                               super_check_offset.as_register());
  } else if (must_load_sco) {
    assert(temp_reg != noreg, "supply either a temp or a register offset");
  }

  Label L_fallthrough;
  int label_nulls = 0;
  if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
  if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
  if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; }
  assert(label_nulls <= 1, "at most one NULL in the batch");

  int sc_offset  = in_bytes(Klass::secondary_super_cache_offset());
  int sco_offset = in_bytes(Klass::super_check_offset_offset());
  Address super_check_offset_addr(super_klass, sco_offset);

  // Hacked jcc, which "knows" that L_fallthrough, at least, is in
  // range of a jccb.  If this routine grows larger, reconsider at
  // least some of these.
#define local_jcc(assembler_cond, label)                                \
  if (&(label) == &L_fallthrough)  jccb(assembler_cond, label);         \
  else                             jcc( assembler_cond, label) /*omit semi*/

  // Hacked jmp, which may only be used just before L_fallthrough.
#define final_jmp(label)                                                \
  if (&(label) == &L_fallthrough) { /*do nothing*/ }                    \
  else                            jmp(label)                /*omit semi*/

  // If the pointers are equal, we are done (e.g., String[] elements).
  // This self-check enables sharing of secondary supertype arrays among
  // non-primary types such as array-of-interface.  Otherwise, each such
  // type would need its own customized SSA.
  // We move this check to the front of the fast path because many
  // type checks are in fact trivially successful in this manner,
  // so we get a nicely predicted branch right at the start of the check.
  cmpptr(sub_klass, super_klass);
  local_jcc(Assembler::equal, *L_success);

  // Check the supertype display:
  if (must_load_sco) {
    // Positive movl does right thing on LP64.
    movl(temp_reg, super_check_offset_addr);
    super_check_offset = RegisterOrConstant(temp_reg);
  }
  Address super_check_addr(sub_klass, super_check_offset, Address::times_1, 0);
  cmpptr(super_klass, super_check_addr); // load displayed supertype

  // This check has worked decisively for primary supers.
  // Secondary supers are sought in the super_cache ('super_cache_addr').
  // (Secondary supers are interfaces and very deeply nested subtypes.)
  // This works in the same check above because of a tricky aliasing
  // between the super_cache and the primary super display elements.
  // (The 'super_check_addr' can address either, as the case requires.)
  // Note that the cache is updated below if it does not help us find
  // what we need immediately.
  // So if it was a primary super, we can just fail immediately.
  // Otherwise, it's the slow path for us (no success at this point).

  // Three cases, depending on how much is known about super_check_offset
  // at code-generation time: register (fully dynamic), the secondary-super
  // cache offset (slow path needed), or any other constant (fast decision).
  if (super_check_offset.is_register()) {
    local_jcc(Assembler::equal, *L_success);
    cmpl(super_check_offset.as_register(), sc_offset);
    if (L_failure == &L_fallthrough) {
      local_jcc(Assembler::equal, *L_slow_path);
    } else {
      local_jcc(Assembler::notEqual, *L_failure);
      final_jmp(*L_slow_path);
    }
  } else if (super_check_offset.as_constant() == sc_offset) {
    // Need a slow path; fast failure is impossible.
    if (L_slow_path == &L_fallthrough) {
      local_jcc(Assembler::equal, *L_success);
    } else {
      local_jcc(Assembler::notEqual, *L_slow_path);
      final_jmp(*L_success);
    }
  } else {
    // No slow path; it's a fast decision.
    if (L_failure == &L_fallthrough) {
      local_jcc(Assembler::equal, *L_success);
    } else {
      local_jcc(Assembler::notEqual, *L_failure);
      final_jmp(*L_success);
    }
  }

  bind(L_fallthrough);

#undef local_jcc
#undef final_jmp
}


// Slow path of the subtype check: linear scan of sub_klass's secondary
// supers array using repne scas, updating the secondary-super cache on a
// hit.  L_success/L_failure may be NULL (at most one) meaning fall-through.
// If set_cond_codes, the Z flag is additionally left set on success and
// clear on failure for the AD files.
void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
                                                   Register super_klass,
                                                   Register temp_reg,
                                                   Register temp2_reg,
                                                   Label* L_success,
                                                   Label* L_failure,
                                                   bool set_cond_codes) {
  assert_different_registers(sub_klass, super_klass, temp_reg);
  if (temp2_reg != noreg)
    assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg);
#define IS_A_TEMP(reg) ((reg) == temp_reg || (reg) == temp2_reg)

  Label L_fallthrough;
  int label_nulls = 0;
  if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
  if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
  assert(label_nulls <= 1, "at most one NULL in the batch");

  // a couple of useful fields in sub_klass:
  int ss_offset = in_bytes(Klass::secondary_supers_offset());
  int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
  Address secondary_supers_addr(sub_klass, ss_offset);
  Address super_cache_addr(     sub_klass, sc_offset);

  // Do a linear scan of the secondary super-klass chain.
  // This code is rarely used, so simplicity is a virtue here.
  // The repne_scan instruction uses fixed registers, which we must spill.
  // Don't worry too much about pre-existing connections with the input regs.

  assert(sub_klass != rax, "killed reg"); // killed by mov(rax, super)
  assert(sub_klass != rcx, "killed reg"); // killed by lea(rcx, &pst_counter)

  // Get super_klass value into rax (even if it was in rdi or rcx).
  bool pushed_rax = false, pushed_rcx = false, pushed_rdi = false;
  if (super_klass != rax || UseCompressedOops) {
    if (!IS_A_TEMP(rax)) { push(rax); pushed_rax = true; }
    mov(rax, super_klass);
  }
  if (!IS_A_TEMP(rcx)) { push(rcx); pushed_rcx = true; }
  if (!IS_A_TEMP(rdi)) { push(rdi); pushed_rdi = true; }

#ifndef PRODUCT
  // Count slow-path subtype probes for diagnostics.
  int* pst_counter = &SharedRuntime::_partial_subtype_ctr;
  ExternalAddress pst_counter_addr((address) pst_counter);
  NOT_LP64(  incrementl(pst_counter_addr) );
  LP64_ONLY( lea(rcx, pst_counter_addr) );
  LP64_ONLY( incrementl(Address(rcx, 0)) );
#endif //PRODUCT

  // We will consult the secondary-super array.
  movptr(rdi, secondary_supers_addr);
  // Load the array length.  (Positive movl does right thing on LP64.)
  movl(rcx, Address(rdi, arrayOopDesc::length_offset_in_bytes()));
  // Skip to start of data.
  addptr(rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT));

  // Scan RCX words at [RDI] for an occurrence of RAX.
  // Set NZ/Z based on last compare.
  // Z flag value will not be set by 'repne' if RCX == 0 since 'repne' does
  // not change flags (only scas instruction which is repeated sets flags).
  // Set Z = 0 (not equal) before 'repne' to indicate that class was not found.
#ifdef _LP64
  // This part is tricky, as values in supers array could be 32 or 64 bit wide
  // and we store values in objArrays always encoded, thus we need to encode
  // the value of rax before repne.  Note that rax is dead after the repne.
  if (UseCompressedOops) {
    encode_heap_oop_not_null(rax); // Changes flags.
    // The superclass is never null; it would be a basic system error if a null
    // pointer were to sneak in here.  Note that we have already loaded the
    // Klass::super_check_offset from the super_klass in the fast path,
    // so if there is a null in that register, we are already in the afterlife.
    testl(rax,rax); // Set Z = 0
    repne_scanl();
  } else
#endif // _LP64
  {
    testptr(rax,rax); // Set Z = 0
    repne_scan();
  }
  // Unspill the temp. registers:
  if (pushed_rdi)  pop(rdi);
  if (pushed_rcx)  pop(rcx);
  if (pushed_rax)  pop(rax);

  if (set_cond_codes) {
    // Special hack for the AD files:  rdi is guaranteed non-zero.
    assert(!pushed_rdi, "rdi must be left non-NULL");
    // Also, the condition codes are properly set Z/NZ on succeed/failure.
  }

  // NZ after the scan means the super was not found in the array.
  if (L_failure == &L_fallthrough)
        jccb(Assembler::notEqual, *L_failure);
  else  jcc(Assembler::notEqual, *L_failure);

  // Success.  Cache the super we found and proceed in triumph.
  movptr(super_cache_addr, super_klass);

  if (L_success != &L_fallthrough) {
    jmp(*L_success);
  }

#undef IS_A_TEMP

  bind(L_fallthrough);
}


// Conditional 32-bit move from memory; emulated with a short branch on
// CPUs without cmov support.
void MacroAssembler::cmov32(Condition cc, Register dst, Address src) {
  if (VM_Version::supports_cmov()) {
    cmovl(cc, dst, src);
  } else {
    Label L;
    jccb(negate_condition(cc), L);
    movl(dst, src);
    bind(L);
  }
}

// Conditional 32-bit register-to-register move; emulated with a short
// branch on CPUs without cmov support.
void MacroAssembler::cmov32(Condition cc, Register dst, Register src) {
  if (VM_Version::supports_cmov()) {
    cmovl(cc, dst, src);
  } else {
    Label L;
    jccb(negate_condition(cc), L);
    movl(dst, src);
    bind(L);
  }
}

// Emit a call to the verify_oop stub for the oop in 'reg'.  The message
// buffer is intentionally never freed: the generated code references it
// for the lifetime of the code blob.
void MacroAssembler::verify_oop(Register reg, const char* s) {
  if (!VerifyOops) return;

  // Pass register number to verify_oop_subroutine
  char* b = new char[strlen(s) + 50];
  sprintf(b, "verify_oop: %s: %s", reg->name(), s);
#ifdef _LP64
  push(rscratch1);                    // save r10, trashed by movptr()
#endif
  push(rax);                          // save rax,
  push(reg);                          // pass register argument
  ExternalAddress buffer((address) b);
  // avoid using pushptr, as it modifies scratch registers
  // and our contract is not to modify anything
  movptr(rax, buffer.addr());
  push(rax);
  // call indirectly to solve generation ordering problem
  movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));
  call(rax);
  // Caller pops the arguments (oop, message) and restores rax, r10
}


// Returns the value at *delayed_value_addr as a constant if it is already
// known (non-zero); otherwise emits code that loads it at run time into
// 'tmp' and returns that register.
RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr,
                                                      Register tmp,
                                                      int offset) {
  intptr_t value = *delayed_value_addr;
  if (value != 0)
    return RegisterOrConstant(value + offset);

  // load indirectly to solve generation ordering problem
  movptr(tmp, ExternalAddress((address) delayed_value_addr));

#ifdef ASSERT
  { Label L;
    // Sanity-check that the delayed value has actually been filled in by
    // the time the generated code runs; halt (or stop with a message in
    // WizardMode) if it is still zero.
    testptr(tmp, tmp);
    if (WizardMode) {
      jcc(Assembler::notZero, L);
      char* buf = new char[40];
      sprintf(buf, "DelayedValue="INTPTR_FORMAT, delayed_value_addr[1]);
      stop(buf);
    } else {
      jccb(Assembler::notZero, L);
      hlt();
    }
    bind(L);
  }
#endif

  if (offset != 0)
    addptr(tmp, offset);

  return RegisterOrConstant(tmp);
}


// registers on entry:
//  - rax ('check' register): required MethodType
//  - rcx: method handle
//  - rdx, rsi, or ?: killable temp
void MacroAssembler::check_method_handle_type(Register mtype_reg, Register mh_reg,
                                              Register temp_reg,
                                              Label& wrong_method_type) {
  Address type_addr(mh_reg, delayed_value(java_lang_invoke_MethodHandle::type_offset_in_bytes, temp_reg));
  // compare method type against that of the receiver
  if (UseCompressedOops) {
    // Compressed oops cannot be compared against memory directly; decode
    // into temp_reg first.
    load_heap_oop(temp_reg, type_addr);
    cmpptr(mtype_reg, temp_reg);
  } else {
    cmpptr(mtype_reg, type_addr);
  }
  jcc(Assembler::notEqual, wrong_method_type);
}


// A method handle has a "vmslots" field which gives the size of its
// argument list in JVM stack slots.  This field is either located directly
// in every method handle, or else is indirectly accessed through the
// method handle's MethodType.  This macro hides the distinction.
void MacroAssembler::load_method_handle_vmslots(Register vmslots_reg, Register mh_reg,
                                                Register temp_reg) {
  assert_different_registers(vmslots_reg, mh_reg, temp_reg);
  // load mh.type.form.vmslots
  // vmslots_reg doubles as the intermediate for the chained loads.
  Register temp2_reg = vmslots_reg;
  load_heap_oop(temp2_reg, Address(mh_reg,    delayed_value(java_lang_invoke_MethodHandle::type_offset_in_bytes, temp_reg)));
  load_heap_oop(temp2_reg, Address(temp2_reg, delayed_value(java_lang_invoke_MethodType::form_offset_in_bytes, temp_reg)));
  movl(vmslots_reg, Address(temp2_reg, delayed_value(java_lang_invoke_MethodTypeForm::vmslots_offset_in_bytes, temp_reg)));
}


// registers on entry:
//  - rcx: method handle
//  - rdx: killable temp (interpreted only)
//  - rax: killable temp (compiled only)
void MacroAssembler::jump_to_method_handle_entry(Register mh_reg, Register temp_reg) {
  assert(mh_reg == rcx, "caller must put MH object in rcx");
  assert_different_registers(mh_reg, temp_reg);

  // pick out the interpreted side of the handler
  // NOTE: vmentry is not an oop!
  movptr(temp_reg, Address(mh_reg, delayed_value(java_lang_invoke_MethodHandle::vmentry_offset_in_bytes, temp_reg)));

  // off we go...
  jmp(Address(temp_reg, MethodHandleEntry::from_interpreted_entry_offset_in_bytes()));

  // for the various stubs which take control at this point,
  // see MethodHandles::generate_method_handle_stub
}


// Compute the rsp-relative address of an interpreter expression-stack slot
// (arg_slot may be a register or a constant); extra_slot_offset skips that
// many additional slots.  Accounts for the return PC on the stack.
Address MacroAssembler::argument_address(RegisterOrConstant arg_slot,
                                         int extra_slot_offset) {
  // cf. TemplateTable::prepare_invoke(), if (load_receiver).
  int stackElementSize = Interpreter::stackElementSize;
  int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0);
#ifdef ASSERT
  int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1);
  assert(offset1 - offset == stackElementSize, "correct arithmetic");
#endif
  Register             scale_reg    = noreg;
  Address::ScaleFactor scale_factor = Address::no_scale;
  if (arg_slot.is_constant()) {
    offset += arg_slot.as_constant() * stackElementSize;
  } else {
    scale_reg    = arg_slot.as_register();
    scale_factor = Address::times(stackElementSize);
  }
  offset += wordSize;           // return PC is on stack
  return Address(rsp, scale_reg, scale_factor, offset);
}


// Like verify_oop, but the oop to verify lives at a memory address rather
// than in a register.  The message buffer is intentionally never freed.
void MacroAssembler::verify_oop_addr(Address addr, const char* s) {
  if (!VerifyOops) return;

  // Address adjust(addr.base(), addr.index(), addr.scale(), addr.disp() + BytesPerWord);
  // Pass register number to verify_oop_subroutine
  char* b = new char[strlen(s) + 50];
  sprintf(b, "verify_oop_addr: %s", s);

#ifdef _LP64
  push(rscratch1);                    // save r10, trashed by movptr()
#endif
  push(rax);                          // save rax,
  // addr may contain rsp so we will have to adjust it based on the push
  // we just did (and on 64 bit we do two pushes)
  // NOTE: 64bit seemed to have had a bug in that it did movq(addr, rax); which
  // stores rax into addr which is backwards of what was intended.
  if (addr.uses(rsp)) {
    // Compensate for the push(es) above: one word on 32-bit, two on LP64.
    lea(rax, addr);
    pushptr(Address(rax, LP64_ONLY(2 *) BytesPerWord));
  } else {
    pushptr(addr);
  }

  ExternalAddress buffer((address) b);
  // pass msg argument
  // avoid using pushptr, as it modifies scratch registers
  // and our contract is not to modify anything
  movptr(rax, buffer.addr());
  push(rax);

  // call indirectly to solve generation ordering problem
  movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));
  call(rax);
  // Caller pops the arguments (addr, message) and restores rax, r10.
}

// Debug-build sanity check of the current thread's TLAB:
// start <= top <= end.  Emits nothing unless UseTLAB && VerifyOops.
void MacroAssembler::verify_tlab() {
#ifdef ASSERT
  if (UseTLAB && VerifyOops) {
    Label next, ok;
    Register t1 = rsi;
    Register thread_reg = NOT_LP64(rbx) LP64_ONLY(r15_thread);

    push(t1);
    NOT_LP64(push(thread_reg));
    NOT_LP64(get_thread(thread_reg));

    movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())));
    cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_start_offset())));
    jcc(Assembler::aboveEqual, next);
    stop("assert(top >= start)");
    should_not_reach_here();

    bind(next);
    movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())));
    cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())));
    jcc(Assembler::aboveEqual, ok);
    stop("assert(top <= end)");
    should_not_reach_here();

    bind(ok);
    NOT_LP64(pop(thread_reg));
    pop(t1);
  }
#endif
}

// Decoded view of the x87 FPU control word, for debug printing.
class ControlWord {
 public:
  int32_t _value;

  int  rounding_control() const  { return  (_value >> 10) & 3      ; }
  int  precision_control() const { return  (_value >>  8) & 3      ; }
  bool precision() const         { return ((_value >>  5) & 1) != 0; }
  bool underflow() const         { return ((_value >>  4) & 1) != 0; }
  bool overflow() const          { return ((_value >>  3) & 1) != 0; }
  bool zero_divide() const       { return ((_value >>  2) & 1) != 0; }
  bool denormalized() const      { return ((_value >>  1) & 1) != 0; }
  bool invalid() const           { return ((_value >>  0) & 1) != 0; }

  void print() const {
    // rounding control
    const char* rc;
    switch (rounding_control()) {
      case 0: rc = "round near"; break;
      case 1: rc = "round down"; break;
      case 2: rc = "round up "; break;
      case 3: rc = "chop "; break;
    };
    // precision control
    const char* pc;
    switch (precision_control()) {
      case 0: pc = "24 bits "; break;
      case 1: pc = "reserved"; break;
      case 2: pc = "53 bits "; break;
      case 3: pc = "64 bits "; break;
    };
    // flags: uppercase letter = mask bit set, lowercase = clear
    char f[9];
    f[0] = ' ';
    f[1] = ' ';
    f[2] = (precision   ()) ? 'P' : 'p';
    f[3] = (underflow   ()) ? 'U' : 'u';
    f[4] = (overflow    ()) ? 'O' : 'o';
    f[5] = (zero_divide ()) ? 'Z' : 'z';
    f[6] = (denormalized()) ? 'D' : 'd';
    f[7] = (invalid     ()) ? 'I' : 'i';
    f[8] = '\x0';
    // output
    printf("%04x masks = %s, %s, %s", _value & 0xFFFF, f, rc, pc);
  }

};

// Decoded view of the x87 FPU status word, for debug printing.
class StatusWord {
 public:
  int32_t _value;

  bool busy() const         { return ((_value >> 15) & 1) != 0; }
  bool C3() const           { return ((_value >> 14) & 1) != 0; }
  bool C2() const           { return ((_value >> 10) & 1) != 0; }
  bool C1() const           { return ((_value >>  9) & 1) != 0; }
  bool C0() const           { return ((_value >>  8) & 1) != 0; }
  int  top() const          { return  (_value >> 11) & 7      ; }
  bool error_status() const { return ((_value >>  7) & 1) != 0; }
  bool stack_fault() const  { return ((_value >>  6) & 1) != 0; }
  bool precision() const    { return ((_value >>  5) & 1) != 0; }
  bool underflow() const    { return ((_value >>  4) & 1) != 0; }
  bool overflow() const     { return ((_value >>  3) & 1) != 0; }
  bool zero_divide() const  { return ((_value >>  2) & 1) != 0; }
  bool denormalized() const { return ((_value >>  1) & 1) != 0; }
  bool invalid() const      { return ((_value >>  0) & 1) != 0; }

  void print() const {
    // condition codes
    char c[5];
    c[0] = (C3()) ? '3' : '-';
    c[1] = (C2()) ? '2' : '-';
    c[2] = (C1()) ? '1' : '-';
    c[3] = (C0()) ? '0' : '-';
    c[4] = '\x0';
    // flags
    char f[9];
    f[0] = (error_status()) ? 'E' : '-';
    f[1] = (stack_fault ()) ? 'S' : '-';
    f[2] = (precision   ()) ? 'P' : '-';
    f[3] = (underflow   ()) ? 'U' : '-';
    f[4] = (overflow    ()) ? 'O' : '-';
    f[5] = (zero_divide ()) ? 'Z' : '-';
    f[6] = (denormalized()) ? 'D' : '-';
    f[7] = (invalid     ()) ? 'I' : '-';
    f[8] = '\x0';
    // output
    printf("%04x flags = %s, cc = %s, top = %d", _value & 0xFFFF, f, c, top());
  }

};

// Decoded view of the x87 FPU tag word (2-bit tag per register).
class TagWord {
 public:
  int32_t _value;

  int tag_at(int i) const { return (_value >> (i*2)) & 3; }

  void print() const {
    printf("%04x", _value & 0xFFFF);
  }

};

// One 80-bit x87 register image: 64-bit mantissa (_m1:_m0) plus
// 16-bit sign+exponent (_ex).
class FPU_Register {
 public:
  int32_t _m0;
  int32_t _m1;
  int16_t _ex;

  bool is_indefinite() const {
    return _ex == -1 && _m1 == (int32_t)0xC0000000 && _m0 == 0;
  }

  void print() const {
    char  sign = (_ex < 0) ? '-' : '+';
    const char* kind = (_ex == 0x7FFF || _ex == (int16_t)-1) ? "NaN" : " ";
    printf("%c%04hx.%08x%08x %s", sign, _ex, _m1, _m0, kind);
  };

};

// Memory image of the full x87 FPU state as laid out by the state-saving
// stub; provides tag/register accessors and a debug print.
class FPU_State {
 public:
  enum {
    register_size       = 10,
    number_of_registers =  8,
    register_mask       =  7
  };

  ControlWord _control_word;
  StatusWord  _status_word;
  TagWord     _tag_word;
  int32_t     _error_offset;
  int32_t     _error_selector;
  int32_t     _data_offset;
  int32_t     _data_selector;
  int8_t      _register[register_size * number_of_registers];

  // Tag for stack-relative register ST(i), adjusting for the TOP field.
  int tag_for_st(int i) const { return _tag_word.tag_at((_status_word.top() + i) & register_mask); }
  FPU_Register* st(int i) const { return (FPU_Register*)&_register[register_size * i]; }

  const char* tag_as_string(int tag) const {
    switch (tag) {
      case 0: return "valid";
      case 1: return "zero";
      case 2: return "special";
      case 3: return "empty";
    }
    ShouldNotReachHere();
    return NULL;
  }

  void print() const {
    // print computation registers
    { int t = _status_word.top();
      for (int i = 0; i < number_of_registers; i++) {
        int j = (i - t) & register_mask;
        printf("%c r%d = ST%d = ", (j == 0 ?
                                   '*' : ' '), i, j);
        st(j)->print();
        printf(" %s\n", tag_as_string(_tag_word.tag_at(i)));
      }
    }
    printf("\n");
    // print control registers
    printf("ctrl = "); _control_word.print(); printf("\n");
    printf("stat = "); _status_word .print(); printf("\n");
    printf("tags = "); _tag_word    .print(); printf("\n");
  }

};

// Decoded view of the x86 EFLAGS register, for debug printing.
class Flag_Register {
 public:
  int32_t _value;

  bool overflow() const        { return ((_value >> 11) & 1) != 0; }
  bool direction() const       { return ((_value >> 10) & 1) != 0; }
  bool sign() const            { return ((_value >>  7) & 1) != 0; }
  bool zero() const            { return ((_value >>  6) & 1) != 0; }
  bool auxiliary_carry() const { return ((_value >>  4) & 1) != 0; }
  bool parity() const          { return ((_value >>  2) & 1) != 0; }
  bool carry() const           { return ((_value >>  0) & 1) != 0; }

  void print() const {
    // flags: letter shown when the flag is set, '-' otherwise
    char f[8];
    f[0] = (overflow       ()) ? 'O' : '-';
    f[1] = (direction      ()) ? 'D' : '-';
    f[2] = (sign           ()) ? 'S' : '-';
    f[3] = (zero           ()) ? 'Z' : '-';
    f[4] = (auxiliary_carry()) ? 'A' : '-';
    f[5] = (parity         ()) ? 'P' : '-';
    f[6] = (carry          ()) ? 'C' : '-';
    f[7] = '\x0';
    // output
    printf("%08x flags = %s", _value, f);
  }

};

// A single saved integer register, printed in hex and decimal.
class IU_Register {
 public:
  int32_t _value;

  void print() const {
    printf("%08x %11d", _value, _value);
  }

};

// Memory image of the integer-unit state (EFLAGS + GP registers) as laid
// out by push_CPU_state; field order matches the save order on the stack.
class IU_State {
 public:
  Flag_Register _eflags;
  IU_Register   _rdi;
  IU_Register   _rsi;
  IU_Register   _rbp;
  IU_Register   _rsp;
  IU_Register   _rbx;
  IU_Register   _rdx;
  IU_Register   _rcx;
  IU_Register   _rax;

  void print() const {
    // computation registers
    printf("rax, = "); _rax.print(); printf("\n");
    printf("rbx, = "); _rbx.print(); printf("\n");
    printf("rcx = "); _rcx.print(); printf("\n");
    printf("rdx = "); _rdx.print(); printf("\n");
    printf("rdi = "); _rdi.print(); printf("\n");
    printf("rsi = "); _rsi.print(); printf("\n");
    printf("rbp, = "); _rbp.print(); printf("\n");
    printf("rsp = "); _rsp.print(); printf("\n");
    printf("\n");
    // control registers
    printf("flgs = "); _eflags.print(); printf("\n");
  }
};


// Complete saved CPU state: FPU state followed by integer-unit state.
class CPU_State {
 public:
  FPU_State _fpu_state;
  IU_State  _iu_state;

  void print() const {
    printf("--------------------------------------------------\n");
    _iu_state .print();
    printf("\n");
    _fpu_state.print();
    printf("--------------------------------------------------\n");
  }

};


// Runtime helper called from generated code (see print_CPU_state below).
static void _print_CPU_state(CPU_State* state) {
  state->print();
};


// Emit code that dumps the current CPU state to stdout; preserves all
// registers via push_CPU_state/pop_CPU_state.
void MacroAssembler::print_CPU_state() {
  push_CPU_state();
  push(rsp);                // pass CPU state
  call(RuntimeAddress(CAST_FROM_FN_PTR(address, _print_CPU_state)));
  addptr(rsp, wordSize);    // discard argument
  pop_CPU_state();
}


// Runtime helper called from generated code (see verify_FPU below):
// checks that the saved x87 stack has the expected depth and is
// contiguous.  Returns false (after printing and asserting) on error.
static bool _verify_FPU(int stack_depth, char* s, CPU_State* state) {
  static int counter = 0;
  FPU_State* fs = &state->_fpu_state;
  counter++;
  // For
  // leaf calls, only verify that the top few elements remain empty.
  // We only need 1 empty at the top for C2 code.
  if( stack_depth < 0 ) {
    if( fs->tag_for_st(7) != 3 ) {
      printf("FPR7 not empty\n");
      state->print();
      assert(false, "error");
      return false;
    }
    return true;                // All other stack states do not matter
  }

  assert((fs->_control_word._value & 0xffff) == StubRoutines::_fpu_cntrl_wrd_std,
         "bad FPU control word");

  // compute stack depth
  int i = 0;
  while (i < FPU_State::number_of_registers && fs->tag_for_st(i)  < 3) i++;
  int d = i;
  while (i < FPU_State::number_of_registers && fs->tag_for_st(i) == 3) i++;
  // verify findings
  if (i != FPU_State::number_of_registers) {
    // stack not contiguous
    printf("%s: stack not contiguous at ST%d\n", s, i);
    state->print();
    assert(false, "error");
    return false;
  }
  // check if computed stack depth corresponds to expected stack depth
  // NOTE(review): this branch is unreachable — the stack_depth < 0 case
  // already returned above; only the else-branch below can execute.
  if (stack_depth < 0) {
    // expected stack depth is -stack_depth or less
    if (d > -stack_depth) {
      // too many elements on the stack
      printf("%s: <= %d stack elements expected but found %d\n", s, -stack_depth, d);
      state->print();
      assert(false, "error");
      return false;
    }
  } else {
    // expected stack depth is stack_depth
    if (d != stack_depth) {
      // wrong stack depth
      printf("%s: %d stack elements expected but found %d\n", s, stack_depth, d);
      state->print();
      assert(false, "error");
      return false;
    }
  }
  // everything is cool
  return true;
}


// Emit code that verifies the x87 stack depth via _verify_FPU, breaking
// into the debugger (int3) on failure.  No-op unless VerifyFPU is set.
void MacroAssembler::verify_FPU(int stack_depth, const char* s) {
  if (!VerifyFPU) return;
  push_CPU_state();
  push(rsp);                // pass CPU state
  ExternalAddress msg((address) s);
  // pass message string s
  pushptr(msg.addr());
  push(stack_depth);        // pass stack depth
  call(RuntimeAddress(CAST_FROM_FN_PTR(address, _verify_FPU)));
  addptr(rsp, 3 * wordSize);   // discard arguments
  // check for error
  { Label L;
    testl(rax, rax);
    jcc(Assembler::notZero, L);
    int3();                  // break if error condition
    bind(L);
  }
  pop_CPU_state();
}

// Load the klass pointer of the oop in src into dst, decoding it first
// when compressed oops are in use on LP64.
void MacroAssembler::load_klass(Register dst, Register src) {
#ifdef _LP64
  if (UseCompressedOops) {
    movl(dst, Address(src, oopDesc::klass_offset_in_bytes()));
    decode_heap_oop_not_null(dst);
  } else
#endif
    movptr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
}

// Load the prototype mark-word header from the klass of the oop in src.
// With compressed oops the narrow klass is combined with r12_heapbase
// inline (shifted per object alignment) instead of being fully decoded.
void MacroAssembler::load_prototype_header(Register dst, Register src) {
#ifdef _LP64
  if (UseCompressedOops) {
    assert (Universe::heap() != NULL, "java heap should be initialized");
    movl(dst, Address(src, oopDesc::klass_offset_in_bytes()));
    if (Universe::narrow_oop_shift() != 0) {
      assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
      if (LogMinObjAlignmentInBytes == Address::times_8) {
        // Shift folds into the addressing mode's scale factor.
        movq(dst, Address(r12_heapbase, dst, Address::times_8, Klass::prototype_header_offset()));
      } else {
        // OK to use shift since we don't need to preserve flags.
9332 shlq(dst, LogMinObjAlignmentInBytes); 9333 movq(dst, Address(r12_heapbase, dst, Address::times_1, Klass::prototype_header_offset())); 9334 } 9335 } else { 9336 movq(dst, Address(dst, Klass::prototype_header_offset())); 9337 } 9338 } else 9339 #endif 9340 { 9341 movptr(dst, Address(src, oopDesc::klass_offset_in_bytes())); 9342 movptr(dst, Address(dst, Klass::prototype_header_offset())); 9343 } 9344 } 9345 9346 void MacroAssembler::store_klass(Register dst, Register src) { 9347 #ifdef _LP64 9348 if (UseCompressedOops) { 9349 encode_heap_oop_not_null(src); 9350 movl(Address(dst, oopDesc::klass_offset_in_bytes()), src); 9351 } else 9352 #endif 9353 movptr(Address(dst, oopDesc::klass_offset_in_bytes()), src); 9354 } 9355 9356 void MacroAssembler::load_heap_oop(Register dst, Address src) { 9357 #ifdef _LP64 9358 if (UseCompressedOops) { 9359 movl(dst, src); 9360 decode_heap_oop(dst); 9361 } else 9362 #endif 9363 movptr(dst, src); 9364 } 9365 9366 // Doesn't do verfication, generates fixed size code 9367 void MacroAssembler::load_heap_oop_not_null(Register dst, Address src) { 9368 #ifdef _LP64 9369 if (UseCompressedOops) { 9370 movl(dst, src); 9371 decode_heap_oop_not_null(dst); 9372 } else 9373 #endif 9374 movptr(dst, src); 9375 } 9376 9377 void MacroAssembler::store_heap_oop(Address dst, Register src) { 9378 #ifdef _LP64 9379 if (UseCompressedOops) { 9380 assert(!dst.uses(src), "not enough registers"); 9381 encode_heap_oop(src); 9382 movl(dst, src); 9383 } else 9384 #endif 9385 movptr(dst, src); 9386 } 9387 9388 // Used for storing NULLs. 
void MacroAssembler::store_heap_oop_null(Address dst) {
  // Stores a null oop without going through encode_heap_oop (null needs no encoding).
#ifdef _LP64
  if (UseCompressedOops) {
    // Narrow null is a 32-bit zero.
    movl(dst, (int32_t)NULL_WORD);
  } else {
    // Full-width null: movslq sign-extends the 32-bit immediate to 64 bits.
    movslq(dst, (int32_t)NULL_WORD);
  }
#else
  movl(dst, (int32_t)NULL_WORD);
#endif
}

#ifdef _LP64
void MacroAssembler::store_klass_gap(Register dst, Register src) {
  // With compressed oops the klass field is only 32 bits wide, leaving a
  // 32-bit gap in the object header that must be filled (with src).
  if (UseCompressedOops) {
    // Store to klass gap in destination
    movl(Address(dst, oopDesc::klass_gap_offset_in_bytes()), src);
  }
}

#ifdef ASSERT
// Debug check that r12_heapbase still holds the narrow-oop base; stops the VM
// with 'msg' if it has been clobbered.
void MacroAssembler::verify_heapbase(const char* msg) {
  assert (UseCompressedOops, "should be compressed");
  assert (Universe::heap() != NULL, "java heap should be initialized");
  if (CheckCompressedOops) {
    Label ok;
    push(rscratch1); // cmpptr trashes rscratch1
    cmpptr(r12_heapbase, ExternalAddress((address)Universe::narrow_oop_base_addr()));
    jcc(Assembler::equal, ok);
    stop(msg);
    bind(ok);
    pop(rscratch1);
  }
}
#endif

// Algorithm must match oop.inline.hpp encode_heap_oop.
void MacroAssembler::encode_heap_oop(Register r) {
  // Compress a (possibly null) oop in r: r = (r - base) >> shift, with null
  // mapped to 0. Must match the algorithm in oop.inline.hpp encode_heap_oop.
#ifdef ASSERT
  verify_heapbase("MacroAssembler::encode_heap_oop: heap base corrupted?");
#endif
  verify_oop(r, "broken oop in encode_heap_oop");
  if (Universe::narrow_oop_base() == NULL) {
    // Zero-based compressed oops: only the (optional) shift is needed.
    if (Universe::narrow_oop_shift() != 0) {
      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
      shrq(r, LogMinObjAlignmentInBytes);
    }
    return;
  }
  // Null-preserving encode: replace null with the heap base so the
  // subtraction below yields 0 for null oops.
  testq(r, r);
  cmovq(Assembler::equal, r, r12_heapbase);
  subq(r, r12_heapbase);
  shrq(r, LogMinObjAlignmentInBytes);
}

void MacroAssembler::encode_heap_oop_not_null(Register r) {
  // Faster encode for oops known to be non-null (no null check/cmov needed).
#ifdef ASSERT
  verify_heapbase("MacroAssembler::encode_heap_oop_not_null: heap base corrupted?");
  if (CheckCompressedOops) {
    Label ok;
    testq(r, r);
    jcc(Assembler::notEqual, ok);
    stop("null oop passed to encode_heap_oop_not_null");
    bind(ok);
  }
#endif
  verify_oop(r, "broken oop in encode_heap_oop_not_null");
  if (Universe::narrow_oop_base() != NULL) {
    subq(r, r12_heapbase);
  }
  if (Universe::narrow_oop_shift() != 0) {
    assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
    shrq(r, LogMinObjAlignmentInBytes);
  }
}

void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) {
  // Two-register variant: encode src into dst, leaving src intact when
  // dst != src.
#ifdef ASSERT
  verify_heapbase("MacroAssembler::encode_heap_oop_not_null2: heap base corrupted?");
  if (CheckCompressedOops) {
    Label ok;
    testq(src, src);
    jcc(Assembler::notEqual, ok);
    stop("null oop passed to encode_heap_oop_not_null2");
    bind(ok);
  }
#endif
  verify_oop(src, "broken oop in encode_heap_oop_not_null2");
  if (dst != src) {
    movq(dst, src);
  }
  if (Universe::narrow_oop_base() != NULL) {
    subq(dst, r12_heapbase);
  }
  if (Universe::narrow_oop_shift() != 0) {
    assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
    shrq(dst, LogMinObjAlignmentInBytes);
  }
}

void  MacroAssembler::decode_heap_oop(Register r) {
  // Decompress a (possibly null) narrow oop in r: r = (r << shift) + base,
  // with 0 decoded back to null.
#ifdef ASSERT
  verify_heapbase("MacroAssembler::decode_heap_oop: heap base corrupted?");
#endif
  if (Universe::narrow_oop_base() == NULL) {
    if (Universe::narrow_oop_shift() != 0) {
      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
      shlq(r, LogMinObjAlignmentInBytes);
    }
  } else {
    Label done;
    // shlq sets ZF; skip the base add when the narrow oop was 0 (null)
    // so null stays null.
    shlq(r, LogMinObjAlignmentInBytes);
    jccb(Assembler::equal, done);
    addq(r, r12_heapbase);
    bind(done);
  }
  verify_oop(r, "broken oop in decode_heap_oop");
}

void  MacroAssembler::decode_heap_oop_not_null(Register r) {
  // Note: it will change flags
  assert (UseCompressedOops, "should only be used for compressed headers");
  assert (Universe::heap() != NULL, "java heap should be initialized");
  // Cannot assert, unverified entry point counts instructions (see .ad file)
  // vtableStubs also counts instructions in pd_code_size_limit.
  // Also do not verify_oop as this is called by verify_oop.
  if (Universe::narrow_oop_shift() != 0) {
    assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
    shlq(r, LogMinObjAlignmentInBytes);
    if (Universe::narrow_oop_base() != NULL) {
      addq(r, r12_heapbase);
    }
  } else {
    assert (Universe::narrow_oop_base() == NULL, "sanity");
  }
}

void  MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) {
  // Note: it will change flags
  assert (UseCompressedOops, "should only be used for compressed headers");
  assert (Universe::heap() != NULL, "java heap should be initialized");
  // Cannot assert, unverified entry point counts instructions (see .ad file)
  // vtableStubs also counts instructions in pd_code_size_limit.
  // Also do not verify_oop as this is called by verify_oop.
  if (Universe::narrow_oop_shift() != 0) {
    assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
    if (LogMinObjAlignmentInBytes == Address::times_8) {
      // Shift folds into the lea scale factor: dst = base + src*8.
      leaq(dst, Address(r12_heapbase, src, Address::times_8, 0));
    } else {
      if (dst != src) {
        movq(dst, src);
      }
      shlq(dst, LogMinObjAlignmentInBytes);
      if (Universe::narrow_oop_base() != NULL) {
        addq(dst, r12_heapbase);
      }
    }
  } else {
    assert (Universe::narrow_oop_base() == NULL, "sanity");
    if (dst != src) {
      movq(dst, src);
    }
  }
}

void  MacroAssembler::set_narrow_oop(Register dst, jobject obj) {
  // Load a narrow-oop constant into dst, recording an oop relocation so the
  // GC can update the embedded value.
  assert (UseCompressedOops, "should only be used for compressed headers");
  assert (Universe::heap() != NULL, "java heap should be initialized");
  assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
  int oop_index = oop_recorder()->find_index(obj);
  RelocationHolder rspec = oop_Relocation::spec(oop_index);
  mov_narrow_oop(dst, oop_index, rspec);
}

void  MacroAssembler::set_narrow_oop(Address dst, jobject obj) {
  // Memory-destination variant of set_narrow_oop above.
  assert (UseCompressedOops, "should only be used for compressed headers");
  assert (Universe::heap() != NULL, "java heap should be initialized");
  assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
  int oop_index = oop_recorder()->find_index(obj);
  RelocationHolder rspec = oop_Relocation::spec(oop_index);
  mov_narrow_oop(dst, oop_index, rspec);
}

void  MacroAssembler::cmp_narrow_oop(Register dst, jobject obj) {
  // Compare dst against a narrow-oop constant (relocated for GC).
  assert (UseCompressedOops, "should only be used for compressed headers");
  assert (Universe::heap() != NULL, "java heap should be initialized");
  assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
  int oop_index = oop_recorder()->find_index(obj);
  RelocationHolder rspec = oop_Relocation::spec(oop_index);
  Assembler::cmp_narrow_oop(dst, oop_index, rspec);
}

void  MacroAssembler::cmp_narrow_oop(Address dst, jobject obj) {
  // Memory-operand variant of cmp_narrow_oop above.
  assert (UseCompressedOops, "should only be used for compressed headers");
  assert (Universe::heap() != NULL, "java heap should be initialized");
  assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
  int oop_index = oop_recorder()->find_index(obj);
  RelocationHolder rspec = oop_Relocation::spec(oop_index);
  Assembler::cmp_narrow_oop(dst, oop_index, rspec);
}

void MacroAssembler::reinit_heapbase() {
  // Reload r12_heapbase from the VM-global narrow-oop base, e.g. after a
  // call that may have clobbered r12.
  if (UseCompressedOops) {
    movptr(r12_heapbase, ExternalAddress((address)Universe::narrow_oop_base_addr()));
  }
}
#endif // _LP64


// C2 compiled method's prolog code.
void MacroAssembler::verified_entry(int framesize, bool stack_bang, bool fp_mode_24b) {

  // WARNING: Initial instruction MUST be 5 bytes or longer so that
  // NativeJump::patch_verified_entry will be able to patch out the entry
  // code safely. The push to verify stack depth is ok at 5 bytes,
  // the frame allocation can be either 3 or 6 bytes. So if we don't do
  // stack bang then we must use the 6 byte frame allocation even if
  // we have no frame. :-(

  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove word for return addr
  framesize -= wordSize;

  // Calls to C2R adapters often do not accept exceptional returns.
  // We require that their callers must bang for them. But be careful, because
  // some VM calls (such as call site linkage) can use several kilobytes of
  // stack. But the stack safety zone should account for that.
  // See bugs 4446381, 4468289, 4497237.
  if (stack_bang) {
    generate_stack_overflow_check(framesize);

    // We always push rbp, so that on return to interpreter rbp, will be
    // restored correctly and we can correct the stack.
    push(rbp);
    // Remove word for ebp
    framesize -= wordSize;

    // Create frame
    if (framesize) {
      subptr(rsp, framesize);
    }
  } else {
    // Create frame (force generation of a 4 byte immediate value)
    subptr_imm32(rsp, framesize);

    // Save RBP register now.
    framesize -= wordSize;
    movptr(Address(rsp, framesize), rbp);
  }

  if (VerifyStackAtCalls) { // Majik cookie to verify stack depth
    framesize -= wordSize;
    movptr(Address(rsp, framesize), (int32_t)0xbadb100d);
  }

#ifndef _LP64
  // If method sets FPU control word do it now
  if (fp_mode_24b) {
    fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
  }
  if (UseSSE >= 2 && VerifyFPU) {
    verify_FPU(0, "FPU stack must be clean on entry");
  }
#endif

#ifdef ASSERT
  if (VerifyStackAtCalls) {
    Label L;
    push(rax);
    mov(rax, rsp);
    andptr(rax, StackAlignmentInBytes-1);
    cmpptr(rax, StackAlignmentInBytes-wordSize);
    pop(rax);
    jcc(Assembler::equal, L);
    stop("Stack is not properly aligned!");
    bind(L);
  }
#endif

}


// IndexOf for constant substrings with size >= 8 chars
// which don't need to be loaded through stack.
void MacroAssembler::string_indexofC8(Register str1, Register str2,
                                      Register cnt1, Register cnt2,
                                      int int_cnt2,  Register result,
                                      XMMRegister vec, Register tmp) {
  // Specialized indexOf for a constant-length substring of >= 8 chars:
  // the first 16 bytes of the substring always fit in one xmm register,
  // so it never has to be copied to the stack.
  ShortBranchVerifier sbv(this);
  assert(UseSSE42Intrinsics, "SSE4.2 is required");

  // This method uses pcmpestri instruction with bound registers
  //   inputs:
  //     xmm - substring
  //     rax - substring length (elements count)
  //     mem - scanned string
  //     rdx - string length (elements count)
  //     0xd - mode: 1100 (substring search) + 01 (unsigned shorts)
  //   outputs:
  //     rcx - matched index in string
  assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri");

  Label RELOAD_SUBSTR, SCAN_TO_SUBSTR, SCAN_SUBSTR,
        RET_FOUND, RET_NOT_FOUND, EXIT, FOUND_SUBSTR,
        MATCH_SUBSTR_HEAD, RELOAD_STR, FOUND_CANDIDATE;

  // Note, inline_string_indexOf() generates checks:
  // if (substr.count > string.count) return -1;
  // if (substr.count == 0) return 0;
  assert(int_cnt2 >= 8, "this code is used only for cnt2 >= 8 chars");

  // Load substring.
  movdqu(vec, Address(str2, 0));
  movl(cnt2, int_cnt2);
  movptr(result, str1); // string addr

  if (int_cnt2 > 8) {
    jmpb(SCAN_TO_SUBSTR);

    // Reload substr for rescan, this code
    // is executed only for large substrings (> 8 chars)
    bind(RELOAD_SUBSTR);
    movdqu(vec, Address(str2, 0));
    negptr(cnt2); // Jumped here with negative cnt2, convert to positive

    bind(RELOAD_STR);
    // We came here after the beginning of the substring was
    // matched but the rest of it was not so we need to search
    // again. Start from the next element after the previous match.

    // cnt2 is number of substring remaining elements and
    // cnt1 is number of string remaining elements when cmp failed.
    // Restored cnt1 = cnt1 - cnt2 + int_cnt2
    subl(cnt1, cnt2);
    addl(cnt1, int_cnt2);
    movl(cnt2, int_cnt2); // Now restore cnt2

    decrementl(cnt1);     // Shift to next element
    cmpl(cnt1, cnt2);
    jccb(Assembler::negative, RET_NOT_FOUND);  // Left less than substring

    addptr(result, 2);

  } // (int_cnt2 > 8)

  // Scan string for start of substr in 16-byte vectors
  bind(SCAN_TO_SUBSTR);
  pcmpestri(vec, Address(result, 0), 0x0d);
  jccb(Assembler::below, FOUND_CANDIDATE);   // CF == 1
  subl(cnt1, 8);
  jccb(Assembler::lessEqual, RET_NOT_FOUND); // Scanned full string
  cmpl(cnt1, cnt2);
  jccb(Assembler::negative, RET_NOT_FOUND);  // Left less than substring
  addptr(result, 16);
  jmpb(SCAN_TO_SUBSTR);

  // Found a potential substr
  bind(FOUND_CANDIDATE);
  // Matched whole vector if first element matched (tmp(rcx) == 0).
  if (int_cnt2 == 8) {
    jccb(Assembler::overflow, RET_FOUND);    // OF == 1
  } else { // int_cnt2 > 8
    jccb(Assembler::overflow, FOUND_SUBSTR);
  }
  // After pcmpestri tmp(rcx) contains matched element index
  // Compute start addr of substr
  lea(result, Address(result, tmp, Address::times_2));

  // Make sure string is still long enough
  subl(cnt1, tmp);
  cmpl(cnt1, cnt2);
  if (int_cnt2 == 8) {
    jccb(Assembler::greaterEqual, SCAN_TO_SUBSTR);
  } else { // int_cnt2 > 8
    jccb(Assembler::greaterEqual, MATCH_SUBSTR_HEAD);
  }
  // Left less than substring.

  bind(RET_NOT_FOUND);
  movl(result, -1);
  jmpb(EXIT);

  if (int_cnt2 > 8) {
    // This code is optimized for the case when whole substring
    // is matched if its head is matched.
    bind(MATCH_SUBSTR_HEAD);
    pcmpestri(vec, Address(result, 0), 0x0d);
    // Reload only string if does not match
    jccb(Assembler::noOverflow, RELOAD_STR); // OF == 0

    Label CONT_SCAN_SUBSTR;
    // Compare the rest of substring (> 8 chars).
    bind(FOUND_SUBSTR);
    // First 8 chars are already matched.
    negptr(cnt2);
    addptr(cnt2, 8);

    bind(SCAN_SUBSTR);
    subl(cnt1, 8);
    cmpl(cnt2, -8); // Do not read beyond substring
    jccb(Assembler::lessEqual, CONT_SCAN_SUBSTR);
    // Back-up strings to avoid reading beyond substring:
    // cnt1 = cnt1 - cnt2 + 8
    addl(cnt1, cnt2); // cnt2 is negative
    addl(cnt1, 8);
    movl(cnt2, 8);  negptr(cnt2);
    bind(CONT_SCAN_SUBSTR);
    if (int_cnt2 < (int)G) {
      movdqu(vec, Address(str2, cnt2, Address::times_2, int_cnt2*2));
      pcmpestri(vec, Address(result, cnt2, Address::times_2, int_cnt2*2), 0x0d);
    } else {
      // calculate index in register to avoid integer overflow (int_cnt2*2)
      movl(tmp, int_cnt2);
      addptr(tmp, cnt2);
      movdqu(vec, Address(str2, tmp, Address::times_2, 0));
      pcmpestri(vec, Address(result, tmp, Address::times_2, 0), 0x0d);
    }
    // Need to reload strings pointers if not matched whole vector
    jcc(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0
    addptr(cnt2, 8);
    jcc(Assembler::negative, SCAN_SUBSTR);
    // Fall through if found full substring

  } // (int_cnt2 > 8)

  bind(RET_FOUND);
  // Found result if we matched full small substring.
  // Compute substr offset
  subptr(result, str1);
  shrl(result, 1); // index
  bind(EXIT);

} // string_indexofC8

// Small strings are loaded through stack if they cross page boundary.
void MacroAssembler::string_indexof(Register str1, Register str2,
                                    Register cnt1, Register cnt2,
                                    int int_cnt2,  Register result,
                                    XMMRegister vec, Register tmp) {
  // General indexOf: handles short constant substrings (< 8 chars) and
  // non-constant substrings. Strings near a page boundary are copied to the
  // stack first so the 16-byte vector loads cannot fault past the end.
  ShortBranchVerifier sbv(this);
  assert(UseSSE42Intrinsics, "SSE4.2 is required");
  //
  // int_cnt2 is length of small (< 8 chars) constant substring
  // or (-1) for non constant substring in which case its length
  // is in cnt2 register.
  //
  // Note, inline_string_indexOf() generates checks:
  // if (substr.count > string.count) return -1;
  // if (substr.count == 0) return 0;
  //
  assert(int_cnt2 == -1 || (0 < int_cnt2 && int_cnt2 < 8), "should be != 0");

  // This method uses pcmpestri instruction with bound registers
  //   inputs:
  //     xmm - substring
  //     rax - substring length (elements count)
  //     mem - scanned string
  //     rdx - string length (elements count)
  //     0xd - mode: 1100 (substring search) + 01 (unsigned shorts)
  //   outputs:
  //     rcx - matched index in string
  assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri");

  Label RELOAD_SUBSTR, SCAN_TO_SUBSTR, SCAN_SUBSTR, ADJUST_STR,
        RET_FOUND, RET_NOT_FOUND, CLEANUP, FOUND_SUBSTR,
        FOUND_CANDIDATE;

  { //========================================================
    // We don't know where these strings are located
    // and we can't read beyond them. Load them through stack.
    Label BIG_STRINGS, CHECK_STR, COPY_SUBSTR, COPY_STR;

    movptr(tmp, rsp); // save old SP

    if (int_cnt2 > 0) {     // small (< 8 chars) constant substring
      if (int_cnt2 == 1) {  // One char
        load_unsigned_short(result, Address(str2, 0));
        movdl(vec, result); // move 32 bits
      } else if (int_cnt2 == 2) { // Two chars
        movdl(vec, Address(str2, 0)); // move 32 bits
      } else if (int_cnt2 == 4) { // Four chars
        movq(vec, Address(str2, 0));  // move 64 bits
      } else { // cnt2 = { 3, 5, 6, 7 }
        // Array header size is 12 bytes in 32-bit VM
        // + 6 bytes for 3 chars == 18 bytes,
        // enough space to load vec and shift.
        assert(HeapWordSize*typeArrayKlass::header_size() >= 12,"sanity");
        movdqu(vec, Address(str2, (int_cnt2*2)-16));
        psrldq(vec, 16-(int_cnt2*2));
      }
    } else { // not constant substring
      cmpl(cnt2, 8);
      jccb(Assembler::aboveEqual, BIG_STRINGS); // Both strings are big enough

      // We can read beyond string if str+16 does not cross page boundary
      // since heaps are aligned and mapped by pages.
      assert(os::vm_page_size() < (int)G, "default page should be small");
      movl(result, str2); // We need only low 32 bits
      andl(result, (os::vm_page_size()-1));
      cmpl(result, (os::vm_page_size()-16));
      jccb(Assembler::belowEqual, CHECK_STR);

      // Move small strings to stack to allow load 16 bytes into vec.
      subptr(rsp, 16);
      int stk_offset = wordSize-2;
      push(cnt2);

      bind(COPY_SUBSTR);
      load_unsigned_short(result, Address(str2, cnt2, Address::times_2, -2));
      movw(Address(rsp, cnt2, Address::times_2, stk_offset), result);
      decrement(cnt2);
      jccb(Assembler::notZero, COPY_SUBSTR);

      pop(cnt2);
      movptr(str2, rsp);  // New substring address
    } // non constant

    bind(CHECK_STR);
    cmpl(cnt1, 8);
    jccb(Assembler::aboveEqual, BIG_STRINGS);

    // Check cross page boundary.
    movl(result, str1); // We need only low 32 bits
    andl(result, (os::vm_page_size()-1));
    cmpl(result, (os::vm_page_size()-16));
    jccb(Assembler::belowEqual, BIG_STRINGS);

    subptr(rsp, 16);
    int stk_offset = -2;
    if (int_cnt2 < 0) { // not constant
      push(cnt2);
      stk_offset += wordSize;
    }
    movl(cnt2, cnt1);

    bind(COPY_STR);
    load_unsigned_short(result, Address(str1, cnt2, Address::times_2, -2));
    movw(Address(rsp, cnt2, Address::times_2, stk_offset), result);
    decrement(cnt2);
    jccb(Assembler::notZero, COPY_STR);

    if (int_cnt2 < 0) { // not constant
      pop(cnt2);
    }
    movptr(str1, rsp);  // New string address

    bind(BIG_STRINGS);
    // Load substring.
    if (int_cnt2 < 0) { // -1
      movdqu(vec, Address(str2, 0));
      push(cnt2);       // substr count
      push(str2);       // substr addr
      push(str1);       // string addr
    } else {
      // Small (< 8 chars) constant substrings are loaded already.
      movl(cnt2, int_cnt2);
    }
    push(tmp);  // original SP

  } // Finished loading

  //========================================================
  // Start search
  //

  movptr(result, str1); // string addr

  if (int_cnt2  < 0) {  // Only for non constant substring
    jmpb(SCAN_TO_SUBSTR);

    // SP saved at sp+0
    // String saved at sp+1*wordSize
    // Substr saved at sp+2*wordSize
    // Substr count saved at sp+3*wordSize

    // Reload substr for rescan, this code
    // is executed only for large substrings (> 8 chars)
    bind(RELOAD_SUBSTR);
    movptr(str2, Address(rsp, 2*wordSize));
    movl(cnt2, Address(rsp, 3*wordSize));
    movdqu(vec, Address(str2, 0));
    // We came here after the beginning of the substring was
    // matched but the rest of it was not so we need to search
    // again. Start from the next element after the previous match.
    subptr(str1, result); // Restore counter
    shrl(str1, 1);
    addl(cnt1, str1);
    decrementl(cnt1);   // Shift to next element
    cmpl(cnt1, cnt2);
    jccb(Assembler::negative, RET_NOT_FOUND);  // Left less than substring

    addptr(result, 2);
  } // non constant

  // Scan string for start of substr in 16-byte vectors
  bind(SCAN_TO_SUBSTR);
  assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri");
  pcmpestri(vec, Address(result, 0), 0x0d);
  jccb(Assembler::below, FOUND_CANDIDATE);   // CF == 1
  subl(cnt1, 8);
  jccb(Assembler::lessEqual, RET_NOT_FOUND); // Scanned full string
  cmpl(cnt1, cnt2);
  jccb(Assembler::negative, RET_NOT_FOUND);  // Left less than substring
  addptr(result, 16);

  bind(ADJUST_STR);
  cmpl(cnt1, 8); // Do not read beyond string
  jccb(Assembler::greaterEqual, SCAN_TO_SUBSTR);
  // Back-up string to avoid reading beyond string.
  lea(result, Address(result, cnt1, Address::times_2, -16));
  movl(cnt1, 8);
  jmpb(SCAN_TO_SUBSTR);

  // Found a potential substr
  bind(FOUND_CANDIDATE);
  // After pcmpestri tmp(rcx) contains matched element index

  // Make sure string is still long enough
  subl(cnt1, tmp);
  cmpl(cnt1, cnt2);
  jccb(Assembler::greaterEqual, FOUND_SUBSTR);
  // Left less than substring.

  bind(RET_NOT_FOUND);
  movl(result, -1);
  jmpb(CLEANUP);

  bind(FOUND_SUBSTR);
  // Compute start addr of substr
  lea(result, Address(result, tmp, Address::times_2));

  if (int_cnt2 > 0) { // Constant substring
    // Repeat search for small substring (< 8 chars)
    // from new point without reloading substring.
    // Have to check that we don't read beyond string.
    cmpl(tmp, 8-int_cnt2);
    jccb(Assembler::greater, ADJUST_STR);
    // Fall through if matched whole substring.
  } else { // non constant
    assert(int_cnt2 == -1, "should be != 0");

    addl(tmp, cnt2);
    // Found result if we matched whole substring.
    cmpl(tmp, 8);
    jccb(Assembler::lessEqual, RET_FOUND);

    // Repeat search for small substring (<= 8 chars)
    // from new point 'str1' without reloading substring.
    cmpl(cnt2, 8);
    // Have to check that we don't read beyond string.
    jccb(Assembler::lessEqual, ADJUST_STR);

    Label CHECK_NEXT, CONT_SCAN_SUBSTR, RET_FOUND_LONG;
    // Compare the rest of substring (> 8 chars).
    movptr(str1, result);

    cmpl(tmp, cnt2);
    // First 8 chars are already matched.
    jccb(Assembler::equal, CHECK_NEXT);

    bind(SCAN_SUBSTR);
    pcmpestri(vec, Address(str1, 0), 0x0d);
    // Need to reload strings pointers if not matched whole vector
    jcc(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0

    bind(CHECK_NEXT);
    subl(cnt2, 8);
    jccb(Assembler::lessEqual, RET_FOUND_LONG); // Found full substring
    addptr(str1, 16);
    addptr(str2, 16);
    subl(cnt1, 8);
    cmpl(cnt2, 8); // Do not read beyond substring
    jccb(Assembler::greaterEqual, CONT_SCAN_SUBSTR);
    // Back-up strings to avoid reading beyond substring.
    lea(str2, Address(str2, cnt2, Address::times_2, -16));
    lea(str1, Address(str1, cnt2, Address::times_2, -16));
    subl(cnt1, cnt2);
    movl(cnt2, 8);
    addl(cnt1, 8);
    bind(CONT_SCAN_SUBSTR);
    movdqu(vec, Address(str2, 0));
    jmpb(SCAN_SUBSTR);

    bind(RET_FOUND_LONG);
    movptr(str1, Address(rsp, wordSize));
  } // non constant

  bind(RET_FOUND);
  // Compute substr offset
  subptr(result, str1);
  shrl(result, 1); // index

  bind(CLEANUP);
  pop(rsp); // restore SP

} // string_indexof

// Compare strings.
10087 void MacroAssembler::string_compare(Register str1, Register str2, 10088 Register cnt1, Register cnt2, Register result, 10089 XMMRegister vec1) { 10090 ShortBranchVerifier sbv(this); 10091 Label LENGTH_DIFF_LABEL, POP_LABEL, DONE_LABEL, WHILE_HEAD_LABEL; 10092 10093 // Compute the minimum of the string lengths and the 10094 // difference of the string lengths (stack). 10095 // Do the conditional move stuff 10096 movl(result, cnt1); 10097 subl(cnt1, cnt2); 10098 push(cnt1); 10099 cmov32(Assembler::lessEqual, cnt2, result); 10100 10101 // Is the minimum length zero? 10102 testl(cnt2, cnt2); 10103 jcc(Assembler::zero, LENGTH_DIFF_LABEL); 10104 10105 // Load first characters 10106 load_unsigned_short(result, Address(str1, 0)); 10107 load_unsigned_short(cnt1, Address(str2, 0)); 10108 10109 // Compare first characters 10110 subl(result, cnt1); 10111 jcc(Assembler::notZero, POP_LABEL); 10112 decrementl(cnt2); 10113 jcc(Assembler::zero, LENGTH_DIFF_LABEL); 10114 10115 { 10116 // Check after comparing first character to see if strings are equivalent 10117 Label LSkip2; 10118 // Check if the strings start at same location 10119 cmpptr(str1, str2); 10120 jccb(Assembler::notEqual, LSkip2); 10121 10122 // Check if the length difference is zero (from stack) 10123 cmpl(Address(rsp, 0), 0x0); 10124 jcc(Assembler::equal, LENGTH_DIFF_LABEL); 10125 10126 // Strings might not be equivalent 10127 bind(LSkip2); 10128 } 10129 10130 Address::ScaleFactor scale = Address::times_2; 10131 int stride = 8; 10132 10133 // Advance to next element 10134 addptr(str1, 16/stride); 10135 addptr(str2, 16/stride); 10136 10137 if (UseSSE42Intrinsics) { 10138 Label COMPARE_WIDE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_TAIL; 10139 int pcmpmask = 0x19; 10140 // Setup to compare 16-byte vectors 10141 movl(result, cnt2); 10142 andl(cnt2, ~(stride - 1)); // cnt2 holds the vector count 10143 jccb(Assembler::zero, COMPARE_TAIL); 10144 10145 lea(str1, Address(str1, result, scale)); 10146 lea(str2, Address(str2, 
result, scale)); 10147 negptr(result); 10148 10149 // pcmpestri 10150 // inputs: 10151 // vec1- substring 10152 // rax - negative string length (elements count) 10153 // mem - scaned string 10154 // rdx - string length (elements count) 10155 // pcmpmask - cmp mode: 11000 (string compare with negated result) 10156 // + 00 (unsigned bytes) or + 01 (unsigned shorts) 10157 // outputs: 10158 // rcx - first mismatched element index 10159 assert(result == rax && cnt2 == rdx && cnt1 == rcx, "pcmpestri"); 10160 10161 bind(COMPARE_WIDE_VECTORS); 10162 movdqu(vec1, Address(str1, result, scale)); 10163 pcmpestri(vec1, Address(str2, result, scale), pcmpmask); 10164 // After pcmpestri cnt1(rcx) contains mismatched element index 10165 10166 jccb(Assembler::below, VECTOR_NOT_EQUAL); // CF==1 10167 addptr(result, stride); 10168 subptr(cnt2, stride); 10169 jccb(Assembler::notZero, COMPARE_WIDE_VECTORS); 10170 10171 // compare wide vectors tail 10172 testl(result, result); 10173 jccb(Assembler::zero, LENGTH_DIFF_LABEL); 10174 10175 movl(cnt2, stride); 10176 movl(result, stride); 10177 negptr(result); 10178 movdqu(vec1, Address(str1, result, scale)); 10179 pcmpestri(vec1, Address(str2, result, scale), pcmpmask); 10180 jccb(Assembler::aboveEqual, LENGTH_DIFF_LABEL); 10181 10182 // Mismatched characters in the vectors 10183 bind(VECTOR_NOT_EQUAL); 10184 addptr(result, cnt1); 10185 movptr(cnt2, result); 10186 load_unsigned_short(result, Address(str1, cnt2, scale)); 10187 load_unsigned_short(cnt1, Address(str2, cnt2, scale)); 10188 subl(result, cnt1); 10189 jmpb(POP_LABEL); 10190 10191 bind(COMPARE_TAIL); // limit is zero 10192 movl(cnt2, result); 10193 // Fallthru to tail compare 10194 } 10195 10196 // Shift str2 and str1 to the end of the arrays, negate min 10197 lea(str1, Address(str1, cnt2, scale, 0)); 10198 lea(str2, Address(str2, cnt2, scale, 0)); 10199 negptr(cnt2); 10200 10201 // Compare the rest of the elements 10202 bind(WHILE_HEAD_LABEL); 10203 load_unsigned_short(result, 
Address(str1, cnt2, scale, 0));
    // Per-character compare loop tail (the head of string_compare is above
    // this chunk): load one char from each string, compare, and either
    // branch out with the difference or advance to the next character.
    load_unsigned_short(cnt1, Address(str2, cnt2, scale, 0));
    subl(result, cnt1);
    jccb(Assembler::notZero, POP_LABEL);        // chars differ: result holds the char difference
    increment(cnt2);                            // index counts up towards zero
    jccb(Assembler::notZero, WHILE_HEAD_LABEL);

  // Strings are equal up to min length.  Return the length difference.
  bind(LENGTH_DIFF_LABEL);
  pop(result);
  jmpb(DONE_LABEL);

  // Discard the stored length difference
  bind(POP_LABEL);
  pop(cnt1);

  // That's it
  bind(DONE_LABEL);
}

// Compare char[] arrays aligned to 4 bytes or substrings.
//
// Emits code that sets 'result' to 1 when the two char sequences are equal
// and to 0 otherwise.  With is_array_equ == true, ary1/ary2 are array oops:
// null checks, a length compare, and the skip over the array header are
// emitted here and 'limit' is loaded from the length field.  With
// is_array_equ == false, ary1/ary2 already point at the first char and the
// caller has pre-loaded 'limit' with the char count.  'chr', 'vec1' and
// 'vec2' are scratch; ary1, ary2 and limit are clobbered.
void MacroAssembler::char_arrays_equals(bool is_array_equ, Register ary1, Register ary2,
                                        Register limit, Register result, Register chr,
                                        XMMRegister vec1, XMMRegister vec2) {
  // All conditional branches below must stay in short-jump (jccb) range.
  ShortBranchVerifier sbv(this);
  Label TRUE_LABEL, FALSE_LABEL, DONE, COMPARE_VECTORS, COMPARE_CHAR;

  int length_offset = arrayOopDesc::length_offset_in_bytes();
  int base_offset   = arrayOopDesc::base_offset_in_bytes(T_CHAR);

  // Check the input args: identical references are trivially equal.
  cmpptr(ary1, ary2);
  jcc(Assembler::equal, TRUE_LABEL);

  if (is_array_equ) {
    // Need additional checks for arrays_equals.
    // A null operand (and the other one non-null, else the compare above
    // matched) means not equal.
    testptr(ary1, ary1);
    jcc(Assembler::zero, FALSE_LABEL);
    testptr(ary2, ary2);
    jcc(Assembler::zero, FALSE_LABEL);

    // Check the lengths
    movl(limit, Address(ary1, length_offset));
    cmpl(limit, Address(ary2, length_offset));
    jcc(Assembler::notEqual, FALSE_LABEL);
  }

  // count == 0: empty sequences are equal
  testl(limit, limit);
  jcc(Assembler::zero, TRUE_LABEL);

  if (is_array_equ) {
    // Load array address (skip the array header to reach element 0)
    lea(ary1, Address(ary1, base_offset));
    lea(ary2, Address(ary2, base_offset));
  }

  shll(limit, 1);      // byte count != 0 (2 bytes per char)
  movl(result, limit); // copy

  if (UseSSE42Intrinsics) {
    // With SSE4.2, use double quad vector compare
    Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;

    // Compare 16-byte vectors
    andl(result, 0x0000000e);  //   tail count (in bytes)
    andl(limit, 0xfffffff0);   // vector count (in bytes)
    jccb(Assembler::zero, COMPARE_TAIL);

    // Point at one-past-the-vector-region and run 'limit' from -count up
    // to zero so the loop needs only one add to both advance and test.
    lea(ary1, Address(ary1, limit, Address::times_1));
    lea(ary2, Address(ary2, limit, Address::times_1));
    negptr(limit);

    bind(COMPARE_WIDE_VECTORS);
    movdqu(vec1, Address(ary1, limit, Address::times_1));
    movdqu(vec2, Address(ary2, limit, Address::times_1));
    pxor(vec1, vec2);                    // vec1 == 0 iff the 16 bytes match

    ptest(vec1, vec1);                   // sets ZF iff vec1 is all zero
    jccb(Assembler::notZero, FALSE_LABEL);
    addptr(limit, 16);
    jcc(Assembler::notZero, COMPARE_WIDE_VECTORS);

    testl(result, result);               // any tail bytes left?
    jccb(Assembler::zero, TRUE_LABEL);

    // Compare the tail as one final (overlapping) 16-byte vector ending at
    // the last byte; the overlap re-checks already-verified bytes, which is
    // harmless.
    movdqu(vec1, Address(ary1, result, Address::times_1, -16));
    movdqu(vec2, Address(ary2, result, Address::times_1, -16));
    pxor(vec1, vec2);

    ptest(vec1, vec1);
    jccb(Assembler::notZero, FALSE_LABEL);
    jmpb(TRUE_LABEL);

    bind(COMPARE_TAIL); // limit is zero
    movl(limit, result);
    // Fallthru to tail compare
  }

  // Compare 4-byte vectors
  andl(limit, 0xfffffffc); // vector count (in bytes)
  jccb(Assembler::zero, COMPARE_CHAR);

  lea(ary1, Address(ary1, limit, Address::times_1));
  lea(ary2, Address(ary2, limit, Address::times_1));
  negptr(limit);

  bind(COMPARE_VECTORS);
  movl(chr, Address(ary1, limit, Address::times_1));
  cmpl(chr, Address(ary2, limit, Address::times_1));
  jccb(Assembler::notEqual, FALSE_LABEL);
  addptr(limit, 4);
  jcc(Assembler::notZero, COMPARE_VECTORS);

  // Compare trailing char (final 2 bytes), if any
  bind(COMPARE_CHAR);
  testl(result, 0x2); // tail char
  jccb(Assembler::zero, TRUE_LABEL);
  load_unsigned_short(chr, Address(ary1, 0));
  load_unsigned_short(limit, Address(ary2, 0));
  cmpl(chr, limit);
  jccb(Assembler::notEqual, FALSE_LABEL);

  bind(TRUE_LABEL);
  movl(result, 1);   // return true
  jmpb(DONE);

  bind(FALSE_LABEL);
  xorl(result, result); // return false

  // That's it
  bind(DONE);
}

#ifdef PRODUCT
#define BLOCK_COMMENT(str) /* nothing */
#else
#define BLOCK_COMMENT(str) block_comment(str)
#endif

#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")

// Emit code that fills 'count' elements of type 't' (T_BYTE, T_SHORT or
// T_INT) starting at 'to' with 'value'.  'aligned' promises the destination
// is already suitably aligned so the byte/word alignment fixup below can be
// skipped.  'rtmp' and 'xtmp' are scratch; 'to', 'value' and 'count' are
// clobbered.
void MacroAssembler::generate_fill(BasicType t, bool aligned,
                    Register to, Register value, Register count,
                    Register rtmp, XMMRegister xtmp) {
  ShortBranchVerifier sbv(this);
  assert_different_registers(to, value, count, rtmp);
  Label L_exit, L_skip_align1, L_skip_align2, L_fill_byte;
  Label L_fill_2_bytes, L_fill_4_bytes;

  // shift = log2(elements per 32-bit word), so (n << shift) elements
  // occupy n*4 bytes; used to scale element counts to byte-sized chunks.
  int shift = -1;
  switch (t) {
    case T_BYTE:
      shift = 2;
      break;
    case T_SHORT:
      shift = 1;
      break;
    case T_INT:
      shift = 0;
      break;
    default: ShouldNotReachHere();
  }

  // Replicate 'value' across all lanes of a 32-bit word.
  if (t == T_BYTE) {
    andl(value, 0xff);
    movl(rtmp, value);
    shll(rtmp, 8);
    orl(value, rtmp);     // value now holds the byte duplicated in bits 0..15
  }
  if (t ==
T_SHORT) {
    andl(value, 0xffff);
  }
  if (t == T_BYTE || t == T_SHORT) {
    // Duplicate the low 16 bits into the high half: value now fills 32 bits.
    movl(rtmp, value);
    shll(rtmp, 16);
    orl(value, rtmp);
  }

  cmpl(count, 2<<shift); // Short arrays (< 8 bytes) fill by element
  jcc(Assembler::below, L_fill_4_bytes); // use unsigned cmp
  if (!UseUnalignedLoadStores && !aligned && (t == T_BYTE || t == T_SHORT)) {
    // align source address at 4 bytes address boundary
    if (t == T_BYTE) {
      // One byte misalignment happens only for byte arrays
      testptr(to, 1);
      jccb(Assembler::zero, L_skip_align1);
      movb(Address(to, 0), value);
      increment(to);
      decrement(count);
      BIND(L_skip_align1);
    }
    // Two bytes misalignment happens only for byte and short (char) arrays
    testptr(to, 2);
    jccb(Assembler::zero, L_skip_align2);
    movw(Address(to, 0), value);
    addptr(to, 2);
    subl(count, 1<<(shift-1)); // 2 bytes == 1<<(shift-1) elements
    BIND(L_skip_align2);
  }
  if (UseSSE < 2) {
    // No XMM support: fill with 32-bit integer stores.
    Label L_fill_32_bytes_loop, L_check_fill_8_bytes, L_fill_8_bytes_loop, L_fill_8_bytes;
    // Fill 32-byte chunks (8 << shift elements == 32 bytes)
    subl(count, 8 << shift);
    jcc(Assembler::less, L_check_fill_8_bytes);
    align(16);

    BIND(L_fill_32_bytes_loop);

    for (int i = 0; i < 32; i += 4) {
      movl(Address(to, i), value);
    }

    addptr(to, 32);
    subl(count, 8 << shift);
    jcc(Assembler::greaterEqual, L_fill_32_bytes_loop);
    BIND(L_check_fill_8_bytes);
    addl(count, 8 << shift);          // undo the last subtract: count = remaining elements
    jccb(Assembler::zero, L_exit);
    jmpb(L_fill_8_bytes);

    //
    // length is too short, just fill qwords
    //
    BIND(L_fill_8_bytes_loop);
    movl(Address(to, 0), value);
    movl(Address(to, 4), value);
    addptr(to, 8);
    BIND(L_fill_8_bytes);
    subl(count, 1 << (shift + 1));    // 8 bytes == 1 << (shift+1) elements
    jcc(Assembler::greaterEqual, L_fill_8_bytes_loop);
    // fall through to fill 4 bytes
  } else {
    Label L_fill_32_bytes;
    if (!UseUnalignedLoadStores) {
      // align to 8 bytes, we know we are 4 byte aligned to start
      testptr(to, 4);
      jccb(Assembler::zero, L_fill_32_bytes);
      movl(Address(to, 0), value);
      addptr(to, 4);
      subl(count, 1<<shift);          // 4 bytes == 1<<shift elements
    }
    BIND(L_fill_32_bytes);
    {
      assert( UseSSE >= 2, "supported cpu only" );
      Label L_fill_32_bytes_loop, L_check_fill_8_bytes, L_fill_8_bytes_loop, L_fill_8_bytes;
      // Fill 32-byte chunks
      // Broadcast the 32-bit fill pattern into all four dwords of xtmp.
      movdl(xtmp, value);
      pshufd(xtmp, xtmp, 0);

      subl(count, 8 << shift);
      jcc(Assembler::less, L_check_fill_8_bytes);
      align(16);

      BIND(L_fill_32_bytes_loop);

      if (UseUnalignedLoadStores) {
        movdqu(Address(to, 0), xtmp);
        movdqu(Address(to, 16), xtmp);
      } else {
        movq(Address(to, 0), xtmp);
        movq(Address(to, 8), xtmp);
        movq(Address(to, 16), xtmp);
        movq(Address(to, 24), xtmp);
      }

      addptr(to, 32);
      subl(count, 8 << shift);
      jcc(Assembler::greaterEqual, L_fill_32_bytes_loop);
      BIND(L_check_fill_8_bytes);
      addl(count, 8 << shift);        // undo the last subtract: count = remaining elements
      jccb(Assembler::zero, L_exit);
      jmpb(L_fill_8_bytes);

      //
      // length is too short, just fill qwords
      //
      BIND(L_fill_8_bytes_loop);
      movq(Address(to, 0), xtmp);
      addptr(to, 8);
      BIND(L_fill_8_bytes);
      subl(count, 1 << (shift + 1));
      jcc(Assembler::greaterEqual, L_fill_8_bytes_loop);
    }
  }
  // fill trailing 4 bytes
  BIND(L_fill_4_bytes);
  testl(count, 1<<shift);             // at least 4 bytes (1<<shift elements) left?
  jccb(Assembler::zero, L_fill_2_bytes);
  movl(Address(to, 0), value);
  if (t == T_BYTE || t == T_SHORT) {
    addptr(to, 4);
    BIND(L_fill_2_bytes);
    // fill trailing 2 bytes
    testl(count, 1<<(shift-1));
    jccb(Assembler::zero, L_fill_byte);
    movw(Address(to, 0), value);
    if (t == T_BYTE) {
      addptr(to, 2);
      BIND(L_fill_byte);
      // fill trailing byte
      testl(count, 1);
      jccb(Assembler::zero, L_exit);
      movb(Address(to, 0), value);
    } else {
      // T_SHORT: a trailing odd byte is impossible, just bind the label.
      BIND(L_fill_byte);
    }
  } else {
    // T_INT: no 2-byte or 1-byte tails are possible.
    BIND(L_fill_2_bytes);
  }
  BIND(L_exit);
}
#undef BIND
#undef BLOCK_COMMENT


// Return the condition code that is true exactly when 'cond' is false.
Assembler::Condition MacroAssembler::negate_condition(Assembler::Condition cond) {
  switch (cond) {
    // Note some conditions are synonyms for others
    case Assembler::zero:         return Assembler::notZero;
    case Assembler::notZero:      return Assembler::zero;
    case Assembler::less:         return Assembler::greaterEqual;
    case Assembler::lessEqual:    return Assembler::greater;
    case Assembler::greater:      return Assembler::lessEqual;
    case Assembler::greaterEqual: return Assembler::less;
    case Assembler::below:        return Assembler::aboveEqual;
    case Assembler::belowEqual:   return Assembler::above;
    case Assembler::above:        return Assembler::belowEqual;
    case Assembler::aboveEqual:   return Assembler::below;
    case Assembler::overflow:     return Assembler::noOverflow;
    case Assembler::noOverflow:   return Assembler::overflow;
    case Assembler::negative:     return Assembler::positive;
    case Assembler::positive:     return Assembler::negative;
    case Assembler::parity:       return Assembler::noParity;
    case Assembler::noParity:     return Assembler::parity;
  }
  ShouldNotReachHere(); return Assembler::overflow;
}

// RAII guard: the constructor emits a compare of the byte flag at
// *flag_addr against 'value' and a conditional jump past the code emitted
// while the guard is live; the destructor binds the jump target, so the
// guarded code is skipped at runtime when the flag equals 'value'.
SkipIfEqual::SkipIfEqual(
    MacroAssembler* masm, const bool* flag_addr, bool value) {
  _masm = masm;
  _masm->cmp8(ExternalAddress((address)flag_addr), value);
  _masm->jcc(Assembler::equal, _label);
}

SkipIfEqual::~SkipIfEqual() {
  // Bind the skip target emitted by the constructor.
  _masm->bind(_label);
}