1 /* 2 * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 
22 * 23 */ 24 25 #include "precompiled.hpp" 26 #include "assembler_x86.inline.hpp" 27 #include "gc_interface/collectedHeap.inline.hpp" 28 #include "interpreter/interpreter.hpp" 29 #include "memory/cardTableModRefBS.hpp" 30 #include "memory/resourceArea.hpp" 31 #include "prims/methodHandles.hpp" 32 #include "runtime/biasedLocking.hpp" 33 #include "runtime/interfaceSupport.hpp" 34 #include "runtime/objectMonitor.hpp" 35 #include "runtime/os.hpp" 36 #include "runtime/sharedRuntime.hpp" 37 #include "runtime/stubRoutines.hpp" 38 #ifndef SERIALGC 39 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp" 40 #include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp" 41 #include "gc_implementation/g1/heapRegion.hpp" 42 #endif 43 44 // Implementation of AddressLiteral 45 46 AddressLiteral::AddressLiteral(address target, relocInfo::relocType rtype) { 47 _is_lval = false; 48 _target = target; 49 switch (rtype) { 50 case relocInfo::oop_type: 51 // Oops are a special case. Normally they would be their own section 52 // but in cases like icBuffer they are literals in the code stream that 53 // we don't have a section for. We use none so that we get a literal address 54 // which is always patchable. 
55 break; 56 case relocInfo::external_word_type: 57 _rspec = external_word_Relocation::spec(target); 58 break; 59 case relocInfo::internal_word_type: 60 _rspec = internal_word_Relocation::spec(target); 61 break; 62 case relocInfo::opt_virtual_call_type: 63 _rspec = opt_virtual_call_Relocation::spec(); 64 break; 65 case relocInfo::static_call_type: 66 _rspec = static_call_Relocation::spec(); 67 break; 68 case relocInfo::runtime_call_type: 69 _rspec = runtime_call_Relocation::spec(); 70 break; 71 case relocInfo::poll_type: 72 case relocInfo::poll_return_type: 73 _rspec = Relocation::spec_simple(rtype); 74 break; 75 case relocInfo::none: 76 break; 77 default: 78 ShouldNotReachHere(); 79 break; 80 } 81 } 82 83 // Implementation of Address 84 85 #ifdef _LP64 86 87 Address Address::make_array(ArrayAddress adr) { 88 // Not implementable on 64bit machines 89 // Should have been handled higher up the call chain. 90 ShouldNotReachHere(); 91 return Address(); 92 } 93 94 // exceedingly dangerous constructor 95 Address::Address(int disp, address loc, relocInfo::relocType rtype) { 96 _base = noreg; 97 _index = noreg; 98 _scale = no_scale; 99 _disp = disp; 100 switch (rtype) { 101 case relocInfo::external_word_type: 102 _rspec = external_word_Relocation::spec(loc); 103 break; 104 case relocInfo::internal_word_type: 105 _rspec = internal_word_Relocation::spec(loc); 106 break; 107 case relocInfo::runtime_call_type: 108 // HMM 109 _rspec = runtime_call_Relocation::spec(); 110 break; 111 case relocInfo::poll_type: 112 case relocInfo::poll_return_type: 113 _rspec = Relocation::spec_simple(rtype); 114 break; 115 case relocInfo::none: 116 break; 117 default: 118 ShouldNotReachHere(); 119 } 120 } 121 #else // LP64 122 123 Address Address::make_array(ArrayAddress adr) { 124 AddressLiteral base = adr.base(); 125 Address index = adr.index(); 126 assert(index._disp == 0, "must not have disp"); // maybe it can? 
127 Address array(index._base, index._index, index._scale, (intptr_t) base.target()); 128 array._rspec = base._rspec; 129 return array; 130 } 131 132 // exceedingly dangerous constructor 133 Address::Address(address loc, RelocationHolder spec) { 134 _base = noreg; 135 _index = noreg; 136 _scale = no_scale; 137 _disp = (intptr_t) loc; 138 _rspec = spec; 139 } 140 141 #endif // _LP64 142 143 144 145 // Convert the raw encoding form into the form expected by the constructor for 146 // Address. An index of 4 (rsp) corresponds to having no index, so convert 147 // that to noreg for the Address constructor. 148 Address Address::make_raw(int base, int index, int scale, int disp, bool disp_is_oop) { 149 RelocationHolder rspec; 150 if (disp_is_oop) { 151 rspec = Relocation::spec_simple(relocInfo::oop_type); 152 } 153 bool valid_index = index != rsp->encoding(); 154 if (valid_index) { 155 Address madr(as_Register(base), as_Register(index), (Address::ScaleFactor)scale, in_ByteSize(disp)); 156 madr._rspec = rspec; 157 return madr; 158 } else { 159 Address madr(as_Register(base), noreg, Address::no_scale, in_ByteSize(disp)); 160 madr._rspec = rspec; 161 return madr; 162 } 163 } 164 165 // Implementation of Assembler 166 167 int AbstractAssembler::code_fill_byte() { 168 return (u_char)'\xF4'; // hlt 169 } 170 171 // make this go away someday 172 void Assembler::emit_data(jint data, relocInfo::relocType rtype, int format) { 173 if (rtype == relocInfo::none) 174 emit_long(data); 175 else emit_data(data, Relocation::spec_simple(rtype), format); 176 } 177 178 void Assembler::emit_data(jint data, RelocationHolder const& rspec, int format) { 179 assert(imm_operand == 0, "default format must be immediate in this file"); 180 assert(inst_mark() != NULL, "must be inside InstructionMark"); 181 if (rspec.type() != relocInfo::none) { 182 #ifdef ASSERT 183 check_relocation(rspec, format); 184 #endif 185 // Do not use AbstractAssembler::relocate, which is not intended for 186 // embedded 
words. Instead, relocate to the enclosing instruction. 187 188 // hack. call32 is too wide for mask so use disp32 189 if (format == call32_operand) 190 code_section()->relocate(inst_mark(), rspec, disp32_operand); 191 else 192 code_section()->relocate(inst_mark(), rspec, format); 193 } 194 emit_long(data); 195 } 196 197 static int encode(Register r) { 198 int enc = r->encoding(); 199 if (enc >= 8) { 200 enc -= 8; 201 } 202 return enc; 203 } 204 205 static int encode(XMMRegister r) { 206 int enc = r->encoding(); 207 if (enc >= 8) { 208 enc -= 8; 209 } 210 return enc; 211 } 212 213 void Assembler::emit_arith_b(int op1, int op2, Register dst, int imm8) { 214 assert(dst->has_byte_register(), "must have byte register"); 215 assert(isByte(op1) && isByte(op2), "wrong opcode"); 216 assert(isByte(imm8), "not a byte"); 217 assert((op1 & 0x01) == 0, "should be 8bit operation"); 218 emit_byte(op1); 219 emit_byte(op2 | encode(dst)); 220 emit_byte(imm8); 221 } 222 223 224 void Assembler::emit_arith(int op1, int op2, Register dst, int32_t imm32) { 225 assert(isByte(op1) && isByte(op2), "wrong opcode"); 226 assert((op1 & 0x01) == 1, "should be 32bit operation"); 227 assert((op1 & 0x02) == 0, "sign-extension bit should not be set"); 228 if (is8bit(imm32)) { 229 emit_byte(op1 | 0x02); // set sign bit 230 emit_byte(op2 | encode(dst)); 231 emit_byte(imm32 & 0xFF); 232 } else { 233 emit_byte(op1); 234 emit_byte(op2 | encode(dst)); 235 emit_long(imm32); 236 } 237 } 238 239 // Force generation of a 4 byte immediate value even if it fits into 8bit 240 void Assembler::emit_arith_imm32(int op1, int op2, Register dst, int32_t imm32) { 241 assert(isByte(op1) && isByte(op2), "wrong opcode"); 242 assert((op1 & 0x01) == 1, "should be 32bit operation"); 243 assert((op1 & 0x02) == 0, "sign-extension bit should not be set"); 244 emit_byte(op1); 245 emit_byte(op2 | encode(dst)); 246 emit_long(imm32); 247 } 248 249 // immediate-to-memory forms 250 void Assembler::emit_arith_operand(int op1, Register 
rm, Address adr, int32_t imm32) { 251 assert((op1 & 0x01) == 1, "should be 32bit operation"); 252 assert((op1 & 0x02) == 0, "sign-extension bit should not be set"); 253 if (is8bit(imm32)) { 254 emit_byte(op1 | 0x02); // set sign bit 255 emit_operand(rm, adr, 1); 256 emit_byte(imm32 & 0xFF); 257 } else { 258 emit_byte(op1); 259 emit_operand(rm, adr, 4); 260 emit_long(imm32); 261 } 262 } 263 264 void Assembler::emit_arith(int op1, int op2, Register dst, jobject obj) { 265 LP64_ONLY(ShouldNotReachHere()); 266 assert(isByte(op1) && isByte(op2), "wrong opcode"); 267 assert((op1 & 0x01) == 1, "should be 32bit operation"); 268 assert((op1 & 0x02) == 0, "sign-extension bit should not be set"); 269 InstructionMark im(this); 270 emit_byte(op1); 271 emit_byte(op2 | encode(dst)); 272 emit_data((intptr_t)obj, relocInfo::oop_type, 0); 273 } 274 275 276 void Assembler::emit_arith(int op1, int op2, Register dst, Register src) { 277 assert(isByte(op1) && isByte(op2), "wrong opcode"); 278 emit_byte(op1); 279 emit_byte(op2 | encode(dst) << 3 | encode(src)); 280 } 281 282 283 void Assembler::emit_operand(Register reg, Register base, Register index, 284 Address::ScaleFactor scale, int disp, 285 RelocationHolder const& rspec, 286 int rip_relative_correction) { 287 relocInfo::relocType rtype = (relocInfo::relocType) rspec.type(); 288 289 // Encode the registers as needed in the fields they are used in 290 291 int regenc = encode(reg) << 3; 292 int indexenc = index->is_valid() ? encode(index) << 3 : 0; 293 int baseenc = base->is_valid() ? 
encode(base) : 0; 294 295 if (base->is_valid()) { 296 if (index->is_valid()) { 297 assert(scale != Address::no_scale, "inconsistent address"); 298 // [base + index*scale + disp] 299 if (disp == 0 && rtype == relocInfo::none && 300 base != rbp LP64_ONLY(&& base != r13)) { 301 // [base + index*scale] 302 // [00 reg 100][ss index base] 303 assert(index != rsp, "illegal addressing mode"); 304 emit_byte(0x04 | regenc); 305 emit_byte(scale << 6 | indexenc | baseenc); 306 } else if (is8bit(disp) && rtype == relocInfo::none) { 307 // [base + index*scale + imm8] 308 // [01 reg 100][ss index base] imm8 309 assert(index != rsp, "illegal addressing mode"); 310 emit_byte(0x44 | regenc); 311 emit_byte(scale << 6 | indexenc | baseenc); 312 emit_byte(disp & 0xFF); 313 } else { 314 // [base + index*scale + disp32] 315 // [10 reg 100][ss index base] disp32 316 assert(index != rsp, "illegal addressing mode"); 317 emit_byte(0x84 | regenc); 318 emit_byte(scale << 6 | indexenc | baseenc); 319 emit_data(disp, rspec, disp32_operand); 320 } 321 } else if (base == rsp LP64_ONLY(|| base == r12)) { 322 // [rsp + disp] 323 if (disp == 0 && rtype == relocInfo::none) { 324 // [rsp] 325 // [00 reg 100][00 100 100] 326 emit_byte(0x04 | regenc); 327 emit_byte(0x24); 328 } else if (is8bit(disp) && rtype == relocInfo::none) { 329 // [rsp + imm8] 330 // [01 reg 100][00 100 100] disp8 331 emit_byte(0x44 | regenc); 332 emit_byte(0x24); 333 emit_byte(disp & 0xFF); 334 } else { 335 // [rsp + imm32] 336 // [10 reg 100][00 100 100] disp32 337 emit_byte(0x84 | regenc); 338 emit_byte(0x24); 339 emit_data(disp, rspec, disp32_operand); 340 } 341 } else { 342 // [base + disp] 343 assert(base != rsp LP64_ONLY(&& base != r12), "illegal addressing mode"); 344 if (disp == 0 && rtype == relocInfo::none && 345 base != rbp LP64_ONLY(&& base != r13)) { 346 // [base] 347 // [00 reg base] 348 emit_byte(0x00 | regenc | baseenc); 349 } else if (is8bit(disp) && rtype == relocInfo::none) { 350 // [base + disp8] 351 // [01 reg 
base] disp8 352 emit_byte(0x40 | regenc | baseenc); 353 emit_byte(disp & 0xFF); 354 } else { 355 // [base + disp32] 356 // [10 reg base] disp32 357 emit_byte(0x80 | regenc | baseenc); 358 emit_data(disp, rspec, disp32_operand); 359 } 360 } 361 } else { 362 if (index->is_valid()) { 363 assert(scale != Address::no_scale, "inconsistent address"); 364 // [index*scale + disp] 365 // [00 reg 100][ss index 101] disp32 366 assert(index != rsp, "illegal addressing mode"); 367 emit_byte(0x04 | regenc); 368 emit_byte(scale << 6 | indexenc | 0x05); 369 emit_data(disp, rspec, disp32_operand); 370 } else if (rtype != relocInfo::none ) { 371 // [disp] (64bit) RIP-RELATIVE (32bit) abs 372 // [00 000 101] disp32 373 374 emit_byte(0x05 | regenc); 375 // Note that the RIP-rel. correction applies to the generated 376 // disp field, but _not_ to the target address in the rspec. 377 378 // disp was created by converting the target address minus the pc 379 // at the start of the instruction. That needs more correction here. 
380 // intptr_t disp = target - next_ip; 381 assert(inst_mark() != NULL, "must be inside InstructionMark"); 382 address next_ip = pc() + sizeof(int32_t) + rip_relative_correction; 383 int64_t adjusted = disp; 384 // Do rip-rel adjustment for 64bit 385 LP64_ONLY(adjusted -= (next_ip - inst_mark())); 386 assert(is_simm32(adjusted), 387 "must be 32bit offset (RIP relative address)"); 388 emit_data((int32_t) adjusted, rspec, disp32_operand); 389 390 } else { 391 // 32bit never did this, did everything as the rip-rel/disp code above 392 // [disp] ABSOLUTE 393 // [00 reg 100][00 100 101] disp32 394 emit_byte(0x04 | regenc); 395 emit_byte(0x25); 396 emit_data(disp, rspec, disp32_operand); 397 } 398 } 399 } 400 401 void Assembler::emit_operand(XMMRegister reg, Register base, Register index, 402 Address::ScaleFactor scale, int disp, 403 RelocationHolder const& rspec) { 404 emit_operand((Register)reg, base, index, scale, disp, rspec); 405 } 406 407 // Secret local extension to Assembler::WhichOperand: 408 #define end_pc_operand (_WhichOperand_limit) 409 410 address Assembler::locate_operand(address inst, WhichOperand which) { 411 // Decode the given instruction, and return the address of 412 // an embedded 32-bit operand word. 413 414 // If "which" is disp32_operand, selects the displacement portion 415 // of an effective address specifier. 416 // If "which" is imm64_operand, selects the trailing immediate constant. 417 // If "which" is call32_operand, selects the displacement of a call or jump. 418 // Caller is responsible for ensuring that there is such an operand, 419 // and that it is 32/64 bits wide. 420 421 // If "which" is end_pc_operand, find the end of the instruction. 422 423 address ip = inst; 424 bool is_64bit = false; 425 426 debug_only(bool has_disp32 = false); 427 int tail_size = 0; // other random bytes (#32, #16, etc.) 
at end of insn 428 429 again_after_prefix: 430 switch (0xFF & *ip++) { 431 432 // These convenience macros generate groups of "case" labels for the switch. 433 #define REP4(x) (x)+0: case (x)+1: case (x)+2: case (x)+3 434 #define REP8(x) (x)+0: case (x)+1: case (x)+2: case (x)+3: \ 435 case (x)+4: case (x)+5: case (x)+6: case (x)+7 436 #define REP16(x) REP8((x)+0): \ 437 case REP8((x)+8) 438 439 case CS_segment: 440 case SS_segment: 441 case DS_segment: 442 case ES_segment: 443 case FS_segment: 444 case GS_segment: 445 // Seems dubious 446 LP64_ONLY(assert(false, "shouldn't have that prefix")); 447 assert(ip == inst+1, "only one prefix allowed"); 448 goto again_after_prefix; 449 450 case 0x67: 451 case REX: 452 case REX_B: 453 case REX_X: 454 case REX_XB: 455 case REX_R: 456 case REX_RB: 457 case REX_RX: 458 case REX_RXB: 459 NOT_LP64(assert(false, "64bit prefixes")); 460 goto again_after_prefix; 461 462 case REX_W: 463 case REX_WB: 464 case REX_WX: 465 case REX_WXB: 466 case REX_WR: 467 case REX_WRB: 468 case REX_WRX: 469 case REX_WRXB: 470 NOT_LP64(assert(false, "64bit prefixes")); 471 is_64bit = true; 472 goto again_after_prefix; 473 474 case 0xFF: // pushq a; decl a; incl a; call a; jmp a 475 case 0x88: // movb a, r 476 case 0x89: // movl a, r 477 case 0x8A: // movb r, a 478 case 0x8B: // movl r, a 479 case 0x8F: // popl a 480 debug_only(has_disp32 = true); 481 break; 482 483 case 0x68: // pushq #32 484 if (which == end_pc_operand) { 485 return ip + 4; 486 } 487 assert(which == imm_operand && !is_64bit, "pushl has no disp32 or 64bit immediate"); 488 return ip; // not produced by emit_operand 489 490 case 0x66: // movw ... 
(size prefix) 491 again_after_size_prefix2: 492 switch (0xFF & *ip++) { 493 case REX: 494 case REX_B: 495 case REX_X: 496 case REX_XB: 497 case REX_R: 498 case REX_RB: 499 case REX_RX: 500 case REX_RXB: 501 case REX_W: 502 case REX_WB: 503 case REX_WX: 504 case REX_WXB: 505 case REX_WR: 506 case REX_WRB: 507 case REX_WRX: 508 case REX_WRXB: 509 NOT_LP64(assert(false, "64bit prefix found")); 510 goto again_after_size_prefix2; 511 case 0x8B: // movw r, a 512 case 0x89: // movw a, r 513 debug_only(has_disp32 = true); 514 break; 515 case 0xC7: // movw a, #16 516 debug_only(has_disp32 = true); 517 tail_size = 2; // the imm16 518 break; 519 case 0x0F: // several SSE/SSE2 variants 520 ip--; // reparse the 0x0F 521 goto again_after_prefix; 522 default: 523 ShouldNotReachHere(); 524 } 525 break; 526 527 case REP8(0xB8): // movl/q r, #32/#64(oop?) 528 if (which == end_pc_operand) return ip + (is_64bit ? 8 : 4); 529 // these asserts are somewhat nonsensical 530 #ifndef _LP64 531 assert(which == imm_operand || which == disp32_operand, 532 err_msg("which %d is_64_bit %d ip " INTPTR_FORMAT, which, is_64bit, ip)); 533 #else 534 assert((which == call32_operand || which == imm_operand) && is_64bit || 535 which == narrow_oop_operand && !is_64bit, 536 err_msg("which %d is_64_bit %d ip " INTPTR_FORMAT, which, is_64bit, ip)); 537 #endif // _LP64 538 return ip; 539 540 case 0x69: // imul r, a, #32 541 case 0xC7: // movl a, #32(oop?) 542 tail_size = 4; 543 debug_only(has_disp32 = true); // has both kinds of operands! 544 break; 545 546 case 0x0F: // movx..., etc. 547 switch (0xFF & *ip++) { 548 case 0x3A: // pcmpestri 549 tail_size = 1; 550 case 0x38: // ptest, pmovzxbw 551 ip++; // skip opcode 552 debug_only(has_disp32 = true); // has both kinds of operands! 553 break; 554 555 case 0x70: // pshufd r, r/a, #8 556 debug_only(has_disp32 = true); // has both kinds of operands! 
557 case 0x73: // psrldq r, #8 558 tail_size = 1; 559 break; 560 561 case 0x12: // movlps 562 case 0x28: // movaps 563 case 0x2E: // ucomiss 564 case 0x2F: // comiss 565 case 0x54: // andps 566 case 0x55: // andnps 567 case 0x56: // orps 568 case 0x57: // xorps 569 case 0x6E: // movd 570 case 0x7E: // movd 571 case 0xAE: // ldmxcsr, stmxcsr, fxrstor, fxsave, clflush 572 debug_only(has_disp32 = true); 573 break; 574 575 case 0xAD: // shrd r, a, %cl 576 case 0xAF: // imul r, a 577 case 0xBE: // movsbl r, a (movsxb) 578 case 0xBF: // movswl r, a (movsxw) 579 case 0xB6: // movzbl r, a (movzxb) 580 case 0xB7: // movzwl r, a (movzxw) 581 case REP16(0x40): // cmovl cc, r, a 582 case 0xB0: // cmpxchgb 583 case 0xB1: // cmpxchg 584 case 0xC1: // xaddl 585 case 0xC7: // cmpxchg8 586 case REP16(0x90): // setcc a 587 debug_only(has_disp32 = true); 588 // fall out of the switch to decode the address 589 break; 590 591 case 0xC4: // pinsrw r, a, #8 592 debug_only(has_disp32 = true); 593 case 0xC5: // pextrw r, r, #8 594 tail_size = 1; // the imm8 595 break; 596 597 case 0xAC: // shrd r, a, #8 598 debug_only(has_disp32 = true); 599 tail_size = 1; // the imm8 600 break; 601 602 case REP16(0x80): // jcc rdisp32 603 if (which == end_pc_operand) return ip + 4; 604 assert(which == call32_operand, "jcc has no disp32 or imm"); 605 return ip; 606 default: 607 ShouldNotReachHere(); 608 } 609 break; 610 611 case 0x81: // addl a, #32; addl r, #32 612 // also: orl, adcl, sbbl, andl, subl, xorl, cmpl 613 // on 32bit in the case of cmpl, the imm might be an oop 614 tail_size = 4; 615 debug_only(has_disp32 = true); // has both kinds of operands! 616 break; 617 618 case 0x83: // addl a, #8; addl r, #8 619 // also: orl, adcl, sbbl, andl, subl, xorl, cmpl 620 debug_only(has_disp32 = true); // has both kinds of operands! 
621 tail_size = 1; 622 break; 623 624 case 0x9B: 625 switch (0xFF & *ip++) { 626 case 0xD9: // fnstcw a 627 debug_only(has_disp32 = true); 628 break; 629 default: 630 ShouldNotReachHere(); 631 } 632 break; 633 634 case REP4(0x00): // addb a, r; addl a, r; addb r, a; addl r, a 635 case REP4(0x10): // adc... 636 case REP4(0x20): // and... 637 case REP4(0x30): // xor... 638 case REP4(0x08): // or... 639 case REP4(0x18): // sbb... 640 case REP4(0x28): // sub... 641 case 0xF7: // mull a 642 case 0x8D: // lea r, a 643 case 0x87: // xchg r, a 644 case REP4(0x38): // cmp... 645 case 0x85: // test r, a 646 debug_only(has_disp32 = true); // has both kinds of operands! 647 break; 648 649 case 0xC1: // sal a, #8; sar a, #8; shl a, #8; shr a, #8 650 case 0xC6: // movb a, #8 651 case 0x80: // cmpb a, #8 652 case 0x6B: // imul r, a, #8 653 debug_only(has_disp32 = true); // has both kinds of operands! 654 tail_size = 1; // the imm8 655 break; 656 657 case 0xC4: // VEX_3bytes 658 case 0xC5: // VEX_2bytes 659 assert((UseAVX > 0), "shouldn't have VEX prefix"); 660 assert(ip == inst+1, "no prefixes allowed"); 661 // C4 and C5 are also used as opcodes for PINSRW and PEXTRW instructions 662 // but they have prefix 0x0F and processed when 0x0F processed above. 663 // 664 // In 32-bit mode the VEX first byte C4 and C5 alias onto LDS and LES 665 // instructions (these instructions are not supported in 64-bit mode). 666 // To distinguish them bits [7:6] are set in the VEX second byte since 667 // ModRM byte can not be of the form 11xxxxxx in 32-bit mode. To set 668 // those VEX bits REX and vvvv bits are inverted. 669 // 670 // Fortunately C2 doesn't generate these instructions so we don't need 671 // to check for them in product version. 
672 673 // Check second byte 674 NOT_LP64(assert((0xC0 & *ip) == 0xC0, "shouldn't have LDS and LES instructions")); 675 676 // First byte 677 if ((0xFF & *inst) == VEX_3bytes) { 678 ip++; // third byte 679 is_64bit = ((VEX_W & *ip) == VEX_W); 680 } 681 ip++; // opcode 682 // To find the end of instruction (which == end_pc_operand). 683 switch (0xFF & *ip) { 684 case 0x61: // pcmpestri r, r/a, #8 685 case 0x70: // pshufd r, r/a, #8 686 case 0x73: // psrldq r, #8 687 tail_size = 1; // the imm8 688 break; 689 default: 690 break; 691 } 692 ip++; // skip opcode 693 debug_only(has_disp32 = true); // has both kinds of operands! 694 break; 695 696 case 0xD1: // sal a, 1; sar a, 1; shl a, 1; shr a, 1 697 case 0xD3: // sal a, %cl; sar a, %cl; shl a, %cl; shr a, %cl 698 case 0xD9: // fld_s a; fst_s a; fstp_s a; fldcw a 699 case 0xDD: // fld_d a; fst_d a; fstp_d a 700 case 0xDB: // fild_s a; fistp_s a; fld_x a; fstp_x a 701 case 0xDF: // fild_d a; fistp_d a 702 case 0xD8: // fadd_s a; fsubr_s a; fmul_s a; fdivr_s a; fcomp_s a 703 case 0xDC: // fadd_d a; fsubr_d a; fmul_d a; fdivr_d a; fcomp_d a 704 case 0xDE: // faddp_d a; fsubrp_d a; fmulp_d a; fdivrp_d a; fcompp_d a 705 debug_only(has_disp32 = true); 706 break; 707 708 case 0xE8: // call rdisp32 709 case 0xE9: // jmp rdisp32 710 if (which == end_pc_operand) return ip + 4; 711 assert(which == call32_operand, "call has no disp32 or imm"); 712 return ip; 713 714 case 0xF0: // Lock 715 assert(os::is_MP(), "only on MP"); 716 goto again_after_prefix; 717 718 case 0xF3: // For SSE 719 case 0xF2: // For SSE2 720 switch (0xFF & *ip++) { 721 case REX: 722 case REX_B: 723 case REX_X: 724 case REX_XB: 725 case REX_R: 726 case REX_RB: 727 case REX_RX: 728 case REX_RXB: 729 case REX_W: 730 case REX_WB: 731 case REX_WX: 732 case REX_WXB: 733 case REX_WR: 734 case REX_WRB: 735 case REX_WRX: 736 case REX_WRXB: 737 NOT_LP64(assert(false, "found 64bit prefix")); 738 ip++; 739 default: 740 ip++; 741 } 742 debug_only(has_disp32 = true); // has 
both kinds of operands! 743 break; 744 745 default: 746 ShouldNotReachHere(); 747 748 #undef REP8 749 #undef REP16 750 } 751 752 assert(which != call32_operand, "instruction is not a call, jmp, or jcc"); 753 #ifdef _LP64 754 assert(which != imm_operand, "instruction is not a movq reg, imm64"); 755 #else 756 // assert(which != imm_operand || has_imm32, "instruction has no imm32 field"); 757 assert(which != imm_operand || has_disp32, "instruction has no imm32 field"); 758 #endif // LP64 759 assert(which != disp32_operand || has_disp32, "instruction has no disp32 field"); 760 761 // parse the output of emit_operand 762 int op2 = 0xFF & *ip++; 763 int base = op2 & 0x07; 764 int op3 = -1; 765 const int b100 = 4; 766 const int b101 = 5; 767 if (base == b100 && (op2 >> 6) != 3) { 768 op3 = 0xFF & *ip++; 769 base = op3 & 0x07; // refetch the base 770 } 771 // now ip points at the disp (if any) 772 773 switch (op2 >> 6) { 774 case 0: 775 // [00 reg 100][ss index base] 776 // [00 reg 100][00 100 esp] 777 // [00 reg base] 778 // [00 reg 100][ss index 101][disp32] 779 // [00 reg 101] [disp32] 780 781 if (base == b101) { 782 if (which == disp32_operand) 783 return ip; // caller wants the disp32 784 ip += 4; // skip the disp32 785 } 786 break; 787 788 case 1: 789 // [01 reg 100][ss index base][disp8] 790 // [01 reg 100][00 100 esp][disp8] 791 // [01 reg base] [disp8] 792 ip += 1; // skip the disp8 793 break; 794 795 case 2: 796 // [10 reg 100][ss index base][disp32] 797 // [10 reg 100][00 100 esp][disp32] 798 // [10 reg base] [disp32] 799 if (which == disp32_operand) 800 return ip; // caller wants the disp32 801 ip += 4; // skip the disp32 802 break; 803 804 case 3: 805 // [11 reg base] (not a memory addressing mode) 806 break; 807 } 808 809 if (which == end_pc_operand) { 810 return ip + tail_size; 811 } 812 813 #ifdef _LP64 814 assert(which == narrow_oop_operand && !is_64bit, "instruction is not a movl adr, imm32"); 815 #else 816 assert(which == imm_operand, "instruction has 
only an imm field"); 817 #endif // LP64 818 return ip; 819 } 820 821 address Assembler::locate_next_instruction(address inst) { 822 // Secretly share code with locate_operand: 823 return locate_operand(inst, end_pc_operand); 824 } 825 826 827 #ifdef ASSERT 828 void Assembler::check_relocation(RelocationHolder const& rspec, int format) { 829 address inst = inst_mark(); 830 assert(inst != NULL && inst < pc(), "must point to beginning of instruction"); 831 address opnd; 832 833 Relocation* r = rspec.reloc(); 834 if (r->type() == relocInfo::none) { 835 return; 836 } else if (r->is_call() || format == call32_operand) { 837 // assert(format == imm32_operand, "cannot specify a nonzero format"); 838 opnd = locate_operand(inst, call32_operand); 839 } else if (r->is_data()) { 840 assert(format == imm_operand || format == disp32_operand 841 LP64_ONLY(|| format == narrow_oop_operand), "format ok"); 842 opnd = locate_operand(inst, (WhichOperand)format); 843 } else { 844 assert(format == imm_operand, "cannot specify a format"); 845 return; 846 } 847 assert(opnd == pc(), "must put operand where relocs can find it"); 848 } 849 #endif // ASSERT 850 851 void Assembler::emit_operand32(Register reg, Address adr) { 852 assert(reg->encoding() < 8, "no extended registers"); 853 assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers"); 854 emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp, 855 adr._rspec); 856 } 857 858 void Assembler::emit_operand(Register reg, Address adr, 859 int rip_relative_correction) { 860 emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp, 861 adr._rspec, 862 rip_relative_correction); 863 } 864 865 void Assembler::emit_operand(XMMRegister reg, Address adr) { 866 emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp, 867 adr._rspec); 868 } 869 870 // MMX operations 871 void Assembler::emit_operand(MMXRegister reg, Address adr) { 872 assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended 
registers"); 873 emit_operand((Register)reg, adr._base, adr._index, adr._scale, adr._disp, adr._rspec); 874 } 875 876 // work around gcc (3.2.1-7a) bug 877 void Assembler::emit_operand(Address adr, MMXRegister reg) { 878 assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers"); 879 emit_operand((Register)reg, adr._base, adr._index, adr._scale, adr._disp, adr._rspec); 880 } 881 882 883 void Assembler::emit_farith(int b1, int b2, int i) { 884 assert(isByte(b1) && isByte(b2), "wrong opcode"); 885 assert(0 <= i && i < 8, "illegal stack offset"); 886 emit_byte(b1); 887 emit_byte(b2 + i); 888 } 889 890 891 // Now the Assembler instructions (identical for 32/64 bits) 892 893 void Assembler::adcl(Address dst, int32_t imm32) { 894 InstructionMark im(this); 895 prefix(dst); 896 emit_arith_operand(0x81, rdx, dst, imm32); 897 } 898 899 void Assembler::adcl(Address dst, Register src) { 900 InstructionMark im(this); 901 prefix(dst, src); 902 emit_byte(0x11); 903 emit_operand(src, dst); 904 } 905 906 void Assembler::adcl(Register dst, int32_t imm32) { 907 prefix(dst); 908 emit_arith(0x81, 0xD0, dst, imm32); 909 } 910 911 void Assembler::adcl(Register dst, Address src) { 912 InstructionMark im(this); 913 prefix(src, dst); 914 emit_byte(0x13); 915 emit_operand(dst, src); 916 } 917 918 void Assembler::adcl(Register dst, Register src) { 919 (void) prefix_and_encode(dst->encoding(), src->encoding()); 920 emit_arith(0x13, 0xC0, dst, src); 921 } 922 923 void Assembler::addl(Address dst, int32_t imm32) { 924 InstructionMark im(this); 925 prefix(dst); 926 emit_arith_operand(0x81, rax, dst, imm32); 927 } 928 929 void Assembler::addl(Address dst, Register src) { 930 InstructionMark im(this); 931 prefix(dst, src); 932 emit_byte(0x01); 933 emit_operand(src, dst); 934 } 935 936 void Assembler::addl(Register dst, int32_t imm32) { 937 prefix(dst); 938 emit_arith(0x81, 0xC0, dst, imm32); 939 } 940 941 void Assembler::addl(Register dst, Address src) { 942 InstructionMark 
im(this); 943 prefix(src, dst); 944 emit_byte(0x03); 945 emit_operand(dst, src); 946 } 947 948 void Assembler::addl(Register dst, Register src) { 949 (void) prefix_and_encode(dst->encoding(), src->encoding()); 950 emit_arith(0x03, 0xC0, dst, src); 951 } 952 953 void Assembler::addr_nop_4() { 954 assert(UseAddressNop, "no CPU support"); 955 // 4 bytes: NOP DWORD PTR [EAX+0] 956 emit_byte(0x0F); 957 emit_byte(0x1F); 958 emit_byte(0x40); // emit_rm(cbuf, 0x1, EAX_enc, EAX_enc); 959 emit_byte(0); // 8-bits offset (1 byte) 960 } 961 962 void Assembler::addr_nop_5() { 963 assert(UseAddressNop, "no CPU support"); 964 // 5 bytes: NOP DWORD PTR [EAX+EAX*0+0] 8-bits offset 965 emit_byte(0x0F); 966 emit_byte(0x1F); 967 emit_byte(0x44); // emit_rm(cbuf, 0x1, EAX_enc, 0x4); 968 emit_byte(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc); 969 emit_byte(0); // 8-bits offset (1 byte) 970 } 971 972 void Assembler::addr_nop_7() { 973 assert(UseAddressNop, "no CPU support"); 974 // 7 bytes: NOP DWORD PTR [EAX+0] 32-bits offset 975 emit_byte(0x0F); 976 emit_byte(0x1F); 977 emit_byte(0x80); // emit_rm(cbuf, 0x2, EAX_enc, EAX_enc); 978 emit_long(0); // 32-bits offset (4 bytes) 979 } 980 981 void Assembler::addr_nop_8() { 982 assert(UseAddressNop, "no CPU support"); 983 // 8 bytes: NOP DWORD PTR [EAX+EAX*0+0] 32-bits offset 984 emit_byte(0x0F); 985 emit_byte(0x1F); 986 emit_byte(0x84); // emit_rm(cbuf, 0x2, EAX_enc, 0x4); 987 emit_byte(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc); 988 emit_long(0); // 32-bits offset (4 bytes) 989 } 990 991 void Assembler::addsd(XMMRegister dst, XMMRegister src) { 992 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 993 int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2); 994 emit_byte(0x58); 995 emit_byte(0xC0 | encode); 996 } 997 998 void Assembler::addsd(XMMRegister dst, Address src) { 999 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1000 InstructionMark im(this); 1001 simd_prefix(dst, dst, src, VEX_SIMD_F2); 1002 emit_byte(0x58); 
1003 emit_operand(dst, src); 1004 } 1005 1006 void Assembler::addss(XMMRegister dst, XMMRegister src) { 1007 NOT_LP64(assert(VM_Version::supports_sse(), "")); 1008 int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3); 1009 emit_byte(0x58); 1010 emit_byte(0xC0 | encode); 1011 } 1012 1013 void Assembler::addss(XMMRegister dst, Address src) { 1014 NOT_LP64(assert(VM_Version::supports_sse(), "")); 1015 InstructionMark im(this); 1016 simd_prefix(dst, dst, src, VEX_SIMD_F3); 1017 emit_byte(0x58); 1018 emit_operand(dst, src); 1019 } 1020 1021 void Assembler::andl(Address dst, int32_t imm32) { 1022 InstructionMark im(this); 1023 prefix(dst); 1024 emit_byte(0x81); 1025 emit_operand(rsp, dst, 4); 1026 emit_long(imm32); 1027 } 1028 1029 void Assembler::andl(Register dst, int32_t imm32) { 1030 prefix(dst); 1031 emit_arith(0x81, 0xE0, dst, imm32); 1032 } 1033 1034 void Assembler::andl(Register dst, Address src) { 1035 InstructionMark im(this); 1036 prefix(src, dst); 1037 emit_byte(0x23); 1038 emit_operand(dst, src); 1039 } 1040 1041 void Assembler::andl(Register dst, Register src) { 1042 (void) prefix_and_encode(dst->encoding(), src->encoding()); 1043 emit_arith(0x23, 0xC0, dst, src); 1044 } 1045 1046 void Assembler::andpd(XMMRegister dst, Address src) { 1047 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1048 InstructionMark im(this); 1049 simd_prefix(dst, dst, src, VEX_SIMD_66); 1050 emit_byte(0x54); 1051 emit_operand(dst, src); 1052 } 1053 1054 void Assembler::andpd(XMMRegister dst, XMMRegister src) { 1055 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1056 int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66); 1057 emit_byte(0x54); 1058 emit_byte(0xC0 | encode); 1059 } 1060 1061 void Assembler::andps(XMMRegister dst, Address src) { 1062 NOT_LP64(assert(VM_Version::supports_sse(), "")); 1063 InstructionMark im(this); 1064 simd_prefix(dst, dst, src, VEX_SIMD_NONE); 1065 emit_byte(0x54); 1066 emit_operand(dst, src); 1067 } 1068 1069 void 
Assembler::andps(XMMRegister dst, XMMRegister src) { 1070 NOT_LP64(assert(VM_Version::supports_sse(), "")); 1071 int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE); 1072 emit_byte(0x54); 1073 emit_byte(0xC0 | encode); 1074 } 1075 1076 void Assembler::bsfl(Register dst, Register src) { 1077 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 1078 emit_byte(0x0F); 1079 emit_byte(0xBC); 1080 emit_byte(0xC0 | encode); 1081 } 1082 1083 void Assembler::bsrl(Register dst, Register src) { 1084 assert(!VM_Version::supports_lzcnt(), "encoding is treated as LZCNT"); 1085 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 1086 emit_byte(0x0F); 1087 emit_byte(0xBD); 1088 emit_byte(0xC0 | encode); 1089 } 1090 1091 void Assembler::bswapl(Register reg) { // bswap 1092 int encode = prefix_and_encode(reg->encoding()); 1093 emit_byte(0x0F); 1094 emit_byte(0xC8 | encode); 1095 } 1096 1097 void Assembler::call(Label& L, relocInfo::relocType rtype) { 1098 // suspect disp32 is always good 1099 int operand = LP64_ONLY(disp32_operand) NOT_LP64(imm_operand); 1100 1101 if (L.is_bound()) { 1102 const int long_size = 5; 1103 int offs = (int)( target(L) - pc() ); 1104 assert(offs <= 0, "assembler error"); 1105 InstructionMark im(this); 1106 // 1110 1000 #32-bit disp 1107 emit_byte(0xE8); 1108 emit_data(offs - long_size, rtype, operand); 1109 } else { 1110 InstructionMark im(this); 1111 // 1110 1000 #32-bit disp 1112 L.add_patch_at(code(), locator()); 1113 1114 emit_byte(0xE8); 1115 emit_data(int(0), rtype, operand); 1116 } 1117 } 1118 1119 void Assembler::call(Register dst) { 1120 int encode = prefix_and_encode(dst->encoding()); 1121 emit_byte(0xFF); 1122 emit_byte(0xD0 | encode); 1123 } 1124 1125 1126 void Assembler::call(Address adr) { 1127 InstructionMark im(this); 1128 prefix(adr); 1129 emit_byte(0xFF); 1130 emit_operand(rdx, adr); 1131 } 1132 1133 void Assembler::call_literal(address entry, RelocationHolder const& rspec) { 1134 assert(entry != 
NULL, "call most probably wrong"); 1135 InstructionMark im(this); 1136 emit_byte(0xE8); 1137 intptr_t disp = entry - (_code_pos + sizeof(int32_t)); 1138 assert(is_simm32(disp), "must be 32bit offset (call2)"); 1139 // Technically, should use call32_operand, but this format is 1140 // implied by the fact that we're emitting a call instruction. 1141 1142 int operand = LP64_ONLY(disp32_operand) NOT_LP64(call32_operand); 1143 emit_data((int) disp, rspec, operand); 1144 } 1145 1146 void Assembler::cdql() { 1147 emit_byte(0x99); 1148 } 1149 1150 void Assembler::cmovl(Condition cc, Register dst, Register src) { 1151 NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction")); 1152 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 1153 emit_byte(0x0F); 1154 emit_byte(0x40 | cc); 1155 emit_byte(0xC0 | encode); 1156 } 1157 1158 1159 void Assembler::cmovl(Condition cc, Register dst, Address src) { 1160 NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction")); 1161 prefix(src, dst); 1162 emit_byte(0x0F); 1163 emit_byte(0x40 | cc); 1164 emit_operand(dst, src); 1165 } 1166 1167 void Assembler::cmpb(Address dst, int imm8) { 1168 InstructionMark im(this); 1169 prefix(dst); 1170 emit_byte(0x80); 1171 emit_operand(rdi, dst, 1); 1172 emit_byte(imm8); 1173 } 1174 1175 void Assembler::cmpl(Address dst, int32_t imm32) { 1176 InstructionMark im(this); 1177 prefix(dst); 1178 emit_byte(0x81); 1179 emit_operand(rdi, dst, 4); 1180 emit_long(imm32); 1181 } 1182 1183 void Assembler::cmpl(Register dst, int32_t imm32) { 1184 prefix(dst); 1185 emit_arith(0x81, 0xF8, dst, imm32); 1186 } 1187 1188 void Assembler::cmpl(Register dst, Register src) { 1189 (void) prefix_and_encode(dst->encoding(), src->encoding()); 1190 emit_arith(0x3B, 0xC0, dst, src); 1191 } 1192 1193 1194 void Assembler::cmpl(Register dst, Address src) { 1195 InstructionMark im(this); 1196 prefix(src, dst); 1197 emit_byte(0x3B); 1198 emit_operand(dst, src); 1199 } 1200 1201 void 
Assembler::cmpw(Address dst, int imm16) { 1202 InstructionMark im(this); 1203 assert(!dst.base_needs_rex() && !dst.index_needs_rex(), "no extended registers"); 1204 emit_byte(0x66); 1205 emit_byte(0x81); 1206 emit_operand(rdi, dst, 2); 1207 emit_word(imm16); 1208 } 1209 1210 // The 32-bit cmpxchg compares the value at adr with the contents of rax, 1211 // and stores reg into adr if so; otherwise, the value at adr is loaded into rax,. 1212 // The ZF is set if the compared values were equal, and cleared otherwise. 1213 void Assembler::cmpxchgl(Register reg, Address adr) { // cmpxchg 1214 if (Atomics & 2) { 1215 // caveat: no instructionmark, so this isn't relocatable. 1216 // Emit a synthetic, non-atomic, CAS equivalent. 1217 // Beware. The synthetic form sets all ICCs, not just ZF. 1218 // cmpxchg r,[m] is equivalent to rax, = CAS (m, rax, r) 1219 cmpl(rax, adr); 1220 movl(rax, adr); 1221 if (reg != rax) { 1222 Label L ; 1223 jcc(Assembler::notEqual, L); 1224 movl(adr, reg); 1225 bind(L); 1226 } 1227 } else { 1228 InstructionMark im(this); 1229 prefix(adr, reg); 1230 emit_byte(0x0F); 1231 emit_byte(0xB1); 1232 emit_operand(reg, adr); 1233 } 1234 } 1235 1236 void Assembler::comisd(XMMRegister dst, Address src) { 1237 // NOTE: dbx seems to decode this as comiss even though the 1238 // 0x66 is there. 
Strangly ucomisd comes out correct 1239 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1240 InstructionMark im(this); 1241 simd_prefix(dst, src, VEX_SIMD_66); 1242 emit_byte(0x2F); 1243 emit_operand(dst, src); 1244 } 1245 1246 void Assembler::comisd(XMMRegister dst, XMMRegister src) { 1247 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1248 int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66); 1249 emit_byte(0x2F); 1250 emit_byte(0xC0 | encode); 1251 } 1252 1253 void Assembler::comiss(XMMRegister dst, Address src) { 1254 NOT_LP64(assert(VM_Version::supports_sse(), "")); 1255 InstructionMark im(this); 1256 simd_prefix(dst, src, VEX_SIMD_NONE); 1257 emit_byte(0x2F); 1258 emit_operand(dst, src); 1259 } 1260 1261 void Assembler::comiss(XMMRegister dst, XMMRegister src) { 1262 NOT_LP64(assert(VM_Version::supports_sse(), "")); 1263 int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_NONE); 1264 emit_byte(0x2F); 1265 emit_byte(0xC0 | encode); 1266 } 1267 1268 void Assembler::cvtdq2pd(XMMRegister dst, XMMRegister src) { 1269 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1270 int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F3); 1271 emit_byte(0xE6); 1272 emit_byte(0xC0 | encode); 1273 } 1274 1275 void Assembler::cvtdq2ps(XMMRegister dst, XMMRegister src) { 1276 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1277 int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_NONE); 1278 emit_byte(0x5B); 1279 emit_byte(0xC0 | encode); 1280 } 1281 1282 void Assembler::cvtsd2ss(XMMRegister dst, XMMRegister src) { 1283 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1284 int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2); 1285 emit_byte(0x5A); 1286 emit_byte(0xC0 | encode); 1287 } 1288 1289 void Assembler::cvtsd2ss(XMMRegister dst, Address src) { 1290 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1291 InstructionMark im(this); 1292 simd_prefix(dst, dst, src, VEX_SIMD_F2); 1293 emit_byte(0x5A); 1294 emit_operand(dst, src); 1295 } 1296 
1297 void Assembler::cvtsi2sdl(XMMRegister dst, Register src) { 1298 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1299 int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2); 1300 emit_byte(0x2A); 1301 emit_byte(0xC0 | encode); 1302 } 1303 1304 void Assembler::cvtsi2sdl(XMMRegister dst, Address src) { 1305 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1306 InstructionMark im(this); 1307 simd_prefix(dst, dst, src, VEX_SIMD_F2); 1308 emit_byte(0x2A); 1309 emit_operand(dst, src); 1310 } 1311 1312 void Assembler::cvtsi2ssl(XMMRegister dst, Register src) { 1313 NOT_LP64(assert(VM_Version::supports_sse(), "")); 1314 int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3); 1315 emit_byte(0x2A); 1316 emit_byte(0xC0 | encode); 1317 } 1318 1319 void Assembler::cvtsi2ssl(XMMRegister dst, Address src) { 1320 NOT_LP64(assert(VM_Version::supports_sse(), "")); 1321 InstructionMark im(this); 1322 simd_prefix(dst, dst, src, VEX_SIMD_F3); 1323 emit_byte(0x2A); 1324 emit_operand(dst, src); 1325 } 1326 1327 void Assembler::cvtss2sd(XMMRegister dst, XMMRegister src) { 1328 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1329 int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3); 1330 emit_byte(0x5A); 1331 emit_byte(0xC0 | encode); 1332 } 1333 1334 void Assembler::cvtss2sd(XMMRegister dst, Address src) { 1335 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1336 InstructionMark im(this); 1337 simd_prefix(dst, dst, src, VEX_SIMD_F3); 1338 emit_byte(0x5A); 1339 emit_operand(dst, src); 1340 } 1341 1342 1343 void Assembler::cvttsd2sil(Register dst, XMMRegister src) { 1344 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1345 int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F2); 1346 emit_byte(0x2C); 1347 emit_byte(0xC0 | encode); 1348 } 1349 1350 void Assembler::cvttss2sil(Register dst, XMMRegister src) { 1351 NOT_LP64(assert(VM_Version::supports_sse(), "")); 1352 int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F3); 1353 
emit_byte(0x2C); 1354 emit_byte(0xC0 | encode); 1355 } 1356 1357 void Assembler::decl(Address dst) { 1358 // Don't use it directly. Use MacroAssembler::decrement() instead. 1359 InstructionMark im(this); 1360 prefix(dst); 1361 emit_byte(0xFF); 1362 emit_operand(rcx, dst); 1363 } 1364 1365 void Assembler::divsd(XMMRegister dst, Address src) { 1366 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1367 InstructionMark im(this); 1368 simd_prefix(dst, dst, src, VEX_SIMD_F2); 1369 emit_byte(0x5E); 1370 emit_operand(dst, src); 1371 } 1372 1373 void Assembler::divsd(XMMRegister dst, XMMRegister src) { 1374 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1375 int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2); 1376 emit_byte(0x5E); 1377 emit_byte(0xC0 | encode); 1378 } 1379 1380 void Assembler::divss(XMMRegister dst, Address src) { 1381 NOT_LP64(assert(VM_Version::supports_sse(), "")); 1382 InstructionMark im(this); 1383 simd_prefix(dst, dst, src, VEX_SIMD_F3); 1384 emit_byte(0x5E); 1385 emit_operand(dst, src); 1386 } 1387 1388 void Assembler::divss(XMMRegister dst, XMMRegister src) { 1389 NOT_LP64(assert(VM_Version::supports_sse(), "")); 1390 int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3); 1391 emit_byte(0x5E); 1392 emit_byte(0xC0 | encode); 1393 } 1394 1395 void Assembler::emms() { 1396 NOT_LP64(assert(VM_Version::supports_mmx(), "")); 1397 emit_byte(0x0F); 1398 emit_byte(0x77); 1399 } 1400 1401 void Assembler::hlt() { 1402 emit_byte(0xF4); 1403 } 1404 1405 void Assembler::idivl(Register src) { 1406 int encode = prefix_and_encode(src->encoding()); 1407 emit_byte(0xF7); 1408 emit_byte(0xF8 | encode); 1409 } 1410 1411 void Assembler::divl(Register src) { // Unsigned 1412 int encode = prefix_and_encode(src->encoding()); 1413 emit_byte(0xF7); 1414 emit_byte(0xF0 | encode); 1415 } 1416 1417 void Assembler::imull(Register dst, Register src) { 1418 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 1419 emit_byte(0x0F); 1420 
emit_byte(0xAF); 1421 emit_byte(0xC0 | encode); 1422 } 1423 1424 1425 void Assembler::imull(Register dst, Register src, int value) { 1426 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 1427 if (is8bit(value)) { 1428 emit_byte(0x6B); 1429 emit_byte(0xC0 | encode); 1430 emit_byte(value & 0xFF); 1431 } else { 1432 emit_byte(0x69); 1433 emit_byte(0xC0 | encode); 1434 emit_long(value); 1435 } 1436 } 1437 1438 void Assembler::incl(Address dst) { 1439 // Don't use it directly. Use MacroAssembler::increment() instead. 1440 InstructionMark im(this); 1441 prefix(dst); 1442 emit_byte(0xFF); 1443 emit_operand(rax, dst); 1444 } 1445 1446 void Assembler::jcc(Condition cc, Label& L, bool maybe_short) { 1447 InstructionMark im(this); 1448 assert((0 <= cc) && (cc < 16), "illegal cc"); 1449 if (L.is_bound()) { 1450 address dst = target(L); 1451 assert(dst != NULL, "jcc most probably wrong"); 1452 1453 const int short_size = 2; 1454 const int long_size = 6; 1455 intptr_t offs = (intptr_t)dst - (intptr_t)_code_pos; 1456 if (maybe_short && is8bit(offs - short_size)) { 1457 // 0111 tttn #8-bit disp 1458 emit_byte(0x70 | cc); 1459 emit_byte((offs - short_size) & 0xFF); 1460 } else { 1461 // 0000 1111 1000 tttn #32-bit disp 1462 assert(is_simm32(offs - long_size), 1463 "must be 32bit offset (call4)"); 1464 emit_byte(0x0F); 1465 emit_byte(0x80 | cc); 1466 emit_long(offs - long_size); 1467 } 1468 } else { 1469 // Note: could eliminate cond. jumps to this jump if condition 1470 // is the same however, seems to be rather unlikely case. 
1471 // Note: use jccb() if label to be bound is very close to get 1472 // an 8-bit displacement 1473 L.add_patch_at(code(), locator()); 1474 emit_byte(0x0F); 1475 emit_byte(0x80 | cc); 1476 emit_long(0); 1477 } 1478 } 1479 1480 void Assembler::jccb(Condition cc, Label& L) { 1481 if (L.is_bound()) { 1482 const int short_size = 2; 1483 address entry = target(L); 1484 #ifdef ASSERT 1485 intptr_t dist = (intptr_t)entry - ((intptr_t)_code_pos + short_size); 1486 intptr_t delta = short_branch_delta(); 1487 if (delta != 0) { 1488 dist += (dist < 0 ? (-delta) :delta); 1489 } 1490 assert(is8bit(dist), "Dispacement too large for a short jmp"); 1491 #endif 1492 intptr_t offs = (intptr_t)entry - (intptr_t)_code_pos; 1493 // 0111 tttn #8-bit disp 1494 emit_byte(0x70 | cc); 1495 emit_byte((offs - short_size) & 0xFF); 1496 } else { 1497 InstructionMark im(this); 1498 L.add_patch_at(code(), locator()); 1499 emit_byte(0x70 | cc); 1500 emit_byte(0); 1501 } 1502 } 1503 1504 void Assembler::jmp(Address adr) { 1505 InstructionMark im(this); 1506 prefix(adr); 1507 emit_byte(0xFF); 1508 emit_operand(rsp, adr); 1509 } 1510 1511 void Assembler::jmp(Label& L, bool maybe_short) { 1512 if (L.is_bound()) { 1513 address entry = target(L); 1514 assert(entry != NULL, "jmp most probably wrong"); 1515 InstructionMark im(this); 1516 const int short_size = 2; 1517 const int long_size = 5; 1518 intptr_t offs = entry - _code_pos; 1519 if (maybe_short && is8bit(offs - short_size)) { 1520 emit_byte(0xEB); 1521 emit_byte((offs - short_size) & 0xFF); 1522 } else { 1523 emit_byte(0xE9); 1524 emit_long(offs - long_size); 1525 } 1526 } else { 1527 // By default, forward jumps are always 32-bit displacements, since 1528 // we can't yet know where the label will be bound. If you're sure that 1529 // the forward jump will not run beyond 256 bytes, use jmpb to 1530 // force an 8-bit displacement. 
1531 InstructionMark im(this); 1532 L.add_patch_at(code(), locator()); 1533 emit_byte(0xE9); 1534 emit_long(0); 1535 } 1536 } 1537 1538 void Assembler::jmp(Register entry) { 1539 int encode = prefix_and_encode(entry->encoding()); 1540 emit_byte(0xFF); 1541 emit_byte(0xE0 | encode); 1542 } 1543 1544 void Assembler::jmp_literal(address dest, RelocationHolder const& rspec) { 1545 InstructionMark im(this); 1546 emit_byte(0xE9); 1547 assert(dest != NULL, "must have a target"); 1548 intptr_t disp = dest - (_code_pos + sizeof(int32_t)); 1549 assert(is_simm32(disp), "must be 32bit offset (jmp)"); 1550 emit_data(disp, rspec.reloc(), call32_operand); 1551 } 1552 1553 void Assembler::jmpb(Label& L) { 1554 if (L.is_bound()) { 1555 const int short_size = 2; 1556 address entry = target(L); 1557 assert(entry != NULL, "jmp most probably wrong"); 1558 #ifdef ASSERT 1559 intptr_t dist = (intptr_t)entry - ((intptr_t)_code_pos + short_size); 1560 intptr_t delta = short_branch_delta(); 1561 if (delta != 0) { 1562 dist += (dist < 0 ? 
(-delta) :delta); 1563 } 1564 assert(is8bit(dist), "Dispacement too large for a short jmp"); 1565 #endif 1566 intptr_t offs = entry - _code_pos; 1567 emit_byte(0xEB); 1568 emit_byte((offs - short_size) & 0xFF); 1569 } else { 1570 InstructionMark im(this); 1571 L.add_patch_at(code(), locator()); 1572 emit_byte(0xEB); 1573 emit_byte(0); 1574 } 1575 } 1576 1577 void Assembler::ldmxcsr( Address src) { 1578 NOT_LP64(assert(VM_Version::supports_sse(), "")); 1579 InstructionMark im(this); 1580 prefix(src); 1581 emit_byte(0x0F); 1582 emit_byte(0xAE); 1583 emit_operand(as_Register(2), src); 1584 } 1585 1586 void Assembler::leal(Register dst, Address src) { 1587 InstructionMark im(this); 1588 #ifdef _LP64 1589 emit_byte(0x67); // addr32 1590 prefix(src, dst); 1591 #endif // LP64 1592 emit_byte(0x8D); 1593 emit_operand(dst, src); 1594 } 1595 1596 void Assembler::lock() { 1597 if (Atomics & 1) { 1598 // Emit either nothing, a NOP, or a NOP: prefix 1599 emit_byte(0x90) ; 1600 } else { 1601 emit_byte(0xF0); 1602 } 1603 } 1604 1605 void Assembler::lzcntl(Register dst, Register src) { 1606 assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR"); 1607 emit_byte(0xF3); 1608 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 1609 emit_byte(0x0F); 1610 emit_byte(0xBD); 1611 emit_byte(0xC0 | encode); 1612 } 1613 1614 // Emit mfence instruction 1615 void Assembler::mfence() { 1616 NOT_LP64(assert(VM_Version::supports_sse2(), "unsupported");) 1617 emit_byte( 0x0F ); 1618 emit_byte( 0xAE ); 1619 emit_byte( 0xF0 ); 1620 } 1621 1622 void Assembler::mov(Register dst, Register src) { 1623 LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src)); 1624 } 1625 1626 void Assembler::movapd(XMMRegister dst, XMMRegister src) { 1627 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1628 int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66); 1629 emit_byte(0x28); 1630 emit_byte(0xC0 | encode); 1631 } 1632 1633 void Assembler::movaps(XMMRegister dst, XMMRegister src) { 1634 
NOT_LP64(assert(VM_Version::supports_sse(), "")); 1635 int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_NONE); 1636 emit_byte(0x28); 1637 emit_byte(0xC0 | encode); 1638 } 1639 1640 void Assembler::movb(Register dst, Address src) { 1641 NOT_LP64(assert(dst->has_byte_register(), "must have byte register")); 1642 InstructionMark im(this); 1643 prefix(src, dst, true); 1644 emit_byte(0x8A); 1645 emit_operand(dst, src); 1646 } 1647 1648 1649 void Assembler::movb(Address dst, int imm8) { 1650 InstructionMark im(this); 1651 prefix(dst); 1652 emit_byte(0xC6); 1653 emit_operand(rax, dst, 1); 1654 emit_byte(imm8); 1655 } 1656 1657 1658 void Assembler::movb(Address dst, Register src) { 1659 assert(src->has_byte_register(), "must have byte register"); 1660 InstructionMark im(this); 1661 prefix(dst, src, true); 1662 emit_byte(0x88); 1663 emit_operand(src, dst); 1664 } 1665 1666 void Assembler::movdl(XMMRegister dst, Register src) { 1667 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1668 int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66); 1669 emit_byte(0x6E); 1670 emit_byte(0xC0 | encode); 1671 } 1672 1673 void Assembler::movdl(Register dst, XMMRegister src) { 1674 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1675 // swap src/dst to get correct prefix 1676 int encode = simd_prefix_and_encode(src, dst, VEX_SIMD_66); 1677 emit_byte(0x7E); 1678 emit_byte(0xC0 | encode); 1679 } 1680 1681 void Assembler::movdl(XMMRegister dst, Address src) { 1682 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1683 InstructionMark im(this); 1684 simd_prefix(dst, src, VEX_SIMD_66); 1685 emit_byte(0x6E); 1686 emit_operand(dst, src); 1687 } 1688 1689 void Assembler::movdqa(XMMRegister dst, XMMRegister src) { 1690 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1691 int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66); 1692 emit_byte(0x6F); 1693 emit_byte(0xC0 | encode); 1694 } 1695 1696 void Assembler::movdqu(XMMRegister dst, Address src) { 1697 
NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1698 InstructionMark im(this); 1699 simd_prefix(dst, src, VEX_SIMD_F3); 1700 emit_byte(0x6F); 1701 emit_operand(dst, src); 1702 } 1703 1704 void Assembler::movdqu(XMMRegister dst, XMMRegister src) { 1705 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1706 int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F3); 1707 emit_byte(0x6F); 1708 emit_byte(0xC0 | encode); 1709 } 1710 1711 void Assembler::movdqu(Address dst, XMMRegister src) { 1712 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1713 InstructionMark im(this); 1714 simd_prefix(dst, src, VEX_SIMD_F3); 1715 emit_byte(0x7F); 1716 emit_operand(src, dst); 1717 } 1718 1719 // Uses zero extension on 64bit 1720 1721 void Assembler::movl(Register dst, int32_t imm32) { 1722 int encode = prefix_and_encode(dst->encoding()); 1723 emit_byte(0xB8 | encode); 1724 emit_long(imm32); 1725 } 1726 1727 void Assembler::movl(Register dst, Register src) { 1728 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 1729 emit_byte(0x8B); 1730 emit_byte(0xC0 | encode); 1731 } 1732 1733 void Assembler::movl(Register dst, Address src) { 1734 InstructionMark im(this); 1735 prefix(src, dst); 1736 emit_byte(0x8B); 1737 emit_operand(dst, src); 1738 } 1739 1740 void Assembler::movl(Address dst, int32_t imm32) { 1741 InstructionMark im(this); 1742 prefix(dst); 1743 emit_byte(0xC7); 1744 emit_operand(rax, dst, 4); 1745 emit_long(imm32); 1746 } 1747 1748 void Assembler::movl(Address dst, Register src) { 1749 InstructionMark im(this); 1750 prefix(dst, src); 1751 emit_byte(0x89); 1752 emit_operand(src, dst); 1753 } 1754 1755 // New cpus require to use movsd and movss to avoid partial register stall 1756 // when loading from memory. But for old Opteron use movlpd instead of movsd. 1757 // The selection is done in MacroAssembler::movdbl() and movflt(). 
1758 void Assembler::movlpd(XMMRegister dst, Address src) { 1759 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1760 InstructionMark im(this); 1761 simd_prefix(dst, dst, src, VEX_SIMD_66); 1762 emit_byte(0x12); 1763 emit_operand(dst, src); 1764 } 1765 1766 void Assembler::movq( MMXRegister dst, Address src ) { 1767 assert( VM_Version::supports_mmx(), "" ); 1768 emit_byte(0x0F); 1769 emit_byte(0x6F); 1770 emit_operand(dst, src); 1771 } 1772 1773 void Assembler::movq( Address dst, MMXRegister src ) { 1774 assert( VM_Version::supports_mmx(), "" ); 1775 emit_byte(0x0F); 1776 emit_byte(0x7F); 1777 // workaround gcc (3.2.1-7a) bug 1778 // In that version of gcc with only an emit_operand(MMX, Address) 1779 // gcc will tail jump and try and reverse the parameters completely 1780 // obliterating dst in the process. By having a version available 1781 // that doesn't need to swap the args at the tail jump the bug is 1782 // avoided. 1783 emit_operand(dst, src); 1784 } 1785 1786 void Assembler::movq(XMMRegister dst, Address src) { 1787 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1788 InstructionMark im(this); 1789 simd_prefix(dst, src, VEX_SIMD_F3); 1790 emit_byte(0x7E); 1791 emit_operand(dst, src); 1792 } 1793 1794 void Assembler::movq(Address dst, XMMRegister src) { 1795 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1796 InstructionMark im(this); 1797 simd_prefix(dst, src, VEX_SIMD_66); 1798 emit_byte(0xD6); 1799 emit_operand(src, dst); 1800 } 1801 1802 void Assembler::movsbl(Register dst, Address src) { // movsxb 1803 InstructionMark im(this); 1804 prefix(src, dst); 1805 emit_byte(0x0F); 1806 emit_byte(0xBE); 1807 emit_operand(dst, src); 1808 } 1809 1810 void Assembler::movsbl(Register dst, Register src) { // movsxb 1811 NOT_LP64(assert(src->has_byte_register(), "must have byte register")); 1812 int encode = prefix_and_encode(dst->encoding(), src->encoding(), true); 1813 emit_byte(0x0F); 1814 emit_byte(0xBE); 1815 emit_byte(0xC0 | encode); 1816 } 1817 1818 
void Assembler::movsd(XMMRegister dst, XMMRegister src) { 1819 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1820 int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2); 1821 emit_byte(0x10); 1822 emit_byte(0xC0 | encode); 1823 } 1824 1825 void Assembler::movsd(XMMRegister dst, Address src) { 1826 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1827 InstructionMark im(this); 1828 simd_prefix(dst, src, VEX_SIMD_F2); 1829 emit_byte(0x10); 1830 emit_operand(dst, src); 1831 } 1832 1833 void Assembler::movsd(Address dst, XMMRegister src) { 1834 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1835 InstructionMark im(this); 1836 simd_prefix(dst, src, VEX_SIMD_F2); 1837 emit_byte(0x11); 1838 emit_operand(src, dst); 1839 } 1840 1841 void Assembler::movss(XMMRegister dst, XMMRegister src) { 1842 NOT_LP64(assert(VM_Version::supports_sse(), "")); 1843 int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3); 1844 emit_byte(0x10); 1845 emit_byte(0xC0 | encode); 1846 } 1847 1848 void Assembler::movss(XMMRegister dst, Address src) { 1849 NOT_LP64(assert(VM_Version::supports_sse(), "")); 1850 InstructionMark im(this); 1851 simd_prefix(dst, src, VEX_SIMD_F3); 1852 emit_byte(0x10); 1853 emit_operand(dst, src); 1854 } 1855 1856 void Assembler::movss(Address dst, XMMRegister src) { 1857 NOT_LP64(assert(VM_Version::supports_sse(), "")); 1858 InstructionMark im(this); 1859 simd_prefix(dst, src, VEX_SIMD_F3); 1860 emit_byte(0x11); 1861 emit_operand(src, dst); 1862 } 1863 1864 void Assembler::movswl(Register dst, Address src) { // movsxw 1865 InstructionMark im(this); 1866 prefix(src, dst); 1867 emit_byte(0x0F); 1868 emit_byte(0xBF); 1869 emit_operand(dst, src); 1870 } 1871 1872 void Assembler::movswl(Register dst, Register src) { // movsxw 1873 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 1874 emit_byte(0x0F); 1875 emit_byte(0xBF); 1876 emit_byte(0xC0 | encode); 1877 } 1878 1879 void Assembler::movw(Address dst, int imm16) { 1880 
InstructionMark im(this); 1881 1882 emit_byte(0x66); // switch to 16-bit mode 1883 prefix(dst); 1884 emit_byte(0xC7); 1885 emit_operand(rax, dst, 2); 1886 emit_word(imm16); 1887 } 1888 1889 void Assembler::movw(Register dst, Address src) { 1890 InstructionMark im(this); 1891 emit_byte(0x66); 1892 prefix(src, dst); 1893 emit_byte(0x8B); 1894 emit_operand(dst, src); 1895 } 1896 1897 void Assembler::movw(Address dst, Register src) { 1898 InstructionMark im(this); 1899 emit_byte(0x66); 1900 prefix(dst, src); 1901 emit_byte(0x89); 1902 emit_operand(src, dst); 1903 } 1904 1905 void Assembler::movzbl(Register dst, Address src) { // movzxb 1906 InstructionMark im(this); 1907 prefix(src, dst); 1908 emit_byte(0x0F); 1909 emit_byte(0xB6); 1910 emit_operand(dst, src); 1911 } 1912 1913 void Assembler::movzbl(Register dst, Register src) { // movzxb 1914 NOT_LP64(assert(src->has_byte_register(), "must have byte register")); 1915 int encode = prefix_and_encode(dst->encoding(), src->encoding(), true); 1916 emit_byte(0x0F); 1917 emit_byte(0xB6); 1918 emit_byte(0xC0 | encode); 1919 } 1920 1921 void Assembler::movzwl(Register dst, Address src) { // movzxw 1922 InstructionMark im(this); 1923 prefix(src, dst); 1924 emit_byte(0x0F); 1925 emit_byte(0xB7); 1926 emit_operand(dst, src); 1927 } 1928 1929 void Assembler::movzwl(Register dst, Register src) { // movzxw 1930 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 1931 emit_byte(0x0F); 1932 emit_byte(0xB7); 1933 emit_byte(0xC0 | encode); 1934 } 1935 1936 void Assembler::mull(Address src) { 1937 InstructionMark im(this); 1938 prefix(src); 1939 emit_byte(0xF7); 1940 emit_operand(rsp, src); 1941 } 1942 1943 void Assembler::mull(Register src) { 1944 int encode = prefix_and_encode(src->encoding()); 1945 emit_byte(0xF7); 1946 emit_byte(0xE0 | encode); 1947 } 1948 1949 void Assembler::mulsd(XMMRegister dst, Address src) { 1950 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1951 InstructionMark im(this); 1952 
simd_prefix(dst, dst, src, VEX_SIMD_F2); 1953 emit_byte(0x59); 1954 emit_operand(dst, src); 1955 } 1956 1957 void Assembler::mulsd(XMMRegister dst, XMMRegister src) { 1958 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1959 int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2); 1960 emit_byte(0x59); 1961 emit_byte(0xC0 | encode); 1962 } 1963 1964 void Assembler::mulss(XMMRegister dst, Address src) { 1965 NOT_LP64(assert(VM_Version::supports_sse(), "")); 1966 InstructionMark im(this); 1967 simd_prefix(dst, dst, src, VEX_SIMD_F3); 1968 emit_byte(0x59); 1969 emit_operand(dst, src); 1970 } 1971 1972 void Assembler::mulss(XMMRegister dst, XMMRegister src) { 1973 NOT_LP64(assert(VM_Version::supports_sse(), "")); 1974 int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3); 1975 emit_byte(0x59); 1976 emit_byte(0xC0 | encode); 1977 } 1978 1979 void Assembler::negl(Register dst) { 1980 int encode = prefix_and_encode(dst->encoding()); 1981 emit_byte(0xF7); 1982 emit_byte(0xD8 | encode); 1983 } 1984 1985 void Assembler::nop(int i) { 1986 #ifdef ASSERT 1987 assert(i > 0, " "); 1988 // The fancy nops aren't currently recognized by debuggers making it a 1989 // pain to disassemble code while debugging. If asserts are on clearly 1990 // speed is not an issue so simply use the single byte traditional nop 1991 // to do alignment. 
1992 1993 for (; i > 0 ; i--) emit_byte(0x90); 1994 return; 1995 1996 #endif // ASSERT 1997 1998 if (UseAddressNop && VM_Version::is_intel()) { 1999 // 2000 // Using multi-bytes nops "0x0F 0x1F [address]" for Intel 2001 // 1: 0x90 2002 // 2: 0x66 0x90 2003 // 3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding) 2004 // 4: 0x0F 0x1F 0x40 0x00 2005 // 5: 0x0F 0x1F 0x44 0x00 0x00 2006 // 6: 0x66 0x0F 0x1F 0x44 0x00 0x00 2007 // 7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 2008 // 8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 2009 // 9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 2010 // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 2011 // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 2012 2013 // The rest coding is Intel specific - don't use consecutive address nops 2014 2015 // 12: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90 2016 // 13: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90 2017 // 14: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90 2018 // 15: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90 2019 2020 while(i >= 15) { 2021 // For Intel don't generate consecutive addess nops (mix with regular nops) 2022 i -= 15; 2023 emit_byte(0x66); // size prefix 2024 emit_byte(0x66); // size prefix 2025 emit_byte(0x66); // size prefix 2026 addr_nop_8(); 2027 emit_byte(0x66); // size prefix 2028 emit_byte(0x66); // size prefix 2029 emit_byte(0x66); // size prefix 2030 emit_byte(0x90); // nop 2031 } 2032 switch (i) { 2033 case 14: 2034 emit_byte(0x66); // size prefix 2035 case 13: 2036 emit_byte(0x66); // size prefix 2037 case 12: 2038 addr_nop_8(); 2039 emit_byte(0x66); // size prefix 2040 emit_byte(0x66); // size prefix 2041 emit_byte(0x66); // size prefix 2042 emit_byte(0x90); // nop 2043 break; 2044 case 11: 2045 emit_byte(0x66); // size prefix 2046 case 10: 2047 emit_byte(0x66); // size prefix 2048 case 9: 2049 emit_byte(0x66); // size prefix 2050 case 
8: 2051 addr_nop_8(); 2052 break; 2053 case 7: 2054 addr_nop_7(); 2055 break; 2056 case 6: 2057 emit_byte(0x66); // size prefix 2058 case 5: 2059 addr_nop_5(); 2060 break; 2061 case 4: 2062 addr_nop_4(); 2063 break; 2064 case 3: 2065 // Don't use "0x0F 0x1F 0x00" - need patching safe padding 2066 emit_byte(0x66); // size prefix 2067 case 2: 2068 emit_byte(0x66); // size prefix 2069 case 1: 2070 emit_byte(0x90); // nop 2071 break; 2072 default: 2073 assert(i == 0, " "); 2074 } 2075 return; 2076 } 2077 if (UseAddressNop && VM_Version::is_amd()) { 2078 // 2079 // Using multi-bytes nops "0x0F 0x1F [address]" for AMD. 2080 // 1: 0x90 2081 // 2: 0x66 0x90 2082 // 3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding) 2083 // 4: 0x0F 0x1F 0x40 0x00 2084 // 5: 0x0F 0x1F 0x44 0x00 0x00 2085 // 6: 0x66 0x0F 0x1F 0x44 0x00 0x00 2086 // 7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 2087 // 8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 2088 // 9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 2089 // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 2090 // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 2091 2092 // The rest coding is AMD specific - use consecutive address nops 2093 2094 // 12: 0x66 0x0F 0x1F 0x44 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00 2095 // 13: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00 2096 // 14: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 2097 // 15: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 2098 // 16: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 2099 // Size prefixes (0x66) are added for larger sizes 2100 2101 while(i >= 22) { 2102 i -= 11; 2103 emit_byte(0x66); // size prefix 2104 emit_byte(0x66); // size prefix 2105 emit_byte(0x66); // size prefix 2106 addr_nop_8(); 2107 } 2108 // Generate first nop for size between 21-12 2109 switch (i) { 2110 case 21: 2111 i -= 1; 2112 emit_byte(0x66); // size prefix 2113 case 
20: 2114 case 19: 2115 i -= 1; 2116 emit_byte(0x66); // size prefix 2117 case 18: 2118 case 17: 2119 i -= 1; 2120 emit_byte(0x66); // size prefix 2121 case 16: 2122 case 15: 2123 i -= 8; 2124 addr_nop_8(); 2125 break; 2126 case 14: 2127 case 13: 2128 i -= 7; 2129 addr_nop_7(); 2130 break; 2131 case 12: 2132 i -= 6; 2133 emit_byte(0x66); // size prefix 2134 addr_nop_5(); 2135 break; 2136 default: 2137 assert(i < 12, " "); 2138 } 2139 2140 // Generate second nop for size between 11-1 2141 switch (i) { 2142 case 11: 2143 emit_byte(0x66); // size prefix 2144 case 10: 2145 emit_byte(0x66); // size prefix 2146 case 9: 2147 emit_byte(0x66); // size prefix 2148 case 8: 2149 addr_nop_8(); 2150 break; 2151 case 7: 2152 addr_nop_7(); 2153 break; 2154 case 6: 2155 emit_byte(0x66); // size prefix 2156 case 5: 2157 addr_nop_5(); 2158 break; 2159 case 4: 2160 addr_nop_4(); 2161 break; 2162 case 3: 2163 // Don't use "0x0F 0x1F 0x00" - need patching safe padding 2164 emit_byte(0x66); // size prefix 2165 case 2: 2166 emit_byte(0x66); // size prefix 2167 case 1: 2168 emit_byte(0x90); // nop 2169 break; 2170 default: 2171 assert(i == 0, " "); 2172 } 2173 return; 2174 } 2175 2176 // Using nops with size prefixes "0x66 0x90". 
2177 // From AMD Optimization Guide: 2178 // 1: 0x90 2179 // 2: 0x66 0x90 2180 // 3: 0x66 0x66 0x90 2181 // 4: 0x66 0x66 0x66 0x90 2182 // 5: 0x66 0x66 0x90 0x66 0x90 2183 // 6: 0x66 0x66 0x90 0x66 0x66 0x90 2184 // 7: 0x66 0x66 0x66 0x90 0x66 0x66 0x90 2185 // 8: 0x66 0x66 0x66 0x90 0x66 0x66 0x66 0x90 2186 // 9: 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90 2187 // 10: 0x66 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90 2188 // 2189 while(i > 12) { 2190 i -= 4; 2191 emit_byte(0x66); // size prefix 2192 emit_byte(0x66); 2193 emit_byte(0x66); 2194 emit_byte(0x90); // nop 2195 } 2196 // 1 - 12 nops 2197 if(i > 8) { 2198 if(i > 9) { 2199 i -= 1; 2200 emit_byte(0x66); 2201 } 2202 i -= 3; 2203 emit_byte(0x66); 2204 emit_byte(0x66); 2205 emit_byte(0x90); 2206 } 2207 // 1 - 8 nops 2208 if(i > 4) { 2209 if(i > 6) { 2210 i -= 1; 2211 emit_byte(0x66); 2212 } 2213 i -= 3; 2214 emit_byte(0x66); 2215 emit_byte(0x66); 2216 emit_byte(0x90); 2217 } 2218 switch (i) { 2219 case 4: 2220 emit_byte(0x66); 2221 case 3: 2222 emit_byte(0x66); 2223 case 2: 2224 emit_byte(0x66); 2225 case 1: 2226 emit_byte(0x90); 2227 break; 2228 default: 2229 assert(i == 0, " "); 2230 } 2231 } 2232 2233 void Assembler::notl(Register dst) { 2234 int encode = prefix_and_encode(dst->encoding()); 2235 emit_byte(0xF7); 2236 emit_byte(0xD0 | encode ); 2237 } 2238 2239 void Assembler::orl(Address dst, int32_t imm32) { 2240 InstructionMark im(this); 2241 prefix(dst); 2242 emit_arith_operand(0x81, rcx, dst, imm32); 2243 } 2244 2245 void Assembler::orl(Register dst, int32_t imm32) { 2246 prefix(dst); 2247 emit_arith(0x81, 0xC8, dst, imm32); 2248 } 2249 2250 void Assembler::orl(Register dst, Address src) { 2251 InstructionMark im(this); 2252 prefix(src, dst); 2253 emit_byte(0x0B); 2254 emit_operand(dst, src); 2255 } 2256 2257 void Assembler::orl(Register dst, Register src) { 2258 (void) prefix_and_encode(dst->encoding(), src->encoding()); 2259 emit_arith(0x0B, 0xC0, dst, src); 2260 } 2261 2262 void 
Assembler::packuswb(XMMRegister dst, Address src) { 2263 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2264 assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); 2265 InstructionMark im(this); 2266 simd_prefix(dst, dst, src, VEX_SIMD_66); 2267 emit_byte(0x67); 2268 emit_operand(dst, src); 2269 } 2270 2271 void Assembler::packuswb(XMMRegister dst, XMMRegister src) { 2272 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2273 int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66); 2274 emit_byte(0x67); 2275 emit_byte(0xC0 | encode); 2276 } 2277 2278 void Assembler::pcmpestri(XMMRegister dst, Address src, int imm8) { 2279 assert(VM_Version::supports_sse4_2(), ""); 2280 InstructionMark im(this); 2281 simd_prefix(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A); 2282 emit_byte(0x61); 2283 emit_operand(dst, src); 2284 emit_byte(imm8); 2285 } 2286 2287 void Assembler::pcmpestri(XMMRegister dst, XMMRegister src, int imm8) { 2288 assert(VM_Version::supports_sse4_2(), ""); 2289 int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A); 2290 emit_byte(0x61); 2291 emit_byte(0xC0 | encode); 2292 emit_byte(imm8); 2293 } 2294 2295 void Assembler::pmovzxbw(XMMRegister dst, Address src) { 2296 assert(VM_Version::supports_sse4_1(), ""); 2297 InstructionMark im(this); 2298 simd_prefix(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38); 2299 emit_byte(0x30); 2300 emit_operand(dst, src); 2301 } 2302 2303 void Assembler::pmovzxbw(XMMRegister dst, XMMRegister src) { 2304 assert(VM_Version::supports_sse4_1(), ""); 2305 int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38); 2306 emit_byte(0x30); 2307 emit_byte(0xC0 | encode); 2308 } 2309 2310 // generic 2311 void Assembler::pop(Register dst) { 2312 int encode = prefix_and_encode(dst->encoding()); 2313 emit_byte(0x58 | encode); 2314 } 2315 2316 void Assembler::popcntl(Register dst, Address src) { 2317 assert(VM_Version::supports_popcnt(), "must support"); 2318 InstructionMark im(this); 
2319 emit_byte(0xF3); 2320 prefix(src, dst); 2321 emit_byte(0x0F); 2322 emit_byte(0xB8); 2323 emit_operand(dst, src); 2324 } 2325 2326 void Assembler::popcntl(Register dst, Register src) { 2327 assert(VM_Version::supports_popcnt(), "must support"); 2328 emit_byte(0xF3); 2329 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 2330 emit_byte(0x0F); 2331 emit_byte(0xB8); 2332 emit_byte(0xC0 | encode); 2333 } 2334 2335 void Assembler::popf() { 2336 emit_byte(0x9D); 2337 } 2338 2339 #ifndef _LP64 // no 32bit push/pop on amd64 2340 void Assembler::popl(Address dst) { 2341 // NOTE: this will adjust stack by 8byte on 64bits 2342 InstructionMark im(this); 2343 prefix(dst); 2344 emit_byte(0x8F); 2345 emit_operand(rax, dst); 2346 } 2347 #endif 2348 2349 void Assembler::prefetch_prefix(Address src) { 2350 prefix(src); 2351 emit_byte(0x0F); 2352 } 2353 2354 void Assembler::prefetchnta(Address src) { 2355 NOT_LP64(assert(VM_Version::supports_sse(), "must support")); 2356 InstructionMark im(this); 2357 prefetch_prefix(src); 2358 emit_byte(0x18); 2359 emit_operand(rax, src); // 0, src 2360 } 2361 2362 void Assembler::prefetchr(Address src) { 2363 assert(VM_Version::supports_3dnow_prefetch(), "must support"); 2364 InstructionMark im(this); 2365 prefetch_prefix(src); 2366 emit_byte(0x0D); 2367 emit_operand(rax, src); // 0, src 2368 } 2369 2370 void Assembler::prefetcht0(Address src) { 2371 NOT_LP64(assert(VM_Version::supports_sse(), "must support")); 2372 InstructionMark im(this); 2373 prefetch_prefix(src); 2374 emit_byte(0x18); 2375 emit_operand(rcx, src); // 1, src 2376 } 2377 2378 void Assembler::prefetcht1(Address src) { 2379 NOT_LP64(assert(VM_Version::supports_sse(), "must support")); 2380 InstructionMark im(this); 2381 prefetch_prefix(src); 2382 emit_byte(0x18); 2383 emit_operand(rdx, src); // 2, src 2384 } 2385 2386 void Assembler::prefetcht2(Address src) { 2387 NOT_LP64(assert(VM_Version::supports_sse(), "must support")); 2388 InstructionMark im(this); 2389 
prefetch_prefix(src); 2390 emit_byte(0x18); 2391 emit_operand(rbx, src); // 3, src 2392 } 2393 2394 void Assembler::prefetchw(Address src) { 2395 assert(VM_Version::supports_3dnow_prefetch(), "must support"); 2396 InstructionMark im(this); 2397 prefetch_prefix(src); 2398 emit_byte(0x0D); 2399 emit_operand(rcx, src); // 1, src 2400 } 2401 2402 void Assembler::prefix(Prefix p) { 2403 a_byte(p); 2404 } 2405 2406 void Assembler::por(XMMRegister dst, XMMRegister src) { 2407 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2408 int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66); 2409 emit_byte(0xEB); 2410 emit_byte(0xC0 | encode); 2411 } 2412 2413 void Assembler::por(XMMRegister dst, Address src) { 2414 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2415 assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); 2416 InstructionMark im(this); 2417 simd_prefix(dst, dst, src, VEX_SIMD_66); 2418 emit_byte(0xEB); 2419 emit_operand(dst, src); 2420 } 2421 2422 void Assembler::pshufd(XMMRegister dst, XMMRegister src, int mode) { 2423 assert(isByte(mode), "invalid value"); 2424 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2425 int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66); 2426 emit_byte(0x70); 2427 emit_byte(0xC0 | encode); 2428 emit_byte(mode & 0xFF); 2429 2430 } 2431 2432 void Assembler::pshufd(XMMRegister dst, Address src, int mode) { 2433 assert(isByte(mode), "invalid value"); 2434 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2435 assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); 2436 InstructionMark im(this); 2437 simd_prefix(dst, src, VEX_SIMD_66); 2438 emit_byte(0x70); 2439 emit_operand(dst, src); 2440 emit_byte(mode & 0xFF); 2441 } 2442 2443 void Assembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) { 2444 assert(isByte(mode), "invalid value"); 2445 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2446 int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F2); 2447 emit_byte(0x70); 
2448 emit_byte(0xC0 | encode); 2449 emit_byte(mode & 0xFF); 2450 } 2451 2452 void Assembler::pshuflw(XMMRegister dst, Address src, int mode) { 2453 assert(isByte(mode), "invalid value"); 2454 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2455 assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); 2456 InstructionMark im(this); 2457 simd_prefix(dst, src, VEX_SIMD_F2); 2458 emit_byte(0x70); 2459 emit_operand(dst, src); 2460 emit_byte(mode & 0xFF); 2461 } 2462 2463 void Assembler::psrlq(XMMRegister dst, int shift) { 2464 // Shift 64 bit value logically right by specified number of bits. 2465 // HMM Table D-1 says sse2 or mmx. 2466 // Do not confuse it with psrldq SSE2 instruction which 2467 // shifts 128 bit value in xmm register by number of bytes. 2468 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2469 int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66); 2470 emit_byte(0x73); 2471 emit_byte(0xC0 | encode); 2472 emit_byte(shift); 2473 } 2474 2475 void Assembler::psrldq(XMMRegister dst, int shift) { 2476 // Shift 128 bit value in xmm register by number of bytes. 
2477 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2478 int encode = simd_prefix_and_encode(xmm3, dst, dst, VEX_SIMD_66); 2479 emit_byte(0x73); 2480 emit_byte(0xC0 | encode); 2481 emit_byte(shift); 2482 } 2483 2484 void Assembler::ptest(XMMRegister dst, Address src) { 2485 assert(VM_Version::supports_sse4_1(), ""); 2486 assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); 2487 InstructionMark im(this); 2488 simd_prefix(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38); 2489 emit_byte(0x17); 2490 emit_operand(dst, src); 2491 } 2492 2493 void Assembler::ptest(XMMRegister dst, XMMRegister src) { 2494 assert(VM_Version::supports_sse4_1(), ""); 2495 int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38); 2496 emit_byte(0x17); 2497 emit_byte(0xC0 | encode); 2498 } 2499 2500 void Assembler::punpcklbw(XMMRegister dst, Address src) { 2501 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2502 assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); 2503 InstructionMark im(this); 2504 simd_prefix(dst, dst, src, VEX_SIMD_66); 2505 emit_byte(0x60); 2506 emit_operand(dst, src); 2507 } 2508 2509 void Assembler::punpcklbw(XMMRegister dst, XMMRegister src) { 2510 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2511 int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66); 2512 emit_byte(0x60); 2513 emit_byte(0xC0 | encode); 2514 } 2515 2516 void Assembler::punpckldq(XMMRegister dst, Address src) { 2517 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2518 assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); 2519 InstructionMark im(this); 2520 simd_prefix(dst, dst, src, VEX_SIMD_66); 2521 emit_byte(0x62); 2522 emit_operand(dst, src); 2523 } 2524 2525 void Assembler::punpckldq(XMMRegister dst, XMMRegister src) { 2526 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2527 int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66); 2528 emit_byte(0x62); 2529 emit_byte(0xC0 | encode); 2530 } 2531 2532 
void Assembler::push(int32_t imm32) { 2533 // in 64bits we push 64bits onto the stack but only 2534 // take a 32bit immediate 2535 emit_byte(0x68); 2536 emit_long(imm32); 2537 } 2538 2539 void Assembler::push(Register src) { 2540 int encode = prefix_and_encode(src->encoding()); 2541 2542 emit_byte(0x50 | encode); 2543 } 2544 2545 void Assembler::pushf() { 2546 emit_byte(0x9C); 2547 } 2548 2549 #ifndef _LP64 // no 32bit push/pop on amd64 2550 void Assembler::pushl(Address src) { 2551 // Note this will push 64bit on 64bit 2552 InstructionMark im(this); 2553 prefix(src); 2554 emit_byte(0xFF); 2555 emit_operand(rsi, src); 2556 } 2557 #endif 2558 2559 void Assembler::pxor(XMMRegister dst, Address src) { 2560 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2561 assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); 2562 InstructionMark im(this); 2563 simd_prefix(dst, dst, src, VEX_SIMD_66); 2564 emit_byte(0xEF); 2565 emit_operand(dst, src); 2566 } 2567 2568 void Assembler::pxor(XMMRegister dst, XMMRegister src) { 2569 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2570 int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66); 2571 emit_byte(0xEF); 2572 emit_byte(0xC0 | encode); 2573 } 2574 2575 void Assembler::rcll(Register dst, int imm8) { 2576 assert(isShiftCount(imm8), "illegal shift count"); 2577 int encode = prefix_and_encode(dst->encoding()); 2578 if (imm8 == 1) { 2579 emit_byte(0xD1); 2580 emit_byte(0xD0 | encode); 2581 } else { 2582 emit_byte(0xC1); 2583 emit_byte(0xD0 | encode); 2584 emit_byte(imm8); 2585 } 2586 } 2587 2588 // copies data from [esi] to [edi] using rcx pointer sized words 2589 // generic 2590 void Assembler::rep_mov() { 2591 emit_byte(0xF3); 2592 // MOVSQ 2593 LP64_ONLY(prefix(REX_W)); 2594 emit_byte(0xA5); 2595 } 2596 2597 // sets rcx pointer sized words with rax, value at [edi] 2598 // generic 2599 void Assembler::rep_set() { // rep_set 2600 emit_byte(0xF3); 2601 // STOSQ 2602 LP64_ONLY(prefix(REX_W)); 2603 
emit_byte(0xAB); 2604 } 2605 2606 // scans rcx pointer sized words at [edi] for occurance of rax, 2607 // generic 2608 void Assembler::repne_scan() { // repne_scan 2609 emit_byte(0xF2); 2610 // SCASQ 2611 LP64_ONLY(prefix(REX_W)); 2612 emit_byte(0xAF); 2613 } 2614 2615 #ifdef _LP64 2616 // scans rcx 4 byte words at [edi] for occurance of rax, 2617 // generic 2618 void Assembler::repne_scanl() { // repne_scan 2619 emit_byte(0xF2); 2620 // SCASL 2621 emit_byte(0xAF); 2622 } 2623 #endif 2624 2625 void Assembler::ret(int imm16) { 2626 if (imm16 == 0) { 2627 emit_byte(0xC3); 2628 } else { 2629 emit_byte(0xC2); 2630 emit_word(imm16); 2631 } 2632 } 2633 2634 void Assembler::sahf() { 2635 #ifdef _LP64 2636 // Not supported in 64bit mode 2637 ShouldNotReachHere(); 2638 #endif 2639 emit_byte(0x9E); 2640 } 2641 2642 void Assembler::sarl(Register dst, int imm8) { 2643 int encode = prefix_and_encode(dst->encoding()); 2644 assert(isShiftCount(imm8), "illegal shift count"); 2645 if (imm8 == 1) { 2646 emit_byte(0xD1); 2647 emit_byte(0xF8 | encode); 2648 } else { 2649 emit_byte(0xC1); 2650 emit_byte(0xF8 | encode); 2651 emit_byte(imm8); 2652 } 2653 } 2654 2655 void Assembler::sarl(Register dst) { 2656 int encode = prefix_and_encode(dst->encoding()); 2657 emit_byte(0xD3); 2658 emit_byte(0xF8 | encode); 2659 } 2660 2661 void Assembler::sbbl(Address dst, int32_t imm32) { 2662 InstructionMark im(this); 2663 prefix(dst); 2664 emit_arith_operand(0x81, rbx, dst, imm32); 2665 } 2666 2667 void Assembler::sbbl(Register dst, int32_t imm32) { 2668 prefix(dst); 2669 emit_arith(0x81, 0xD8, dst, imm32); 2670 } 2671 2672 2673 void Assembler::sbbl(Register dst, Address src) { 2674 InstructionMark im(this); 2675 prefix(src, dst); 2676 emit_byte(0x1B); 2677 emit_operand(dst, src); 2678 } 2679 2680 void Assembler::sbbl(Register dst, Register src) { 2681 (void) prefix_and_encode(dst->encoding(), src->encoding()); 2682 emit_arith(0x1B, 0xC0, dst, src); 2683 } 2684 2685 void Assembler::setb(Condition cc, 
Register dst) { 2686 assert(0 <= cc && cc < 16, "illegal cc"); 2687 int encode = prefix_and_encode(dst->encoding(), true); 2688 emit_byte(0x0F); 2689 emit_byte(0x90 | cc); 2690 emit_byte(0xC0 | encode); 2691 } 2692 2693 void Assembler::shll(Register dst, int imm8) { 2694 assert(isShiftCount(imm8), "illegal shift count"); 2695 int encode = prefix_and_encode(dst->encoding()); 2696 if (imm8 == 1 ) { 2697 emit_byte(0xD1); 2698 emit_byte(0xE0 | encode); 2699 } else { 2700 emit_byte(0xC1); 2701 emit_byte(0xE0 | encode); 2702 emit_byte(imm8); 2703 } 2704 } 2705 2706 void Assembler::shll(Register dst) { 2707 int encode = prefix_and_encode(dst->encoding()); 2708 emit_byte(0xD3); 2709 emit_byte(0xE0 | encode); 2710 } 2711 2712 void Assembler::shrl(Register dst, int imm8) { 2713 assert(isShiftCount(imm8), "illegal shift count"); 2714 int encode = prefix_and_encode(dst->encoding()); 2715 emit_byte(0xC1); 2716 emit_byte(0xE8 | encode); 2717 emit_byte(imm8); 2718 } 2719 2720 void Assembler::shrl(Register dst) { 2721 int encode = prefix_and_encode(dst->encoding()); 2722 emit_byte(0xD3); 2723 emit_byte(0xE8 | encode); 2724 } 2725 2726 // copies a single word from [esi] to [edi] 2727 void Assembler::smovl() { 2728 emit_byte(0xA5); 2729 } 2730 2731 void Assembler::sqrtsd(XMMRegister dst, XMMRegister src) { 2732 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2733 int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2); 2734 emit_byte(0x51); 2735 emit_byte(0xC0 | encode); 2736 } 2737 2738 void Assembler::sqrtsd(XMMRegister dst, Address src) { 2739 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2740 InstructionMark im(this); 2741 simd_prefix(dst, dst, src, VEX_SIMD_F2); 2742 emit_byte(0x51); 2743 emit_operand(dst, src); 2744 } 2745 2746 void Assembler::sqrtss(XMMRegister dst, XMMRegister src) { 2747 NOT_LP64(assert(VM_Version::supports_sse(), "")); 2748 int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3); 2749 emit_byte(0x51); 2750 emit_byte(0xC0 | encode); 
2751 } 2752 2753 void Assembler::sqrtss(XMMRegister dst, Address src) { 2754 NOT_LP64(assert(VM_Version::supports_sse(), "")); 2755 InstructionMark im(this); 2756 simd_prefix(dst, dst, src, VEX_SIMD_F3); 2757 emit_byte(0x51); 2758 emit_operand(dst, src); 2759 } 2760 2761 void Assembler::stmxcsr( Address dst) { 2762 NOT_LP64(assert(VM_Version::supports_sse(), "")); 2763 InstructionMark im(this); 2764 prefix(dst); 2765 emit_byte(0x0F); 2766 emit_byte(0xAE); 2767 emit_operand(as_Register(3), dst); 2768 } 2769 2770 void Assembler::subl(Address dst, int32_t imm32) { 2771 InstructionMark im(this); 2772 prefix(dst); 2773 emit_arith_operand(0x81, rbp, dst, imm32); 2774 } 2775 2776 void Assembler::subl(Address dst, Register src) { 2777 InstructionMark im(this); 2778 prefix(dst, src); 2779 emit_byte(0x29); 2780 emit_operand(src, dst); 2781 } 2782 2783 void Assembler::subl(Register dst, int32_t imm32) { 2784 prefix(dst); 2785 emit_arith(0x81, 0xE8, dst, imm32); 2786 } 2787 2788 // Force generation of a 4 byte immediate value even if it fits into 8bit 2789 void Assembler::subl_imm32(Register dst, int32_t imm32) { 2790 prefix(dst); 2791 emit_arith_imm32(0x81, 0xE8, dst, imm32); 2792 } 2793 2794 void Assembler::subl(Register dst, Address src) { 2795 InstructionMark im(this); 2796 prefix(src, dst); 2797 emit_byte(0x2B); 2798 emit_operand(dst, src); 2799 } 2800 2801 void Assembler::subl(Register dst, Register src) { 2802 (void) prefix_and_encode(dst->encoding(), src->encoding()); 2803 emit_arith(0x2B, 0xC0, dst, src); 2804 } 2805 2806 void Assembler::subsd(XMMRegister dst, XMMRegister src) { 2807 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2808 int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2); 2809 emit_byte(0x5C); 2810 emit_byte(0xC0 | encode); 2811 } 2812 2813 void Assembler::subsd(XMMRegister dst, Address src) { 2814 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2815 InstructionMark im(this); 2816 simd_prefix(dst, dst, src, VEX_SIMD_F2); 2817 
emit_byte(0x5C); 2818 emit_operand(dst, src); 2819 } 2820 2821 void Assembler::subss(XMMRegister dst, XMMRegister src) { 2822 NOT_LP64(assert(VM_Version::supports_sse(), "")); 2823 int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3); 2824 emit_byte(0x5C); 2825 emit_byte(0xC0 | encode); 2826 } 2827 2828 void Assembler::subss(XMMRegister dst, Address src) { 2829 NOT_LP64(assert(VM_Version::supports_sse(), "")); 2830 InstructionMark im(this); 2831 simd_prefix(dst, dst, src, VEX_SIMD_F3); 2832 emit_byte(0x5C); 2833 emit_operand(dst, src); 2834 } 2835 2836 void Assembler::testb(Register dst, int imm8) { 2837 NOT_LP64(assert(dst->has_byte_register(), "must have byte register")); 2838 (void) prefix_and_encode(dst->encoding(), true); 2839 emit_arith_b(0xF6, 0xC0, dst, imm8); 2840 } 2841 2842 void Assembler::testl(Register dst, int32_t imm32) { 2843 // not using emit_arith because test 2844 // doesn't support sign-extension of 2845 // 8bit operands 2846 int encode = dst->encoding(); 2847 if (encode == 0) { 2848 emit_byte(0xA9); 2849 } else { 2850 encode = prefix_and_encode(encode); 2851 emit_byte(0xF7); 2852 emit_byte(0xC0 | encode); 2853 } 2854 emit_long(imm32); 2855 } 2856 2857 void Assembler::testl(Register dst, Register src) { 2858 (void) prefix_and_encode(dst->encoding(), src->encoding()); 2859 emit_arith(0x85, 0xC0, dst, src); 2860 } 2861 2862 void Assembler::testl(Register dst, Address src) { 2863 InstructionMark im(this); 2864 prefix(src, dst); 2865 emit_byte(0x85); 2866 emit_operand(dst, src); 2867 } 2868 2869 void Assembler::ucomisd(XMMRegister dst, Address src) { 2870 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2871 InstructionMark im(this); 2872 simd_prefix(dst, src, VEX_SIMD_66); 2873 emit_byte(0x2E); 2874 emit_operand(dst, src); 2875 } 2876 2877 void Assembler::ucomisd(XMMRegister dst, XMMRegister src) { 2878 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2879 int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66); 2880 
emit_byte(0x2E); 2881 emit_byte(0xC0 | encode); 2882 } 2883 2884 void Assembler::ucomiss(XMMRegister dst, Address src) { 2885 NOT_LP64(assert(VM_Version::supports_sse(), "")); 2886 InstructionMark im(this); 2887 simd_prefix(dst, src, VEX_SIMD_NONE); 2888 emit_byte(0x2E); 2889 emit_operand(dst, src); 2890 } 2891 2892 void Assembler::ucomiss(XMMRegister dst, XMMRegister src) { 2893 NOT_LP64(assert(VM_Version::supports_sse(), "")); 2894 int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_NONE); 2895 emit_byte(0x2E); 2896 emit_byte(0xC0 | encode); 2897 } 2898 2899 2900 void Assembler::xaddl(Address dst, Register src) { 2901 InstructionMark im(this); 2902 prefix(dst, src); 2903 emit_byte(0x0F); 2904 emit_byte(0xC1); 2905 emit_operand(src, dst); 2906 } 2907 2908 void Assembler::xchgl(Register dst, Address src) { // xchg 2909 InstructionMark im(this); 2910 prefix(src, dst); 2911 emit_byte(0x87); 2912 emit_operand(dst, src); 2913 } 2914 2915 void Assembler::xchgl(Register dst, Register src) { 2916 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 2917 emit_byte(0x87); 2918 emit_byte(0xc0 | encode); 2919 } 2920 2921 void Assembler::xorl(Register dst, int32_t imm32) { 2922 prefix(dst); 2923 emit_arith(0x81, 0xF0, dst, imm32); 2924 } 2925 2926 void Assembler::xorl(Register dst, Address src) { 2927 InstructionMark im(this); 2928 prefix(src, dst); 2929 emit_byte(0x33); 2930 emit_operand(dst, src); 2931 } 2932 2933 void Assembler::xorl(Register dst, Register src) { 2934 (void) prefix_and_encode(dst->encoding(), src->encoding()); 2935 emit_arith(0x33, 0xC0, dst, src); 2936 } 2937 2938 void Assembler::xorpd(XMMRegister dst, XMMRegister src) { 2939 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2940 int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66); 2941 emit_byte(0x57); 2942 emit_byte(0xC0 | encode); 2943 } 2944 2945 void Assembler::xorpd(XMMRegister dst, Address src) { 2946 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2947 
InstructionMark im(this); 2948 simd_prefix(dst, dst, src, VEX_SIMD_66); 2949 emit_byte(0x57); 2950 emit_operand(dst, src); 2951 } 2952 2953 2954 void Assembler::xorps(XMMRegister dst, XMMRegister src) { 2955 NOT_LP64(assert(VM_Version::supports_sse(), "")); 2956 int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE); 2957 emit_byte(0x57); 2958 emit_byte(0xC0 | encode); 2959 } 2960 2961 void Assembler::xorps(XMMRegister dst, Address src) { 2962 NOT_LP64(assert(VM_Version::supports_sse(), "")); 2963 InstructionMark im(this); 2964 simd_prefix(dst, dst, src, VEX_SIMD_NONE); 2965 emit_byte(0x57); 2966 emit_operand(dst, src); 2967 } 2968 2969 // AVX 3-operands non destructive source instructions (encoded with VEX prefix) 2970 2971 void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, Address src) { 2972 assert(VM_Version::supports_avx(), ""); 2973 InstructionMark im(this); 2974 vex_prefix(dst, nds, src, VEX_SIMD_F2); 2975 emit_byte(0x58); 2976 emit_operand(dst, src); 2977 } 2978 2979 void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { 2980 assert(VM_Version::supports_avx(), ""); 2981 int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F2); 2982 emit_byte(0x58); 2983 emit_byte(0xC0 | encode); 2984 } 2985 2986 void Assembler::vaddss(XMMRegister dst, XMMRegister nds, Address src) { 2987 assert(VM_Version::supports_avx(), ""); 2988 InstructionMark im(this); 2989 vex_prefix(dst, nds, src, VEX_SIMD_F3); 2990 emit_byte(0x58); 2991 emit_operand(dst, src); 2992 } 2993 2994 void Assembler::vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src) { 2995 assert(VM_Version::supports_avx(), ""); 2996 int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F3); 2997 emit_byte(0x58); 2998 emit_byte(0xC0 | encode); 2999 } 3000 3001 void Assembler::vandpd(XMMRegister dst, XMMRegister nds, Address src) { 3002 assert(VM_Version::supports_avx(), ""); 3003 InstructionMark im(this); 3004 vex_prefix(dst, nds, src, VEX_SIMD_66); // 128-bit vector 
3005 emit_byte(0x54); 3006 emit_operand(dst, src); 3007 } 3008 3009 void Assembler::vandps(XMMRegister dst, XMMRegister nds, Address src) { 3010 assert(VM_Version::supports_avx(), ""); 3011 InstructionMark im(this); 3012 vex_prefix(dst, nds, src, VEX_SIMD_NONE); // 128-bit vector 3013 emit_byte(0x54); 3014 emit_operand(dst, src); 3015 } 3016 3017 void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, Address src) { 3018 assert(VM_Version::supports_avx(), ""); 3019 InstructionMark im(this); 3020 vex_prefix(dst, nds, src, VEX_SIMD_F2); 3021 emit_byte(0x5E); 3022 emit_operand(dst, src); 3023 } 3024 3025 void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { 3026 assert(VM_Version::supports_avx(), ""); 3027 int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F2); 3028 emit_byte(0x5E); 3029 emit_byte(0xC0 | encode); 3030 } 3031 3032 void Assembler::vdivss(XMMRegister dst, XMMRegister nds, Address src) { 3033 assert(VM_Version::supports_avx(), ""); 3034 InstructionMark im(this); 3035 vex_prefix(dst, nds, src, VEX_SIMD_F3); 3036 emit_byte(0x5E); 3037 emit_operand(dst, src); 3038 } 3039 3040 void Assembler::vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src) { 3041 assert(VM_Version::supports_avx(), ""); 3042 int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F3); 3043 emit_byte(0x5E); 3044 emit_byte(0xC0 | encode); 3045 } 3046 3047 void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, Address src) { 3048 assert(VM_Version::supports_avx(), ""); 3049 InstructionMark im(this); 3050 vex_prefix(dst, nds, src, VEX_SIMD_F2); 3051 emit_byte(0x59); 3052 emit_operand(dst, src); 3053 } 3054 3055 void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { 3056 assert(VM_Version::supports_avx(), ""); 3057 int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F2); 3058 emit_byte(0x59); 3059 emit_byte(0xC0 | encode); 3060 } 3061 3062 void Assembler::vmulss(XMMRegister dst, XMMRegister nds, Address src) { 3063 
InstructionMark im(this); 3064 vex_prefix(dst, nds, src, VEX_SIMD_F3); 3065 emit_byte(0x59); 3066 emit_operand(dst, src); 3067 } 3068 3069 void Assembler::vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src) { 3070 assert(VM_Version::supports_avx(), ""); 3071 int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F3); 3072 emit_byte(0x59); 3073 emit_byte(0xC0 | encode); 3074 } 3075 3076 3077 void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, Address src) { 3078 assert(VM_Version::supports_avx(), ""); 3079 InstructionMark im(this); 3080 vex_prefix(dst, nds, src, VEX_SIMD_F2); 3081 emit_byte(0x5C); 3082 emit_operand(dst, src); 3083 } 3084 3085 void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { 3086 assert(VM_Version::supports_avx(), ""); 3087 int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F2); 3088 emit_byte(0x5C); 3089 emit_byte(0xC0 | encode); 3090 } 3091 3092 void Assembler::vsubss(XMMRegister dst, XMMRegister nds, Address src) { 3093 assert(VM_Version::supports_avx(), ""); 3094 InstructionMark im(this); 3095 vex_prefix(dst, nds, src, VEX_SIMD_F3); 3096 emit_byte(0x5C); 3097 emit_operand(dst, src); 3098 } 3099 3100 void Assembler::vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src) { 3101 assert(VM_Version::supports_avx(), ""); 3102 int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F3); 3103 emit_byte(0x5C); 3104 emit_byte(0xC0 | encode); 3105 } 3106 3107 void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, Address src) { 3108 assert(VM_Version::supports_avx(), ""); 3109 InstructionMark im(this); 3110 vex_prefix(dst, nds, src, VEX_SIMD_66); // 128-bit vector 3111 emit_byte(0x57); 3112 emit_operand(dst, src); 3113 } 3114 3115 void Assembler::vxorps(XMMRegister dst, XMMRegister nds, Address src) { 3116 assert(VM_Version::supports_avx(), ""); 3117 InstructionMark im(this); 3118 vex_prefix(dst, nds, src, VEX_SIMD_NONE); // 128-bit vector 3119 emit_byte(0x57); 3120 emit_operand(dst, src); 3121 } 
3122 3123 3124 #ifndef _LP64 3125 // 32bit only pieces of the assembler 3126 3127 void Assembler::cmp_literal32(Register src1, int32_t imm32, RelocationHolder const& rspec) { 3128 // NO PREFIX AS NEVER 64BIT 3129 InstructionMark im(this); 3130 emit_byte(0x81); 3131 emit_byte(0xF8 | src1->encoding()); 3132 emit_data(imm32, rspec, 0); 3133 } 3134 3135 void Assembler::cmp_literal32(Address src1, int32_t imm32, RelocationHolder const& rspec) { 3136 // NO PREFIX AS NEVER 64BIT (not even 32bit versions of 64bit regs 3137 InstructionMark im(this); 3138 emit_byte(0x81); 3139 emit_operand(rdi, src1); 3140 emit_data(imm32, rspec, 0); 3141 } 3142 3143 // The 64-bit (32bit platform) cmpxchg compares the value at adr with the contents of rdx:rax, 3144 // and stores rcx:rbx into adr if so; otherwise, the value at adr is loaded 3145 // into rdx:rax. The ZF is set if the compared values were equal, and cleared otherwise. 3146 void Assembler::cmpxchg8(Address adr) { 3147 InstructionMark im(this); 3148 emit_byte(0x0F); 3149 emit_byte(0xc7); 3150 emit_operand(rcx, adr); 3151 } 3152 3153 void Assembler::decl(Register dst) { 3154 // Don't use it directly. Use MacroAssembler::decrementl() instead. 
3155 emit_byte(0x48 | dst->encoding()); 3156 } 3157 3158 #endif // _LP64 3159 3160 // 64bit typically doesn't use the x87 but needs to for the trig funcs 3161 3162 void Assembler::fabs() { 3163 emit_byte(0xD9); 3164 emit_byte(0xE1); 3165 } 3166 3167 void Assembler::fadd(int i) { 3168 emit_farith(0xD8, 0xC0, i); 3169 } 3170 3171 void Assembler::fadd_d(Address src) { 3172 InstructionMark im(this); 3173 emit_byte(0xDC); 3174 emit_operand32(rax, src); 3175 } 3176 3177 void Assembler::fadd_s(Address src) { 3178 InstructionMark im(this); 3179 emit_byte(0xD8); 3180 emit_operand32(rax, src); 3181 } 3182 3183 void Assembler::fadda(int i) { 3184 emit_farith(0xDC, 0xC0, i); 3185 } 3186 3187 void Assembler::faddp(int i) { 3188 emit_farith(0xDE, 0xC0, i); 3189 } 3190 3191 void Assembler::fchs() { 3192 emit_byte(0xD9); 3193 emit_byte(0xE0); 3194 } 3195 3196 void Assembler::fcom(int i) { 3197 emit_farith(0xD8, 0xD0, i); 3198 } 3199 3200 void Assembler::fcomp(int i) { 3201 emit_farith(0xD8, 0xD8, i); 3202 } 3203 3204 void Assembler::fcomp_d(Address src) { 3205 InstructionMark im(this); 3206 emit_byte(0xDC); 3207 emit_operand32(rbx, src); 3208 } 3209 3210 void Assembler::fcomp_s(Address src) { 3211 InstructionMark im(this); 3212 emit_byte(0xD8); 3213 emit_operand32(rbx, src); 3214 } 3215 3216 void Assembler::fcompp() { 3217 emit_byte(0xDE); 3218 emit_byte(0xD9); 3219 } 3220 3221 void Assembler::fcos() { 3222 emit_byte(0xD9); 3223 emit_byte(0xFF); 3224 } 3225 3226 void Assembler::fdecstp() { 3227 emit_byte(0xD9); 3228 emit_byte(0xF6); 3229 } 3230 3231 void Assembler::fdiv(int i) { 3232 emit_farith(0xD8, 0xF0, i); 3233 } 3234 3235 void Assembler::fdiv_d(Address src) { 3236 InstructionMark im(this); 3237 emit_byte(0xDC); 3238 emit_operand32(rsi, src); 3239 } 3240 3241 void Assembler::fdiv_s(Address src) { 3242 InstructionMark im(this); 3243 emit_byte(0xD8); 3244 emit_operand32(rsi, src); 3245 } 3246 3247 void Assembler::fdiva(int i) { 3248 emit_farith(0xDC, 0xF8, i); 3249 } 3250 3251 
// Note: The Intel manual (Pentium Processor User's Manual, Vol.3, 1994) 3252 // is erroneous for some of the floating-point instructions below. 3253 3254 void Assembler::fdivp(int i) { 3255 emit_farith(0xDE, 0xF8, i); // ST(0) <- ST(0) / ST(1) and pop (Intel manual wrong) 3256 } 3257 3258 void Assembler::fdivr(int i) { 3259 emit_farith(0xD8, 0xF8, i); 3260 } 3261 3262 void Assembler::fdivr_d(Address src) { 3263 InstructionMark im(this); 3264 emit_byte(0xDC); 3265 emit_operand32(rdi, src); 3266 } 3267 3268 void Assembler::fdivr_s(Address src) { 3269 InstructionMark im(this); 3270 emit_byte(0xD8); 3271 emit_operand32(rdi, src); 3272 } 3273 3274 void Assembler::fdivra(int i) { 3275 emit_farith(0xDC, 0xF0, i); 3276 } 3277 3278 void Assembler::fdivrp(int i) { 3279 emit_farith(0xDE, 0xF0, i); // ST(0) <- ST(1) / ST(0) and pop (Intel manual wrong) 3280 } 3281 3282 void Assembler::ffree(int i) { 3283 emit_farith(0xDD, 0xC0, i); 3284 } 3285 3286 void Assembler::fild_d(Address adr) { 3287 InstructionMark im(this); 3288 emit_byte(0xDF); 3289 emit_operand32(rbp, adr); 3290 } 3291 3292 void Assembler::fild_s(Address adr) { 3293 InstructionMark im(this); 3294 emit_byte(0xDB); 3295 emit_operand32(rax, adr); 3296 } 3297 3298 void Assembler::fincstp() { 3299 emit_byte(0xD9); 3300 emit_byte(0xF7); 3301 } 3302 3303 void Assembler::finit() { 3304 emit_byte(0x9B); 3305 emit_byte(0xDB); 3306 emit_byte(0xE3); 3307 } 3308 3309 void Assembler::fist_s(Address adr) { 3310 InstructionMark im(this); 3311 emit_byte(0xDB); 3312 emit_operand32(rdx, adr); 3313 } 3314 3315 void Assembler::fistp_d(Address adr) { 3316 InstructionMark im(this); 3317 emit_byte(0xDF); 3318 emit_operand32(rdi, adr); 3319 } 3320 3321 void Assembler::fistp_s(Address adr) { 3322 InstructionMark im(this); 3323 emit_byte(0xDB); 3324 emit_operand32(rbx, adr); 3325 } 3326 3327 void Assembler::fld1() { 3328 emit_byte(0xD9); 3329 emit_byte(0xE8); 3330 } 3331 3332 void Assembler::fld_d(Address adr) { 3333 InstructionMark 
im(this); 3334 emit_byte(0xDD); 3335 emit_operand32(rax, adr); 3336 } 3337 3338 void Assembler::fld_s(Address adr) { 3339 InstructionMark im(this); 3340 emit_byte(0xD9); 3341 emit_operand32(rax, adr); 3342 } 3343 3344 3345 void Assembler::fld_s(int index) { 3346 emit_farith(0xD9, 0xC0, index); 3347 } 3348 3349 void Assembler::fld_x(Address adr) { 3350 InstructionMark im(this); 3351 emit_byte(0xDB); 3352 emit_operand32(rbp, adr); 3353 } 3354 3355 void Assembler::fldcw(Address src) { 3356 InstructionMark im(this); 3357 emit_byte(0xd9); 3358 emit_operand32(rbp, src); 3359 } 3360 3361 void Assembler::fldenv(Address src) { 3362 InstructionMark im(this); 3363 emit_byte(0xD9); 3364 emit_operand32(rsp, src); 3365 } 3366 3367 void Assembler::fldlg2() { 3368 emit_byte(0xD9); 3369 emit_byte(0xEC); 3370 } 3371 3372 void Assembler::fldln2() { 3373 emit_byte(0xD9); 3374 emit_byte(0xED); 3375 } 3376 3377 void Assembler::fldz() { 3378 emit_byte(0xD9); 3379 emit_byte(0xEE); 3380 } 3381 3382 void Assembler::flog() { 3383 fldln2(); 3384 fxch(); 3385 fyl2x(); 3386 } 3387 3388 void Assembler::flog10() { 3389 fldlg2(); 3390 fxch(); 3391 fyl2x(); 3392 } 3393 3394 void Assembler::fmul(int i) { 3395 emit_farith(0xD8, 0xC8, i); 3396 } 3397 3398 void Assembler::fmul_d(Address src) { 3399 InstructionMark im(this); 3400 emit_byte(0xDC); 3401 emit_operand32(rcx, src); 3402 } 3403 3404 void Assembler::fmul_s(Address src) { 3405 InstructionMark im(this); 3406 emit_byte(0xD8); 3407 emit_operand32(rcx, src); 3408 } 3409 3410 void Assembler::fmula(int i) { 3411 emit_farith(0xDC, 0xC8, i); 3412 } 3413 3414 void Assembler::fmulp(int i) { 3415 emit_farith(0xDE, 0xC8, i); 3416 } 3417 3418 void Assembler::fnsave(Address dst) { 3419 InstructionMark im(this); 3420 emit_byte(0xDD); 3421 emit_operand32(rsi, dst); 3422 } 3423 3424 void Assembler::fnstcw(Address src) { 3425 InstructionMark im(this); 3426 emit_byte(0x9B); 3427 emit_byte(0xD9); 3428 emit_operand32(rdi, src); 3429 } 3430 3431 void 
Assembler::fnstsw_ax() { 3432 emit_byte(0xdF); 3433 emit_byte(0xE0); 3434 } 3435 3436 void Assembler::fprem() { 3437 emit_byte(0xD9); 3438 emit_byte(0xF8); 3439 } 3440 3441 void Assembler::fprem1() { 3442 emit_byte(0xD9); 3443 emit_byte(0xF5); 3444 } 3445 3446 void Assembler::frstor(Address src) { 3447 InstructionMark im(this); 3448 emit_byte(0xDD); 3449 emit_operand32(rsp, src); 3450 } 3451 3452 void Assembler::fsin() { 3453 emit_byte(0xD9); 3454 emit_byte(0xFE); 3455 } 3456 3457 void Assembler::fsqrt() { 3458 emit_byte(0xD9); 3459 emit_byte(0xFA); 3460 } 3461 3462 void Assembler::fst_d(Address adr) { 3463 InstructionMark im(this); 3464 emit_byte(0xDD); 3465 emit_operand32(rdx, adr); 3466 } 3467 3468 void Assembler::fst_s(Address adr) { 3469 InstructionMark im(this); 3470 emit_byte(0xD9); 3471 emit_operand32(rdx, adr); 3472 } 3473 3474 void Assembler::fstp_d(Address adr) { 3475 InstructionMark im(this); 3476 emit_byte(0xDD); 3477 emit_operand32(rbx, adr); 3478 } 3479 3480 void Assembler::fstp_d(int index) { 3481 emit_farith(0xDD, 0xD8, index); 3482 } 3483 3484 void Assembler::fstp_s(Address adr) { 3485 InstructionMark im(this); 3486 emit_byte(0xD9); 3487 emit_operand32(rbx, adr); 3488 } 3489 3490 void Assembler::fstp_x(Address adr) { 3491 InstructionMark im(this); 3492 emit_byte(0xDB); 3493 emit_operand32(rdi, adr); 3494 } 3495 3496 void Assembler::fsub(int i) { 3497 emit_farith(0xD8, 0xE0, i); 3498 } 3499 3500 void Assembler::fsub_d(Address src) { 3501 InstructionMark im(this); 3502 emit_byte(0xDC); 3503 emit_operand32(rsp, src); 3504 } 3505 3506 void Assembler::fsub_s(Address src) { 3507 InstructionMark im(this); 3508 emit_byte(0xD8); 3509 emit_operand32(rsp, src); 3510 } 3511 3512 void Assembler::fsuba(int i) { 3513 emit_farith(0xDC, 0xE8, i); 3514 } 3515 3516 void Assembler::fsubp(int i) { 3517 emit_farith(0xDE, 0xE8, i); // ST(0) <- ST(0) - ST(1) and pop (Intel manual wrong) 3518 } 3519 3520 void Assembler::fsubr(int i) { 3521 emit_farith(0xD8, 0xE8, i); 3522 
} 3523 3524 void Assembler::fsubr_d(Address src) { 3525 InstructionMark im(this); 3526 emit_byte(0xDC); 3527 emit_operand32(rbp, src); 3528 } 3529 3530 void Assembler::fsubr_s(Address src) { 3531 InstructionMark im(this); 3532 emit_byte(0xD8); 3533 emit_operand32(rbp, src); 3534 } 3535 3536 void Assembler::fsubra(int i) { 3537 emit_farith(0xDC, 0xE0, i); 3538 } 3539 3540 void Assembler::fsubrp(int i) { 3541 emit_farith(0xDE, 0xE0, i); // ST(0) <- ST(1) - ST(0) and pop (Intel manual wrong) 3542 } 3543 3544 void Assembler::ftan() { 3545 emit_byte(0xD9); 3546 emit_byte(0xF2); 3547 emit_byte(0xDD); 3548 emit_byte(0xD8); 3549 } 3550 3551 void Assembler::ftst() { 3552 emit_byte(0xD9); 3553 emit_byte(0xE4); 3554 } 3555 3556 void Assembler::fucomi(int i) { 3557 // make sure the instruction is supported (introduced for P6, together with cmov) 3558 guarantee(VM_Version::supports_cmov(), "illegal instruction"); 3559 emit_farith(0xDB, 0xE8, i); 3560 } 3561 3562 void Assembler::fucomip(int i) { 3563 // make sure the instruction is supported (introduced for P6, together with cmov) 3564 guarantee(VM_Version::supports_cmov(), "illegal instruction"); 3565 emit_farith(0xDF, 0xE8, i); 3566 } 3567 3568 void Assembler::fwait() { 3569 emit_byte(0x9B); 3570 } 3571 3572 void Assembler::fxch(int i) { 3573 emit_farith(0xD9, 0xC8, i); 3574 } 3575 3576 void Assembler::fyl2x() { 3577 emit_byte(0xD9); 3578 emit_byte(0xF1); 3579 } 3580 3581 void Assembler::frndint() { 3582 emit_byte(0xD9); 3583 emit_byte(0xFC); 3584 } 3585 3586 void Assembler::f2xm1() { 3587 emit_byte(0xD9); 3588 emit_byte(0xF0); 3589 } 3590 3591 void Assembler::fldl2e() { 3592 emit_byte(0xD9); 3593 emit_byte(0xEA); 3594 } 3595 3596 // SSE SIMD prefix byte values corresponding to VexSimdPrefix encoding. 3597 static int simd_pre[4] = { 0, 0x66, 0xF3, 0xF2 }; 3598 // SSE opcode second byte values (first is 0x0F) corresponding to VexOpcode encoding. 
3599 static int simd_opc[4] = { 0, 0, 0x38, 0x3A }; 3600 3601 // Generate SSE legacy REX prefix and SIMD opcode based on VEX encoding. 3602 void Assembler::rex_prefix(Address adr, XMMRegister xreg, VexSimdPrefix pre, VexOpcode opc, bool rex_w) { 3603 if (pre > 0) { 3604 emit_byte(simd_pre[pre]); 3605 } 3606 if (rex_w) { 3607 prefixq(adr, xreg); 3608 } else { 3609 prefix(adr, xreg); 3610 } 3611 if (opc > 0) { 3612 emit_byte(0x0F); 3613 int opc2 = simd_opc[opc]; 3614 if (opc2 > 0) { 3615 emit_byte(opc2); 3616 } 3617 } 3618 } 3619 3620 int Assembler::rex_prefix_and_encode(int dst_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, bool rex_w) { 3621 if (pre > 0) { 3622 emit_byte(simd_pre[pre]); 3623 } 3624 int encode = (rex_w) ? prefixq_and_encode(dst_enc, src_enc) : 3625 prefix_and_encode(dst_enc, src_enc); 3626 if (opc > 0) { 3627 emit_byte(0x0F); 3628 int opc2 = simd_opc[opc]; 3629 if (opc2 > 0) { 3630 emit_byte(opc2); 3631 } 3632 } 3633 return encode; 3634 } 3635 3636 3637 void Assembler::vex_prefix(bool vex_r, bool vex_b, bool vex_x, bool vex_w, int nds_enc, VexSimdPrefix pre, VexOpcode opc, bool vector256) { 3638 if (vex_b || vex_x || vex_w || (opc == VEX_OPCODE_0F_38) || (opc == VEX_OPCODE_0F_3A)) { 3639 prefix(VEX_3bytes); 3640 3641 int byte1 = (vex_r ? VEX_R : 0) | (vex_x ? VEX_X : 0) | (vex_b ? VEX_B : 0); 3642 byte1 = (~byte1) & 0xE0; 3643 byte1 |= opc; 3644 a_byte(byte1); 3645 3646 int byte2 = ((~nds_enc) & 0xf) << 3; 3647 byte2 |= (vex_w ? VEX_W : 0) | (vector256 ? 4 : 0) | pre; 3648 emit_byte(byte2); 3649 } else { 3650 prefix(VEX_2bytes); 3651 3652 int byte1 = vex_r ? VEX_R : 0; 3653 byte1 = (~byte1) & 0x80; 3654 byte1 |= ((~nds_enc) & 0xf) << 3; 3655 byte1 |= (vector256 ? 
4 : 0) | pre; 3656 emit_byte(byte1); 3657 } 3658 } 3659 3660 void Assembler::vex_prefix(Address adr, int nds_enc, int xreg_enc, VexSimdPrefix pre, VexOpcode opc, bool vex_w, bool vector256){ 3661 bool vex_r = (xreg_enc >= 8); 3662 bool vex_b = adr.base_needs_rex(); 3663 bool vex_x = adr.index_needs_rex(); 3664 vex_prefix(vex_r, vex_b, vex_x, vex_w, nds_enc, pre, opc, vector256); 3665 } 3666 3667 int Assembler::vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, bool vex_w, bool vector256) { 3668 bool vex_r = (dst_enc >= 8); 3669 bool vex_b = (src_enc >= 8); 3670 bool vex_x = false; 3671 vex_prefix(vex_r, vex_b, vex_x, vex_w, nds_enc, pre, opc, vector256); 3672 return (((dst_enc & 7) << 3) | (src_enc & 7)); 3673 } 3674 3675 3676 void Assembler::simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr, VexSimdPrefix pre, VexOpcode opc, bool rex_w, bool vector256) { 3677 if (UseAVX > 0) { 3678 int xreg_enc = xreg->encoding(); 3679 int nds_enc = nds->is_valid() ? nds->encoding() : 0; 3680 vex_prefix(adr, nds_enc, xreg_enc, pre, opc, rex_w, vector256); 3681 } else { 3682 assert((nds == xreg) || (nds == xnoreg), "wrong sse encoding"); 3683 rex_prefix(adr, xreg, pre, opc, rex_w); 3684 } 3685 } 3686 3687 int Assembler::simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src, VexSimdPrefix pre, VexOpcode opc, bool rex_w, bool vector256) { 3688 int dst_enc = dst->encoding(); 3689 int src_enc = src->encoding(); 3690 if (UseAVX > 0) { 3691 int nds_enc = nds->is_valid() ? nds->encoding() : 0; 3692 return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, rex_w, vector256); 3693 } else { 3694 assert((nds == dst) || (nds == src) || (nds == xnoreg), "wrong sse encoding"); 3695 return rex_prefix_and_encode(dst_enc, src_enc, pre, opc, rex_w); 3696 } 3697 } 3698 3699 #ifndef _LP64 3700 3701 void Assembler::incl(Register dst) { 3702 // Don't use it directly. Use MacroAssembler::incrementl() instead. 
3703 emit_byte(0x40 | dst->encoding()); 3704 } 3705 3706 void Assembler::lea(Register dst, Address src) { 3707 leal(dst, src); 3708 } 3709 3710 void Assembler::mov_literal32(Address dst, int32_t imm32, RelocationHolder const& rspec) { 3711 InstructionMark im(this); 3712 emit_byte(0xC7); 3713 emit_operand(rax, dst); 3714 emit_data((int)imm32, rspec, 0); 3715 } 3716 3717 void Assembler::mov_literal32(Register dst, int32_t imm32, RelocationHolder const& rspec) { 3718 InstructionMark im(this); 3719 int encode = prefix_and_encode(dst->encoding()); 3720 emit_byte(0xB8 | encode); 3721 emit_data((int)imm32, rspec, 0); 3722 } 3723 3724 void Assembler::popa() { // 32bit 3725 emit_byte(0x61); 3726 } 3727 3728 void Assembler::push_literal32(int32_t imm32, RelocationHolder const& rspec) { 3729 InstructionMark im(this); 3730 emit_byte(0x68); 3731 emit_data(imm32, rspec, 0); 3732 } 3733 3734 void Assembler::pusha() { // 32bit 3735 emit_byte(0x60); 3736 } 3737 3738 void Assembler::set_byte_if_not_zero(Register dst) { 3739 emit_byte(0x0F); 3740 emit_byte(0x95); 3741 emit_byte(0xE0 | dst->encoding()); 3742 } 3743 3744 void Assembler::shldl(Register dst, Register src) { 3745 emit_byte(0x0F); 3746 emit_byte(0xA5); 3747 emit_byte(0xC0 | src->encoding() << 3 | dst->encoding()); 3748 } 3749 3750 void Assembler::shrdl(Register dst, Register src) { 3751 emit_byte(0x0F); 3752 emit_byte(0xAD); 3753 emit_byte(0xC0 | src->encoding() << 3 | dst->encoding()); 3754 } 3755 3756 #else // LP64 3757 3758 void Assembler::set_byte_if_not_zero(Register dst) { 3759 int enc = prefix_and_encode(dst->encoding(), true); 3760 emit_byte(0x0F); 3761 emit_byte(0x95); 3762 emit_byte(0xE0 | enc); 3763 } 3764 3765 // 64bit only pieces of the assembler 3766 // This should only be used by 64bit instructions that can use rip-relative 3767 // it cannot be used by instructions that want an immediate value. 
3768 3769 bool Assembler::reachable(AddressLiteral adr) { 3770 int64_t disp; 3771 // None will force a 64bit literal to the code stream. Likely a placeholder 3772 // for something that will be patched later and we need to certain it will 3773 // always be reachable. 3774 if (adr.reloc() == relocInfo::none) { 3775 return false; 3776 } 3777 if (adr.reloc() == relocInfo::internal_word_type) { 3778 // This should be rip relative and easily reachable. 3779 return true; 3780 } 3781 if (adr.reloc() == relocInfo::virtual_call_type || 3782 adr.reloc() == relocInfo::opt_virtual_call_type || 3783 adr.reloc() == relocInfo::static_call_type || 3784 adr.reloc() == relocInfo::static_stub_type ) { 3785 // This should be rip relative within the code cache and easily 3786 // reachable until we get huge code caches. (At which point 3787 // ic code is going to have issues). 3788 return true; 3789 } 3790 if (adr.reloc() != relocInfo::external_word_type && 3791 adr.reloc() != relocInfo::poll_return_type && // these are really external_word but need special 3792 adr.reloc() != relocInfo::poll_type && // relocs to identify them 3793 adr.reloc() != relocInfo::runtime_call_type ) { 3794 return false; 3795 } 3796 3797 // Stress the correction code 3798 if (ForceUnreachable) { 3799 // Must be runtimecall reloc, see if it is in the codecache 3800 // Flipping stuff in the codecache to be unreachable causes issues 3801 // with things like inline caches where the additional instructions 3802 // are not handled. 3803 if (CodeCache::find_blob(adr._target) == NULL) { 3804 return false; 3805 } 3806 } 3807 // For external_word_type/runtime_call_type if it is reachable from where we 3808 // are now (possibly a temp buffer) and where we might end up 3809 // anywhere in the codeCache then we are always reachable. 3810 // This would have to change if we ever save/restore shared code 3811 // to be more pessimistic. 
3812 disp = (int64_t)adr._target - ((int64_t)CodeCache::low_bound() + sizeof(int)); 3813 if (!is_simm32(disp)) return false; 3814 disp = (int64_t)adr._target - ((int64_t)CodeCache::high_bound() + sizeof(int)); 3815 if (!is_simm32(disp)) return false; 3816 3817 disp = (int64_t)adr._target - ((int64_t)_code_pos + sizeof(int)); 3818 3819 // Because rip relative is a disp + address_of_next_instruction and we 3820 // don't know the value of address_of_next_instruction we apply a fudge factor 3821 // to make sure we will be ok no matter the size of the instruction we get placed into. 3822 // We don't have to fudge the checks above here because they are already worst case. 3823 3824 // 12 == override/rex byte, opcode byte, rm byte, sib byte, a 4-byte disp , 4-byte literal 3825 // + 4 because better safe than sorry. 3826 const int fudge = 12 + 4; 3827 if (disp < 0) { 3828 disp -= fudge; 3829 } else { 3830 disp += fudge; 3831 } 3832 return is_simm32(disp); 3833 } 3834 3835 // Check if the polling page is not reachable from the code cache using rip-relative 3836 // addressing. 
3837 bool Assembler::is_polling_page_far() { 3838 intptr_t addr = (intptr_t)os::get_polling_page(); 3839 return ForceUnreachable || 3840 !is_simm32(addr - (intptr_t)CodeCache::low_bound()) || 3841 !is_simm32(addr - (intptr_t)CodeCache::high_bound()); 3842 } 3843 3844 void Assembler::emit_data64(jlong data, 3845 relocInfo::relocType rtype, 3846 int format) { 3847 if (rtype == relocInfo::none) { 3848 emit_long64(data); 3849 } else { 3850 emit_data64(data, Relocation::spec_simple(rtype), format); 3851 } 3852 } 3853 3854 void Assembler::emit_data64(jlong data, 3855 RelocationHolder const& rspec, 3856 int format) { 3857 assert(imm_operand == 0, "default format must be immediate in this file"); 3858 assert(imm_operand == format, "must be immediate"); 3859 assert(inst_mark() != NULL, "must be inside InstructionMark"); 3860 // Do not use AbstractAssembler::relocate, which is not intended for 3861 // embedded words. Instead, relocate to the enclosing instruction. 3862 code_section()->relocate(inst_mark(), rspec, format); 3863 #ifdef ASSERT 3864 check_relocation(rspec, format); 3865 #endif 3866 emit_long64(data); 3867 } 3868 3869 int Assembler::prefix_and_encode(int reg_enc, bool byteinst) { 3870 if (reg_enc >= 8) { 3871 prefix(REX_B); 3872 reg_enc -= 8; 3873 } else if (byteinst && reg_enc >= 4) { 3874 prefix(REX); 3875 } 3876 return reg_enc; 3877 } 3878 3879 int Assembler::prefixq_and_encode(int reg_enc) { 3880 if (reg_enc < 8) { 3881 prefix(REX_W); 3882 } else { 3883 prefix(REX_WB); 3884 reg_enc -= 8; 3885 } 3886 return reg_enc; 3887 } 3888 3889 int Assembler::prefix_and_encode(int dst_enc, int src_enc, bool byteinst) { 3890 if (dst_enc < 8) { 3891 if (src_enc >= 8) { 3892 prefix(REX_B); 3893 src_enc -= 8; 3894 } else if (byteinst && src_enc >= 4) { 3895 prefix(REX); 3896 } 3897 } else { 3898 if (src_enc < 8) { 3899 prefix(REX_R); 3900 } else { 3901 prefix(REX_RB); 3902 src_enc -= 8; 3903 } 3904 dst_enc -= 8; 3905 } 3906 return dst_enc << 3 | src_enc; 3907 } 3908 3909 int 
Assembler::prefixq_and_encode(int dst_enc, int src_enc) { 3910 if (dst_enc < 8) { 3911 if (src_enc < 8) { 3912 prefix(REX_W); 3913 } else { 3914 prefix(REX_WB); 3915 src_enc -= 8; 3916 } 3917 } else { 3918 if (src_enc < 8) { 3919 prefix(REX_WR); 3920 } else { 3921 prefix(REX_WRB); 3922 src_enc -= 8; 3923 } 3924 dst_enc -= 8; 3925 } 3926 return dst_enc << 3 | src_enc; 3927 } 3928 3929 void Assembler::prefix(Register reg) { 3930 if (reg->encoding() >= 8) { 3931 prefix(REX_B); 3932 } 3933 } 3934 3935 void Assembler::prefix(Address adr) { 3936 if (adr.base_needs_rex()) { 3937 if (adr.index_needs_rex()) { 3938 prefix(REX_XB); 3939 } else { 3940 prefix(REX_B); 3941 } 3942 } else { 3943 if (adr.index_needs_rex()) { 3944 prefix(REX_X); 3945 } 3946 } 3947 } 3948 3949 void Assembler::prefixq(Address adr) { 3950 if (adr.base_needs_rex()) { 3951 if (adr.index_needs_rex()) { 3952 prefix(REX_WXB); 3953 } else { 3954 prefix(REX_WB); 3955 } 3956 } else { 3957 if (adr.index_needs_rex()) { 3958 prefix(REX_WX); 3959 } else { 3960 prefix(REX_W); 3961 } 3962 } 3963 } 3964 3965 3966 void Assembler::prefix(Address adr, Register reg, bool byteinst) { 3967 if (reg->encoding() < 8) { 3968 if (adr.base_needs_rex()) { 3969 if (adr.index_needs_rex()) { 3970 prefix(REX_XB); 3971 } else { 3972 prefix(REX_B); 3973 } 3974 } else { 3975 if (adr.index_needs_rex()) { 3976 prefix(REX_X); 3977 } else if (byteinst && reg->encoding() >= 4 ) { 3978 prefix(REX); 3979 } 3980 } 3981 } else { 3982 if (adr.base_needs_rex()) { 3983 if (adr.index_needs_rex()) { 3984 prefix(REX_RXB); 3985 } else { 3986 prefix(REX_RB); 3987 } 3988 } else { 3989 if (adr.index_needs_rex()) { 3990 prefix(REX_RX); 3991 } else { 3992 prefix(REX_R); 3993 } 3994 } 3995 } 3996 } 3997 3998 void Assembler::prefixq(Address adr, Register src) { 3999 if (src->encoding() < 8) { 4000 if (adr.base_needs_rex()) { 4001 if (adr.index_needs_rex()) { 4002 prefix(REX_WXB); 4003 } else { 4004 prefix(REX_WB); 4005 } 4006 } else { 4007 if 
(adr.index_needs_rex()) { 4008 prefix(REX_WX); 4009 } else { 4010 prefix(REX_W); 4011 } 4012 } 4013 } else { 4014 if (adr.base_needs_rex()) { 4015 if (adr.index_needs_rex()) { 4016 prefix(REX_WRXB); 4017 } else { 4018 prefix(REX_WRB); 4019 } 4020 } else { 4021 if (adr.index_needs_rex()) { 4022 prefix(REX_WRX); 4023 } else { 4024 prefix(REX_WR); 4025 } 4026 } 4027 } 4028 } 4029 4030 void Assembler::prefix(Address adr, XMMRegister reg) { 4031 if (reg->encoding() < 8) { 4032 if (adr.base_needs_rex()) { 4033 if (adr.index_needs_rex()) { 4034 prefix(REX_XB); 4035 } else { 4036 prefix(REX_B); 4037 } 4038 } else { 4039 if (adr.index_needs_rex()) { 4040 prefix(REX_X); 4041 } 4042 } 4043 } else { 4044 if (adr.base_needs_rex()) { 4045 if (adr.index_needs_rex()) { 4046 prefix(REX_RXB); 4047 } else { 4048 prefix(REX_RB); 4049 } 4050 } else { 4051 if (adr.index_needs_rex()) { 4052 prefix(REX_RX); 4053 } else { 4054 prefix(REX_R); 4055 } 4056 } 4057 } 4058 } 4059 4060 void Assembler::prefixq(Address adr, XMMRegister src) { 4061 if (src->encoding() < 8) { 4062 if (adr.base_needs_rex()) { 4063 if (adr.index_needs_rex()) { 4064 prefix(REX_WXB); 4065 } else { 4066 prefix(REX_WB); 4067 } 4068 } else { 4069 if (adr.index_needs_rex()) { 4070 prefix(REX_WX); 4071 } else { 4072 prefix(REX_W); 4073 } 4074 } 4075 } else { 4076 if (adr.base_needs_rex()) { 4077 if (adr.index_needs_rex()) { 4078 prefix(REX_WRXB); 4079 } else { 4080 prefix(REX_WRB); 4081 } 4082 } else { 4083 if (adr.index_needs_rex()) { 4084 prefix(REX_WRX); 4085 } else { 4086 prefix(REX_WR); 4087 } 4088 } 4089 } 4090 } 4091 4092 void Assembler::adcq(Register dst, int32_t imm32) { 4093 (void) prefixq_and_encode(dst->encoding()); 4094 emit_arith(0x81, 0xD0, dst, imm32); 4095 } 4096 4097 void Assembler::adcq(Register dst, Address src) { 4098 InstructionMark im(this); 4099 prefixq(src, dst); 4100 emit_byte(0x13); 4101 emit_operand(dst, src); 4102 } 4103 4104 void Assembler::adcq(Register dst, Register src) { 4105 (int) 
prefixq_and_encode(dst->encoding(), src->encoding()); 4106 emit_arith(0x13, 0xC0, dst, src); 4107 } 4108 4109 void Assembler::addq(Address dst, int32_t imm32) { 4110 InstructionMark im(this); 4111 prefixq(dst); 4112 emit_arith_operand(0x81, rax, dst,imm32); 4113 } 4114 4115 void Assembler::addq(Address dst, Register src) { 4116 InstructionMark im(this); 4117 prefixq(dst, src); 4118 emit_byte(0x01); 4119 emit_operand(src, dst); 4120 } 4121 4122 void Assembler::addq(Register dst, int32_t imm32) { 4123 (void) prefixq_and_encode(dst->encoding()); 4124 emit_arith(0x81, 0xC0, dst, imm32); 4125 } 4126 4127 void Assembler::addq(Register dst, Address src) { 4128 InstructionMark im(this); 4129 prefixq(src, dst); 4130 emit_byte(0x03); 4131 emit_operand(dst, src); 4132 } 4133 4134 void Assembler::addq(Register dst, Register src) { 4135 (void) prefixq_and_encode(dst->encoding(), src->encoding()); 4136 emit_arith(0x03, 0xC0, dst, src); 4137 } 4138 4139 void Assembler::andq(Address dst, int32_t imm32) { 4140 InstructionMark im(this); 4141 prefixq(dst); 4142 emit_byte(0x81); 4143 emit_operand(rsp, dst, 4); 4144 emit_long(imm32); 4145 } 4146 4147 void Assembler::andq(Register dst, int32_t imm32) { 4148 (void) prefixq_and_encode(dst->encoding()); 4149 emit_arith(0x81, 0xE0, dst, imm32); 4150 } 4151 4152 void Assembler::andq(Register dst, Address src) { 4153 InstructionMark im(this); 4154 prefixq(src, dst); 4155 emit_byte(0x23); 4156 emit_operand(dst, src); 4157 } 4158 4159 void Assembler::andq(Register dst, Register src) { 4160 (int) prefixq_and_encode(dst->encoding(), src->encoding()); 4161 emit_arith(0x23, 0xC0, dst, src); 4162 } 4163 4164 void Assembler::bsfq(Register dst, Register src) { 4165 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 4166 emit_byte(0x0F); 4167 emit_byte(0xBC); 4168 emit_byte(0xC0 | encode); 4169 } 4170 4171 void Assembler::bsrq(Register dst, Register src) { 4172 assert(!VM_Version::supports_lzcnt(), "encoding is treated as LZCNT"); 4173 
int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 4174 emit_byte(0x0F); 4175 emit_byte(0xBD); 4176 emit_byte(0xC0 | encode); 4177 } 4178 4179 void Assembler::bswapq(Register reg) { 4180 int encode = prefixq_and_encode(reg->encoding()); 4181 emit_byte(0x0F); 4182 emit_byte(0xC8 | encode); 4183 } 4184 4185 void Assembler::cdqq() { 4186 prefix(REX_W); 4187 emit_byte(0x99); 4188 } 4189 4190 void Assembler::clflush(Address adr) { 4191 prefix(adr); 4192 emit_byte(0x0F); 4193 emit_byte(0xAE); 4194 emit_operand(rdi, adr); 4195 } 4196 4197 void Assembler::cmovq(Condition cc, Register dst, Register src) { 4198 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 4199 emit_byte(0x0F); 4200 emit_byte(0x40 | cc); 4201 emit_byte(0xC0 | encode); 4202 } 4203 4204 void Assembler::cmovq(Condition cc, Register dst, Address src) { 4205 InstructionMark im(this); 4206 prefixq(src, dst); 4207 emit_byte(0x0F); 4208 emit_byte(0x40 | cc); 4209 emit_operand(dst, src); 4210 } 4211 4212 void Assembler::cmpq(Address dst, int32_t imm32) { 4213 InstructionMark im(this); 4214 prefixq(dst); 4215 emit_byte(0x81); 4216 emit_operand(rdi, dst, 4); 4217 emit_long(imm32); 4218 } 4219 4220 void Assembler::cmpq(Register dst, int32_t imm32) { 4221 (void) prefixq_and_encode(dst->encoding()); 4222 emit_arith(0x81, 0xF8, dst, imm32); 4223 } 4224 4225 void Assembler::cmpq(Address dst, Register src) { 4226 InstructionMark im(this); 4227 prefixq(dst, src); 4228 emit_byte(0x3B); 4229 emit_operand(src, dst); 4230 } 4231 4232 void Assembler::cmpq(Register dst, Register src) { 4233 (void) prefixq_and_encode(dst->encoding(), src->encoding()); 4234 emit_arith(0x3B, 0xC0, dst, src); 4235 } 4236 4237 void Assembler::cmpq(Register dst, Address src) { 4238 InstructionMark im(this); 4239 prefixq(src, dst); 4240 emit_byte(0x3B); 4241 emit_operand(dst, src); 4242 } 4243 4244 void Assembler::cmpxchgq(Register reg, Address adr) { 4245 InstructionMark im(this); 4246 prefixq(adr, reg); 4247 
emit_byte(0x0F); 4248 emit_byte(0xB1); 4249 emit_operand(reg, adr); 4250 } 4251 4252 void Assembler::cvtsi2sdq(XMMRegister dst, Register src) { 4253 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 4254 int encode = simd_prefix_and_encode_q(dst, dst, src, VEX_SIMD_F2); 4255 emit_byte(0x2A); 4256 emit_byte(0xC0 | encode); 4257 } 4258 4259 void Assembler::cvtsi2sdq(XMMRegister dst, Address src) { 4260 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 4261 InstructionMark im(this); 4262 simd_prefix_q(dst, dst, src, VEX_SIMD_F2); 4263 emit_byte(0x2A); 4264 emit_operand(dst, src); 4265 } 4266 4267 void Assembler::cvtsi2ssq(XMMRegister dst, Register src) { 4268 NOT_LP64(assert(VM_Version::supports_sse(), "")); 4269 int encode = simd_prefix_and_encode_q(dst, dst, src, VEX_SIMD_F3); 4270 emit_byte(0x2A); 4271 emit_byte(0xC0 | encode); 4272 } 4273 4274 void Assembler::cvtsi2ssq(XMMRegister dst, Address src) { 4275 NOT_LP64(assert(VM_Version::supports_sse(), "")); 4276 InstructionMark im(this); 4277 simd_prefix_q(dst, dst, src, VEX_SIMD_F3); 4278 emit_byte(0x2A); 4279 emit_operand(dst, src); 4280 } 4281 4282 void Assembler::cvttsd2siq(Register dst, XMMRegister src) { 4283 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 4284 int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_F2); 4285 emit_byte(0x2C); 4286 emit_byte(0xC0 | encode); 4287 } 4288 4289 void Assembler::cvttss2siq(Register dst, XMMRegister src) { 4290 NOT_LP64(assert(VM_Version::supports_sse(), "")); 4291 int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_F3); 4292 emit_byte(0x2C); 4293 emit_byte(0xC0 | encode); 4294 } 4295 4296 void Assembler::decl(Register dst) { 4297 // Don't use it directly. Use MacroAssembler::decrementl() instead. 4298 // Use two-byte form (one-byte form is a REX prefix in 64-bit mode) 4299 int encode = prefix_and_encode(dst->encoding()); 4300 emit_byte(0xFF); 4301 emit_byte(0xC8 | encode); 4302 } 4303 4304 void Assembler::decq(Register dst) { 4305 // Don't use it directly. 
Use MacroAssembler::decrementq() instead. 4306 // Use two-byte form (one-byte from is a REX prefix in 64-bit mode) 4307 int encode = prefixq_and_encode(dst->encoding()); 4308 emit_byte(0xFF); 4309 emit_byte(0xC8 | encode); 4310 } 4311 4312 void Assembler::decq(Address dst) { 4313 // Don't use it directly. Use MacroAssembler::decrementq() instead. 4314 InstructionMark im(this); 4315 prefixq(dst); 4316 emit_byte(0xFF); 4317 emit_operand(rcx, dst); 4318 } 4319 4320 void Assembler::fxrstor(Address src) { 4321 prefixq(src); 4322 emit_byte(0x0F); 4323 emit_byte(0xAE); 4324 emit_operand(as_Register(1), src); 4325 } 4326 4327 void Assembler::fxsave(Address dst) { 4328 prefixq(dst); 4329 emit_byte(0x0F); 4330 emit_byte(0xAE); 4331 emit_operand(as_Register(0), dst); 4332 } 4333 4334 void Assembler::idivq(Register src) { 4335 int encode = prefixq_and_encode(src->encoding()); 4336 emit_byte(0xF7); 4337 emit_byte(0xF8 | encode); 4338 } 4339 4340 void Assembler::imulq(Register dst, Register src) { 4341 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 4342 emit_byte(0x0F); 4343 emit_byte(0xAF); 4344 emit_byte(0xC0 | encode); 4345 } 4346 4347 void Assembler::imulq(Register dst, Register src, int value) { 4348 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 4349 if (is8bit(value)) { 4350 emit_byte(0x6B); 4351 emit_byte(0xC0 | encode); 4352 emit_byte(value & 0xFF); 4353 } else { 4354 emit_byte(0x69); 4355 emit_byte(0xC0 | encode); 4356 emit_long(value); 4357 } 4358 } 4359 4360 void Assembler::incl(Register dst) { 4361 // Don't use it directly. Use MacroAssembler::incrementl() instead. 4362 // Use two-byte form (one-byte from is a REX prefix in 64-bit mode) 4363 int encode = prefix_and_encode(dst->encoding()); 4364 emit_byte(0xFF); 4365 emit_byte(0xC0 | encode); 4366 } 4367 4368 void Assembler::incq(Register dst) { 4369 // Don't use it directly. Use MacroAssembler::incrementq() instead. 
4370 // Use two-byte form (one-byte from is a REX prefix in 64-bit mode) 4371 int encode = prefixq_and_encode(dst->encoding()); 4372 emit_byte(0xFF); 4373 emit_byte(0xC0 | encode); 4374 } 4375 4376 void Assembler::incq(Address dst) { 4377 // Don't use it directly. Use MacroAssembler::incrementq() instead. 4378 InstructionMark im(this); 4379 prefixq(dst); 4380 emit_byte(0xFF); 4381 emit_operand(rax, dst); 4382 } 4383 4384 void Assembler::lea(Register dst, Address src) { 4385 leaq(dst, src); 4386 } 4387 4388 void Assembler::leaq(Register dst, Address src) { 4389 InstructionMark im(this); 4390 prefixq(src, dst); 4391 emit_byte(0x8D); 4392 emit_operand(dst, src); 4393 } 4394 4395 void Assembler::mov64(Register dst, int64_t imm64) { 4396 InstructionMark im(this); 4397 int encode = prefixq_and_encode(dst->encoding()); 4398 emit_byte(0xB8 | encode); 4399 emit_long64(imm64); 4400 } 4401 4402 void Assembler::mov_literal64(Register dst, intptr_t imm64, RelocationHolder const& rspec) { 4403 InstructionMark im(this); 4404 int encode = prefixq_and_encode(dst->encoding()); 4405 emit_byte(0xB8 | encode); 4406 emit_data64(imm64, rspec); 4407 } 4408 4409 void Assembler::mov_narrow_oop(Register dst, int32_t imm32, RelocationHolder const& rspec) { 4410 InstructionMark im(this); 4411 int encode = prefix_and_encode(dst->encoding()); 4412 emit_byte(0xB8 | encode); 4413 emit_data((int)imm32, rspec, narrow_oop_operand); 4414 } 4415 4416 void Assembler::mov_narrow_oop(Address dst, int32_t imm32, RelocationHolder const& rspec) { 4417 InstructionMark im(this); 4418 prefix(dst); 4419 emit_byte(0xC7); 4420 emit_operand(rax, dst, 4); 4421 emit_data((int)imm32, rspec, narrow_oop_operand); 4422 } 4423 4424 void Assembler::cmp_narrow_oop(Register src1, int32_t imm32, RelocationHolder const& rspec) { 4425 InstructionMark im(this); 4426 int encode = prefix_and_encode(src1->encoding()); 4427 emit_byte(0x81); 4428 emit_byte(0xF8 | encode); 4429 emit_data((int)imm32, rspec, narrow_oop_operand); 4430 } 
4431 4432 void Assembler::cmp_narrow_oop(Address src1, int32_t imm32, RelocationHolder const& rspec) { 4433 InstructionMark im(this); 4434 prefix(src1); 4435 emit_byte(0x81); 4436 emit_operand(rax, src1, 4); 4437 emit_data((int)imm32, rspec, narrow_oop_operand); 4438 } 4439 4440 void Assembler::lzcntq(Register dst, Register src) { 4441 assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR"); 4442 emit_byte(0xF3); 4443 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 4444 emit_byte(0x0F); 4445 emit_byte(0xBD); 4446 emit_byte(0xC0 | encode); 4447 } 4448 4449 void Assembler::movdq(XMMRegister dst, Register src) { 4450 // table D-1 says MMX/SSE2 4451 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 4452 int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_66); 4453 emit_byte(0x6E); 4454 emit_byte(0xC0 | encode); 4455 } 4456 4457 void Assembler::movdq(Register dst, XMMRegister src) { 4458 // table D-1 says MMX/SSE2 4459 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 4460 // swap src/dst to get correct prefix 4461 int encode = simd_prefix_and_encode_q(src, dst, VEX_SIMD_66); 4462 emit_byte(0x7E); 4463 emit_byte(0xC0 | encode); 4464 } 4465 4466 void Assembler::movq(Register dst, Register src) { 4467 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 4468 emit_byte(0x8B); 4469 emit_byte(0xC0 | encode); 4470 } 4471 4472 void Assembler::movq(Register dst, Address src) { 4473 InstructionMark im(this); 4474 prefixq(src, dst); 4475 emit_byte(0x8B); 4476 emit_operand(dst, src); 4477 } 4478 4479 void Assembler::movq(Address dst, Register src) { 4480 InstructionMark im(this); 4481 prefixq(dst, src); 4482 emit_byte(0x89); 4483 emit_operand(src, dst); 4484 } 4485 4486 void Assembler::movsbq(Register dst, Address src) { 4487 InstructionMark im(this); 4488 prefixq(src, dst); 4489 emit_byte(0x0F); 4490 emit_byte(0xBE); 4491 emit_operand(dst, src); 4492 } 4493 4494 void Assembler::movsbq(Register dst, Register src) { 4495 int encode 
= prefixq_and_encode(dst->encoding(), src->encoding()); 4496 emit_byte(0x0F); 4497 emit_byte(0xBE); 4498 emit_byte(0xC0 | encode); 4499 } 4500 4501 void Assembler::movslq(Register dst, int32_t imm32) { 4502 // dbx shows movslq(rcx, 3) as movq $0x0000000049000000,(%rbx) 4503 // and movslq(r8, 3); as movl $0x0000000048000000,(%rbx) 4504 // as a result we shouldn't use until tested at runtime... 4505 ShouldNotReachHere(); 4506 InstructionMark im(this); 4507 int encode = prefixq_and_encode(dst->encoding()); 4508 emit_byte(0xC7 | encode); 4509 emit_long(imm32); 4510 } 4511 4512 void Assembler::movslq(Address dst, int32_t imm32) { 4513 assert(is_simm32(imm32), "lost bits"); 4514 InstructionMark im(this); 4515 prefixq(dst); 4516 emit_byte(0xC7); 4517 emit_operand(rax, dst, 4); 4518 emit_long(imm32); 4519 } 4520 4521 void Assembler::movslq(Register dst, Address src) { 4522 InstructionMark im(this); 4523 prefixq(src, dst); 4524 emit_byte(0x63); 4525 emit_operand(dst, src); 4526 } 4527 4528 void Assembler::movslq(Register dst, Register src) { 4529 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 4530 emit_byte(0x63); 4531 emit_byte(0xC0 | encode); 4532 } 4533 4534 void Assembler::movswq(Register dst, Address src) { 4535 InstructionMark im(this); 4536 prefixq(src, dst); 4537 emit_byte(0x0F); 4538 emit_byte(0xBF); 4539 emit_operand(dst, src); 4540 } 4541 4542 void Assembler::movswq(Register dst, Register src) { 4543 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 4544 emit_byte(0x0F); 4545 emit_byte(0xBF); 4546 emit_byte(0xC0 | encode); 4547 } 4548 4549 void Assembler::movzbq(Register dst, Address src) { 4550 InstructionMark im(this); 4551 prefixq(src, dst); 4552 emit_byte(0x0F); 4553 emit_byte(0xB6); 4554 emit_operand(dst, src); 4555 } 4556 4557 void Assembler::movzbq(Register dst, Register src) { 4558 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 4559 emit_byte(0x0F); 4560 emit_byte(0xB6); 4561 emit_byte(0xC0 | 
encode); 4562 } 4563 4564 void Assembler::movzwq(Register dst, Address src) { 4565 InstructionMark im(this); 4566 prefixq(src, dst); 4567 emit_byte(0x0F); 4568 emit_byte(0xB7); 4569 emit_operand(dst, src); 4570 } 4571 4572 void Assembler::movzwq(Register dst, Register src) { 4573 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 4574 emit_byte(0x0F); 4575 emit_byte(0xB7); 4576 emit_byte(0xC0 | encode); 4577 } 4578 4579 void Assembler::negq(Register dst) { 4580 int encode = prefixq_and_encode(dst->encoding()); 4581 emit_byte(0xF7); 4582 emit_byte(0xD8 | encode); 4583 } 4584 4585 void Assembler::notq(Register dst) { 4586 int encode = prefixq_and_encode(dst->encoding()); 4587 emit_byte(0xF7); 4588 emit_byte(0xD0 | encode); 4589 } 4590 4591 void Assembler::orq(Address dst, int32_t imm32) { 4592 InstructionMark im(this); 4593 prefixq(dst); 4594 emit_byte(0x81); 4595 emit_operand(rcx, dst, 4); 4596 emit_long(imm32); 4597 } 4598 4599 void Assembler::orq(Register dst, int32_t imm32) { 4600 (void) prefixq_and_encode(dst->encoding()); 4601 emit_arith(0x81, 0xC8, dst, imm32); 4602 } 4603 4604 void Assembler::orq(Register dst, Address src) { 4605 InstructionMark im(this); 4606 prefixq(src, dst); 4607 emit_byte(0x0B); 4608 emit_operand(dst, src); 4609 } 4610 4611 void Assembler::orq(Register dst, Register src) { 4612 (void) prefixq_and_encode(dst->encoding(), src->encoding()); 4613 emit_arith(0x0B, 0xC0, dst, src); 4614 } 4615 4616 void Assembler::popa() { // 64bit 4617 movq(r15, Address(rsp, 0)); 4618 movq(r14, Address(rsp, wordSize)); 4619 movq(r13, Address(rsp, 2 * wordSize)); 4620 movq(r12, Address(rsp, 3 * wordSize)); 4621 movq(r11, Address(rsp, 4 * wordSize)); 4622 movq(r10, Address(rsp, 5 * wordSize)); 4623 movq(r9, Address(rsp, 6 * wordSize)); 4624 movq(r8, Address(rsp, 7 * wordSize)); 4625 movq(rdi, Address(rsp, 8 * wordSize)); 4626 movq(rsi, Address(rsp, 9 * wordSize)); 4627 movq(rbp, Address(rsp, 10 * wordSize)); 4628 // skip rsp 4629 movq(rbx, 
Address(rsp, 12 * wordSize)); 4630 movq(rdx, Address(rsp, 13 * wordSize)); 4631 movq(rcx, Address(rsp, 14 * wordSize)); 4632 movq(rax, Address(rsp, 15 * wordSize)); 4633 4634 addq(rsp, 16 * wordSize); 4635 } 4636 4637 void Assembler::popcntq(Register dst, Address src) { 4638 assert(VM_Version::supports_popcnt(), "must support"); 4639 InstructionMark im(this); 4640 emit_byte(0xF3); 4641 prefixq(src, dst); 4642 emit_byte(0x0F); 4643 emit_byte(0xB8); 4644 emit_operand(dst, src); 4645 } 4646 4647 void Assembler::popcntq(Register dst, Register src) { 4648 assert(VM_Version::supports_popcnt(), "must support"); 4649 emit_byte(0xF3); 4650 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 4651 emit_byte(0x0F); 4652 emit_byte(0xB8); 4653 emit_byte(0xC0 | encode); 4654 } 4655 4656 void Assembler::popq(Address dst) { 4657 InstructionMark im(this); 4658 prefixq(dst); 4659 emit_byte(0x8F); 4660 emit_operand(rax, dst); 4661 } 4662 4663 void Assembler::pusha() { // 64bit 4664 // we have to store original rsp. ABI says that 128 bytes 4665 // below rsp are local scratch. 
4666 movq(Address(rsp, -5 * wordSize), rsp); 4667 4668 subq(rsp, 16 * wordSize); 4669 4670 movq(Address(rsp, 15 * wordSize), rax); 4671 movq(Address(rsp, 14 * wordSize), rcx); 4672 movq(Address(rsp, 13 * wordSize), rdx); 4673 movq(Address(rsp, 12 * wordSize), rbx); 4674 // skip rsp 4675 movq(Address(rsp, 10 * wordSize), rbp); 4676 movq(Address(rsp, 9 * wordSize), rsi); 4677 movq(Address(rsp, 8 * wordSize), rdi); 4678 movq(Address(rsp, 7 * wordSize), r8); 4679 movq(Address(rsp, 6 * wordSize), r9); 4680 movq(Address(rsp, 5 * wordSize), r10); 4681 movq(Address(rsp, 4 * wordSize), r11); 4682 movq(Address(rsp, 3 * wordSize), r12); 4683 movq(Address(rsp, 2 * wordSize), r13); 4684 movq(Address(rsp, wordSize), r14); 4685 movq(Address(rsp, 0), r15); 4686 } 4687 4688 void Assembler::pushq(Address src) { 4689 InstructionMark im(this); 4690 prefixq(src); 4691 emit_byte(0xFF); 4692 emit_operand(rsi, src); 4693 } 4694 4695 void Assembler::rclq(Register dst, int imm8) { 4696 assert(isShiftCount(imm8 >> 1), "illegal shift count"); 4697 int encode = prefixq_and_encode(dst->encoding()); 4698 if (imm8 == 1) { 4699 emit_byte(0xD1); 4700 emit_byte(0xD0 | encode); 4701 } else { 4702 emit_byte(0xC1); 4703 emit_byte(0xD0 | encode); 4704 emit_byte(imm8); 4705 } 4706 } 4707 void Assembler::sarq(Register dst, int imm8) { 4708 assert(isShiftCount(imm8 >> 1), "illegal shift count"); 4709 int encode = prefixq_and_encode(dst->encoding()); 4710 if (imm8 == 1) { 4711 emit_byte(0xD1); 4712 emit_byte(0xF8 | encode); 4713 } else { 4714 emit_byte(0xC1); 4715 emit_byte(0xF8 | encode); 4716 emit_byte(imm8); 4717 } 4718 } 4719 4720 void Assembler::sarq(Register dst) { 4721 int encode = prefixq_and_encode(dst->encoding()); 4722 emit_byte(0xD3); 4723 emit_byte(0xF8 | encode); 4724 } 4725 4726 void Assembler::sbbq(Address dst, int32_t imm32) { 4727 InstructionMark im(this); 4728 prefixq(dst); 4729 emit_arith_operand(0x81, rbx, dst, imm32); 4730 } 4731 4732 void Assembler::sbbq(Register dst, int32_t imm32) { 
4733 (void) prefixq_and_encode(dst->encoding()); 4734 emit_arith(0x81, 0xD8, dst, imm32); 4735 } 4736 4737 void Assembler::sbbq(Register dst, Address src) { 4738 InstructionMark im(this); 4739 prefixq(src, dst); 4740 emit_byte(0x1B); 4741 emit_operand(dst, src); 4742 } 4743 4744 void Assembler::sbbq(Register dst, Register src) { 4745 (void) prefixq_and_encode(dst->encoding(), src->encoding()); 4746 emit_arith(0x1B, 0xC0, dst, src); 4747 } 4748 4749 void Assembler::shlq(Register dst, int imm8) { 4750 assert(isShiftCount(imm8 >> 1), "illegal shift count"); 4751 int encode = prefixq_and_encode(dst->encoding()); 4752 if (imm8 == 1) { 4753 emit_byte(0xD1); 4754 emit_byte(0xE0 | encode); 4755 } else { 4756 emit_byte(0xC1); 4757 emit_byte(0xE0 | encode); 4758 emit_byte(imm8); 4759 } 4760 } 4761 4762 void Assembler::shlq(Register dst) { 4763 int encode = prefixq_and_encode(dst->encoding()); 4764 emit_byte(0xD3); 4765 emit_byte(0xE0 | encode); 4766 } 4767 4768 void Assembler::shrq(Register dst, int imm8) { 4769 assert(isShiftCount(imm8 >> 1), "illegal shift count"); 4770 int encode = prefixq_and_encode(dst->encoding()); 4771 emit_byte(0xC1); 4772 emit_byte(0xE8 | encode); 4773 emit_byte(imm8); 4774 } 4775 4776 void Assembler::shrq(Register dst) { 4777 int encode = prefixq_and_encode(dst->encoding()); 4778 emit_byte(0xD3); 4779 emit_byte(0xE8 | encode); 4780 } 4781 4782 void Assembler::subq(Address dst, int32_t imm32) { 4783 InstructionMark im(this); 4784 prefixq(dst); 4785 emit_arith_operand(0x81, rbp, dst, imm32); 4786 } 4787 4788 void Assembler::subq(Address dst, Register src) { 4789 InstructionMark im(this); 4790 prefixq(dst, src); 4791 emit_byte(0x29); 4792 emit_operand(src, dst); 4793 } 4794 4795 void Assembler::subq(Register dst, int32_t imm32) { 4796 (void) prefixq_and_encode(dst->encoding()); 4797 emit_arith(0x81, 0xE8, dst, imm32); 4798 } 4799 4800 // Force generation of a 4 byte immediate value even if it fits into 8bit 4801 void Assembler::subq_imm32(Register 
dst, int32_t imm32) { 4802 (void) prefixq_and_encode(dst->encoding()); 4803 emit_arith_imm32(0x81, 0xE8, dst, imm32); 4804 } 4805 4806 void Assembler::subq(Register dst, Address src) { 4807 InstructionMark im(this); 4808 prefixq(src, dst); 4809 emit_byte(0x2B); 4810 emit_operand(dst, src); 4811 } 4812 4813 void Assembler::subq(Register dst, Register src) { 4814 (void) prefixq_and_encode(dst->encoding(), src->encoding()); 4815 emit_arith(0x2B, 0xC0, dst, src); 4816 } 4817 4818 void Assembler::testq(Register dst, int32_t imm32) { 4819 // not using emit_arith because test 4820 // doesn't support sign-extension of 4821 // 8bit operands 4822 int encode = dst->encoding(); 4823 if (encode == 0) { 4824 prefix(REX_W); 4825 emit_byte(0xA9); 4826 } else { 4827 encode = prefixq_and_encode(encode); 4828 emit_byte(0xF7); 4829 emit_byte(0xC0 | encode); 4830 } 4831 emit_long(imm32); 4832 } 4833 4834 void Assembler::testq(Register dst, Register src) { 4835 (void) prefixq_and_encode(dst->encoding(), src->encoding()); 4836 emit_arith(0x85, 0xC0, dst, src); 4837 } 4838 4839 void Assembler::xaddq(Address dst, Register src) { 4840 InstructionMark im(this); 4841 prefixq(dst, src); 4842 emit_byte(0x0F); 4843 emit_byte(0xC1); 4844 emit_operand(src, dst); 4845 } 4846 4847 void Assembler::xchgq(Register dst, Address src) { 4848 InstructionMark im(this); 4849 prefixq(src, dst); 4850 emit_byte(0x87); 4851 emit_operand(dst, src); 4852 } 4853 4854 void Assembler::xchgq(Register dst, Register src) { 4855 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 4856 emit_byte(0x87); 4857 emit_byte(0xc0 | encode); 4858 } 4859 4860 void Assembler::xorq(Register dst, Register src) { 4861 (void) prefixq_and_encode(dst->encoding(), src->encoding()); 4862 emit_arith(0x33, 0xC0, dst, src); 4863 } 4864 4865 void Assembler::xorq(Register dst, Address src) { 4866 InstructionMark im(this); 4867 prefixq(src, dst); 4868 emit_byte(0x33); 4869 emit_operand(dst, src); 4870 } 4871 4872 #endif // !LP64 
// Table of negated condition codes. The entry at index i is the logical
// opposite of the condition whose encoding is i; the trailing comment on each
// row names the condition(s) being indexed together with that encoding.
static Assembler::Condition reverse[] = {
    Assembler::noOverflow     /* overflow      = 0x0 */ ,
    Assembler::overflow       /* noOverflow    = 0x1 */ ,
    Assembler::aboveEqual     /* carrySet      = 0x2, below         = 0x2 */ ,
    Assembler::below          /* aboveEqual    = 0x3, carryClear    = 0x3 */ ,
    Assembler::notZero        /* zero          = 0x4, equal         = 0x4 */ ,
    Assembler::zero           /* notZero       = 0x5, notEqual      = 0x5 */ ,
    Assembler::above          /* belowEqual    = 0x6 */ ,
    Assembler::belowEqual     /* above         = 0x7 */ ,
    Assembler::positive       /* negative      = 0x8 */ ,
    Assembler::negative       /* positive      = 0x9 */ ,
    Assembler::noParity       /* parity        = 0xa */ ,
    Assembler::parity         /* noParity      = 0xb */ ,
    Assembler::greaterEqual   /* less          = 0xc */ ,
    Assembler::less           /* greaterEqual  = 0xd */ ,
    Assembler::greater        /* lessEqual     = 0xe */ ,
    Assembler::lessEqual      /* greater       = 0xf, */

};


// Implementation of MacroAssembler

// First come the functions whose 32-bit and 64-bit implementations differ,
// unless the difference is trivial (one line or so).
4899 4900 #ifndef _LP64 4901 4902 // 32bit versions 4903 4904 Address MacroAssembler::as_Address(AddressLiteral adr) { 4905 return Address(adr.target(), adr.rspec()); 4906 } 4907 4908 Address MacroAssembler::as_Address(ArrayAddress adr) { 4909 return Address::make_array(adr); 4910 } 4911 4912 int MacroAssembler::biased_locking_enter(Register lock_reg, 4913 Register obj_reg, 4914 Register swap_reg, 4915 Register tmp_reg, 4916 bool swap_reg_contains_mark, 4917 Label& done, 4918 Label* slow_case, 4919 BiasedLockingCounters* counters) { 4920 assert(UseBiasedLocking, "why call this otherwise?"); 4921 assert(swap_reg == rax, "swap_reg must be rax, for cmpxchg"); 4922 assert_different_registers(lock_reg, obj_reg, swap_reg); 4923 4924 if (PrintBiasedLockingStatistics && counters == NULL) 4925 counters = BiasedLocking::counters(); 4926 4927 bool need_tmp_reg = false; 4928 if (tmp_reg == noreg) { 4929 need_tmp_reg = true; 4930 tmp_reg = lock_reg; 4931 } else { 4932 assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg); 4933 } 4934 assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout"); 4935 Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes()); 4936 Address klass_addr (obj_reg, oopDesc::klass_offset_in_bytes()); 4937 Address saved_mark_addr(lock_reg, 0); 4938 4939 // Biased locking 4940 // See whether the lock is currently biased toward our thread and 4941 // whether the epoch is still valid 4942 // Note that the runtime guarantees sufficient alignment of JavaThread 4943 // pointers to allow age to be placed into low bits 4944 // First check to see whether biasing is even enabled for this object 4945 Label cas_label; 4946 int null_check_offset = -1; 4947 if (!swap_reg_contains_mark) { 4948 null_check_offset = offset(); 4949 movl(swap_reg, mark_addr); 4950 } 4951 if (need_tmp_reg) { 4952 push(tmp_reg); 4953 } 4954 movl(tmp_reg, swap_reg); 4955 andl(tmp_reg, 
markOopDesc::biased_lock_mask_in_place); 4956 cmpl(tmp_reg, markOopDesc::biased_lock_pattern); 4957 if (need_tmp_reg) { 4958 pop(tmp_reg); 4959 } 4960 jcc(Assembler::notEqual, cas_label); 4961 // The bias pattern is present in the object's header. Need to check 4962 // whether the bias owner and the epoch are both still current. 4963 // Note that because there is no current thread register on x86 we 4964 // need to store off the mark word we read out of the object to 4965 // avoid reloading it and needing to recheck invariants below. This 4966 // store is unfortunate but it makes the overall code shorter and 4967 // simpler. 4968 movl(saved_mark_addr, swap_reg); 4969 if (need_tmp_reg) { 4970 push(tmp_reg); 4971 } 4972 get_thread(tmp_reg); 4973 xorl(swap_reg, tmp_reg); 4974 if (swap_reg_contains_mark) { 4975 null_check_offset = offset(); 4976 } 4977 movl(tmp_reg, klass_addr); 4978 xorl(swap_reg, Address(tmp_reg, Klass::prototype_header_offset())); 4979 andl(swap_reg, ~((int) markOopDesc::age_mask_in_place)); 4980 if (need_tmp_reg) { 4981 pop(tmp_reg); 4982 } 4983 if (counters != NULL) { 4984 cond_inc32(Assembler::zero, 4985 ExternalAddress((address)counters->biased_lock_entry_count_addr())); 4986 } 4987 jcc(Assembler::equal, done); 4988 4989 Label try_revoke_bias; 4990 Label try_rebias; 4991 4992 // At this point we know that the header has the bias pattern and 4993 // that we are not the bias owner in the current epoch. We need to 4994 // figure out more details about the state of the header in order to 4995 // know what operations can be legally performed on the object's 4996 // header. 4997 4998 // If the low three bits in the xor result aren't clear, that means 4999 // the prototype header is no longer biased and we have to revoke 5000 // the bias on this object. 5001 testl(swap_reg, markOopDesc::biased_lock_mask_in_place); 5002 jcc(Assembler::notZero, try_revoke_bias); 5003 5004 // Biasing is still enabled for this data type. 
See whether the 5005 // epoch of the current bias is still valid, meaning that the epoch 5006 // bits of the mark word are equal to the epoch bits of the 5007 // prototype header. (Note that the prototype header's epoch bits 5008 // only change at a safepoint.) If not, attempt to rebias the object 5009 // toward the current thread. Note that we must be absolutely sure 5010 // that the current epoch is invalid in order to do this because 5011 // otherwise the manipulations it performs on the mark word are 5012 // illegal. 5013 testl(swap_reg, markOopDesc::epoch_mask_in_place); 5014 jcc(Assembler::notZero, try_rebias); 5015 5016 // The epoch of the current bias is still valid but we know nothing 5017 // about the owner; it might be set or it might be clear. Try to 5018 // acquire the bias of the object using an atomic operation. If this 5019 // fails we will go in to the runtime to revoke the object's bias. 5020 // Note that we first construct the presumed unbiased header so we 5021 // don't accidentally blow away another thread's valid bias. 5022 movl(swap_reg, saved_mark_addr); 5023 andl(swap_reg, 5024 markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place); 5025 if (need_tmp_reg) { 5026 push(tmp_reg); 5027 } 5028 get_thread(tmp_reg); 5029 orl(tmp_reg, swap_reg); 5030 if (os::is_MP()) { 5031 lock(); 5032 } 5033 cmpxchgptr(tmp_reg, Address(obj_reg, 0)); 5034 if (need_tmp_reg) { 5035 pop(tmp_reg); 5036 } 5037 // If the biasing toward our thread failed, this means that 5038 // another thread succeeded in biasing it toward itself and we 5039 // need to revoke that bias. The revocation will occur in the 5040 // interpreter runtime in the slow case. 
5041 if (counters != NULL) { 5042 cond_inc32(Assembler::zero, 5043 ExternalAddress((address)counters->anonymously_biased_lock_entry_count_addr())); 5044 } 5045 if (slow_case != NULL) { 5046 jcc(Assembler::notZero, *slow_case); 5047 } 5048 jmp(done); 5049 5050 bind(try_rebias); 5051 // At this point we know the epoch has expired, meaning that the 5052 // current "bias owner", if any, is actually invalid. Under these 5053 // circumstances _only_, we are allowed to use the current header's 5054 // value as the comparison value when doing the cas to acquire the 5055 // bias in the current epoch. In other words, we allow transfer of 5056 // the bias from one thread to another directly in this situation. 5057 // 5058 // FIXME: due to a lack of registers we currently blow away the age 5059 // bits in this situation. Should attempt to preserve them. 5060 if (need_tmp_reg) { 5061 push(tmp_reg); 5062 } 5063 get_thread(tmp_reg); 5064 movl(swap_reg, klass_addr); 5065 orl(tmp_reg, Address(swap_reg, Klass::prototype_header_offset())); 5066 movl(swap_reg, saved_mark_addr); 5067 if (os::is_MP()) { 5068 lock(); 5069 } 5070 cmpxchgptr(tmp_reg, Address(obj_reg, 0)); 5071 if (need_tmp_reg) { 5072 pop(tmp_reg); 5073 } 5074 // If the biasing toward our thread failed, then another thread 5075 // succeeded in biasing it toward itself and we need to revoke that 5076 // bias. The revocation will occur in the runtime in the slow case. 5077 if (counters != NULL) { 5078 cond_inc32(Assembler::zero, 5079 ExternalAddress((address)counters->rebiased_lock_entry_count_addr())); 5080 } 5081 if (slow_case != NULL) { 5082 jcc(Assembler::notZero, *slow_case); 5083 } 5084 jmp(done); 5085 5086 bind(try_revoke_bias); 5087 // The prototype mark in the klass doesn't have the bias bit set any 5088 // more, indicating that objects of this data type are not supposed 5089 // to be biased any more. 
We are going to try to reset the mark of 5090 // this object to the prototype value and fall through to the 5091 // CAS-based locking scheme. Note that if our CAS fails, it means 5092 // that another thread raced us for the privilege of revoking the 5093 // bias of this particular object, so it's okay to continue in the 5094 // normal locking code. 5095 // 5096 // FIXME: due to a lack of registers we currently blow away the age 5097 // bits in this situation. Should attempt to preserve them. 5098 movl(swap_reg, saved_mark_addr); 5099 if (need_tmp_reg) { 5100 push(tmp_reg); 5101 } 5102 movl(tmp_reg, klass_addr); 5103 movl(tmp_reg, Address(tmp_reg, Klass::prototype_header_offset())); 5104 if (os::is_MP()) { 5105 lock(); 5106 } 5107 cmpxchgptr(tmp_reg, Address(obj_reg, 0)); 5108 if (need_tmp_reg) { 5109 pop(tmp_reg); 5110 } 5111 // Fall through to the normal CAS-based lock, because no matter what 5112 // the result of the above CAS, some thread must have succeeded in 5113 // removing the bias bit from the object's header. 5114 if (counters != NULL) { 5115 cond_inc32(Assembler::zero, 5116 ExternalAddress((address)counters->revoked_lock_entry_count_addr())); 5117 } 5118 5119 bind(cas_label); 5120 5121 return null_check_offset; 5122 } 5123 void MacroAssembler::call_VM_leaf_base(address entry_point, 5124 int number_of_arguments) { 5125 call(RuntimeAddress(entry_point)); 5126 increment(rsp, number_of_arguments * wordSize); 5127 } 5128 5129 void MacroAssembler::cmpoop(Address src1, jobject obj) { 5130 cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate()); 5131 } 5132 5133 void MacroAssembler::cmpoop(Register src1, jobject obj) { 5134 cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate()); 5135 } 5136 5137 void MacroAssembler::extend_sign(Register hi, Register lo) { 5138 // According to Intel Doc. AP-526, "Integer Divide", p.18. 
5139 if (VM_Version::is_P6() && hi == rdx && lo == rax) { 5140 cdql(); 5141 } else { 5142 movl(hi, lo); 5143 sarl(hi, 31); 5144 } 5145 } 5146 5147 void MacroAssembler::jC2(Register tmp, Label& L) { 5148 // set parity bit if FPU flag C2 is set (via rax) 5149 save_rax(tmp); 5150 fwait(); fnstsw_ax(); 5151 sahf(); 5152 restore_rax(tmp); 5153 // branch 5154 jcc(Assembler::parity, L); 5155 } 5156 5157 void MacroAssembler::jnC2(Register tmp, Label& L) { 5158 // set parity bit if FPU flag C2 is set (via rax) 5159 save_rax(tmp); 5160 fwait(); fnstsw_ax(); 5161 sahf(); 5162 restore_rax(tmp); 5163 // branch 5164 jcc(Assembler::noParity, L); 5165 } 5166 5167 // 32bit can do a case table jump in one instruction but we no longer allow the base 5168 // to be installed in the Address class 5169 void MacroAssembler::jump(ArrayAddress entry) { 5170 jmp(as_Address(entry)); 5171 } 5172 5173 // Note: y_lo will be destroyed 5174 void MacroAssembler::lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo) { 5175 // Long compare for Java (semantics as described in JVM spec.) 
5176 Label high, low, done; 5177 5178 cmpl(x_hi, y_hi); 5179 jcc(Assembler::less, low); 5180 jcc(Assembler::greater, high); 5181 // x_hi is the return register 5182 xorl(x_hi, x_hi); 5183 cmpl(x_lo, y_lo); 5184 jcc(Assembler::below, low); 5185 jcc(Assembler::equal, done); 5186 5187 bind(high); 5188 xorl(x_hi, x_hi); 5189 increment(x_hi); 5190 jmp(done); 5191 5192 bind(low); 5193 xorl(x_hi, x_hi); 5194 decrementl(x_hi); 5195 5196 bind(done); 5197 } 5198 5199 void MacroAssembler::lea(Register dst, AddressLiteral src) { 5200 mov_literal32(dst, (int32_t)src.target(), src.rspec()); 5201 } 5202 5203 void MacroAssembler::lea(Address dst, AddressLiteral adr) { 5204 // leal(dst, as_Address(adr)); 5205 // see note in movl as to why we must use a move 5206 mov_literal32(dst, (int32_t) adr.target(), adr.rspec()); 5207 } 5208 5209 void MacroAssembler::leave() { 5210 mov(rsp, rbp); 5211 pop(rbp); 5212 } 5213 5214 void MacroAssembler::lmul(int x_rsp_offset, int y_rsp_offset) { 5215 // Multiplication of two Java long values stored on the stack 5216 // as illustrated below. Result is in rdx:rax. 5217 // 5218 // rsp ---> [ ?? ] \ \ 5219 // .... | y_rsp_offset | 5220 // [ y_lo ] / (in bytes) | x_rsp_offset 5221 // [ y_hi ] | (in bytes) 5222 // .... | 5223 // [ x_lo ] / 5224 // [ x_hi ] 5225 // .... 
5226 // 5227 // Basic idea: lo(result) = lo(x_lo * y_lo) 5228 // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi) 5229 Address x_hi(rsp, x_rsp_offset + wordSize); Address x_lo(rsp, x_rsp_offset); 5230 Address y_hi(rsp, y_rsp_offset + wordSize); Address y_lo(rsp, y_rsp_offset); 5231 Label quick; 5232 // load x_hi, y_hi and check if quick 5233 // multiplication is possible 5234 movl(rbx, x_hi); 5235 movl(rcx, y_hi); 5236 movl(rax, rbx); 5237 orl(rbx, rcx); // rbx, = 0 <=> x_hi = 0 and y_hi = 0 5238 jcc(Assembler::zero, quick); // if rbx, = 0 do quick multiply 5239 // do full multiplication 5240 // 1st step 5241 mull(y_lo); // x_hi * y_lo 5242 movl(rbx, rax); // save lo(x_hi * y_lo) in rbx, 5243 // 2nd step 5244 movl(rax, x_lo); 5245 mull(rcx); // x_lo * y_hi 5246 addl(rbx, rax); // add lo(x_lo * y_hi) to rbx, 5247 // 3rd step 5248 bind(quick); // note: rbx, = 0 if quick multiply! 5249 movl(rax, x_lo); 5250 mull(y_lo); // x_lo * y_lo 5251 addl(rdx, rbx); // correct hi(x_lo * y_lo) 5252 } 5253 5254 void MacroAssembler::lneg(Register hi, Register lo) { 5255 negl(lo); 5256 adcl(hi, 0); 5257 negl(hi); 5258 } 5259 5260 void MacroAssembler::lshl(Register hi, Register lo) { 5261 // Java shift left long support (semantics as described in JVM spec., p.305) 5262 // (basic idea for shift counts s >= n: x << s == (x << n) << (s - n)) 5263 // shift value is in rcx ! 5264 assert(hi != rcx, "must not use rcx"); 5265 assert(lo != rcx, "must not use rcx"); 5266 const Register s = rcx; // shift count 5267 const int n = BitsPerWord; 5268 Label L; 5269 andl(s, 0x3f); // s := s & 0x3f (s < 0x40) 5270 cmpl(s, n); // if (s < n) 5271 jcc(Assembler::less, L); // else (s >= n) 5272 movl(hi, lo); // x := x << n 5273 xorl(lo, lo); 5274 // Note: subl(s, n) is not needed since the Intel shift instructions work rcx mod n! 
5275 bind(L); // s (mod n) < n 5276 shldl(hi, lo); // x := x << s 5277 shll(lo); 5278 } 5279 5280 5281 void MacroAssembler::lshr(Register hi, Register lo, bool sign_extension) { 5282 // Java shift right long support (semantics as described in JVM spec., p.306 & p.310) 5283 // (basic idea for shift counts s >= n: x >> s == (x >> n) >> (s - n)) 5284 assert(hi != rcx, "must not use rcx"); 5285 assert(lo != rcx, "must not use rcx"); 5286 const Register s = rcx; // shift count 5287 const int n = BitsPerWord; 5288 Label L; 5289 andl(s, 0x3f); // s := s & 0x3f (s < 0x40) 5290 cmpl(s, n); // if (s < n) 5291 jcc(Assembler::less, L); // else (s >= n) 5292 movl(lo, hi); // x := x >> n 5293 if (sign_extension) sarl(hi, 31); 5294 else xorl(hi, hi); 5295 // Note: subl(s, n) is not needed since the Intel shift instructions work rcx mod n! 5296 bind(L); // s (mod n) < n 5297 shrdl(lo, hi); // x := x >> s 5298 if (sign_extension) sarl(hi); 5299 else shrl(hi); 5300 } 5301 5302 void MacroAssembler::movoop(Register dst, jobject obj) { 5303 mov_literal32(dst, (int32_t)obj, oop_Relocation::spec_for_immediate()); 5304 } 5305 5306 void MacroAssembler::movoop(Address dst, jobject obj) { 5307 mov_literal32(dst, (int32_t)obj, oop_Relocation::spec_for_immediate()); 5308 } 5309 5310 void MacroAssembler::movptr(Register dst, AddressLiteral src) { 5311 if (src.is_lval()) { 5312 mov_literal32(dst, (intptr_t)src.target(), src.rspec()); 5313 } else { 5314 movl(dst, as_Address(src)); 5315 } 5316 } 5317 5318 void MacroAssembler::movptr(ArrayAddress dst, Register src) { 5319 movl(as_Address(dst), src); 5320 } 5321 5322 void MacroAssembler::movptr(Register dst, ArrayAddress src) { 5323 movl(dst, as_Address(src)); 5324 } 5325 5326 // src should NEVER be a real pointer. 
Use AddressLiteral for true pointers 5327 void MacroAssembler::movptr(Address dst, intptr_t src) { 5328 movl(dst, src); 5329 } 5330 5331 5332 void MacroAssembler::pop_callee_saved_registers() { 5333 pop(rcx); 5334 pop(rdx); 5335 pop(rdi); 5336 pop(rsi); 5337 } 5338 5339 void MacroAssembler::pop_fTOS() { 5340 fld_d(Address(rsp, 0)); 5341 addl(rsp, 2 * wordSize); 5342 } 5343 5344 void MacroAssembler::push_callee_saved_registers() { 5345 push(rsi); 5346 push(rdi); 5347 push(rdx); 5348 push(rcx); 5349 } 5350 5351 void MacroAssembler::push_fTOS() { 5352 subl(rsp, 2 * wordSize); 5353 fstp_d(Address(rsp, 0)); 5354 } 5355 5356 5357 void MacroAssembler::pushoop(jobject obj) { 5358 push_literal32((int32_t)obj, oop_Relocation::spec_for_immediate()); 5359 } 5360 5361 5362 void MacroAssembler::pushptr(AddressLiteral src) { 5363 if (src.is_lval()) { 5364 push_literal32((int32_t)src.target(), src.rspec()); 5365 } else { 5366 pushl(as_Address(src)); 5367 } 5368 } 5369 5370 void MacroAssembler::set_word_if_not_zero(Register dst) { 5371 xorl(dst, dst); 5372 set_byte_if_not_zero(dst); 5373 } 5374 5375 static void pass_arg0(MacroAssembler* masm, Register arg) { 5376 masm->push(arg); 5377 } 5378 5379 static void pass_arg1(MacroAssembler* masm, Register arg) { 5380 masm->push(arg); 5381 } 5382 5383 static void pass_arg2(MacroAssembler* masm, Register arg) { 5384 masm->push(arg); 5385 } 5386 5387 static void pass_arg3(MacroAssembler* masm, Register arg) { 5388 masm->push(arg); 5389 } 5390 5391 #ifndef PRODUCT 5392 extern "C" void findpc(intptr_t x); 5393 #endif 5394 5395 void MacroAssembler::debug32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip, char* msg) { 5396 // In order to get locks to work, we need to fake a in_VM state 5397 JavaThread* thread = JavaThread::current(); 5398 JavaThreadState saved_state = thread->thread_state(); 5399 thread->set_thread_state(_thread_in_vm); 5400 if (ShowMessageBoxOnError) { 5401 JavaThread* thread = 
JavaThread::current(); 5402 JavaThreadState saved_state = thread->thread_state(); 5403 thread->set_thread_state(_thread_in_vm); 5404 if (CountBytecodes || TraceBytecodes || StopInterpreterAt) { 5405 ttyLocker ttyl; 5406 BytecodeCounter::print(); 5407 } 5408 // To see where a verify_oop failed, get $ebx+40/X for this frame. 5409 // This is the value of eip which points to where verify_oop will return. 5410 if (os::message_box(msg, "Execution stopped, print registers?")) { 5411 ttyLocker ttyl; 5412 tty->print_cr("eip = 0x%08x", eip); 5413 #ifndef PRODUCT 5414 if ((WizardMode || Verbose) && PrintMiscellaneous) { 5415 tty->cr(); 5416 findpc(eip); 5417 tty->cr(); 5418 } 5419 #endif 5420 tty->print_cr("rax = 0x%08x", rax); 5421 tty->print_cr("rbx = 0x%08x", rbx); 5422 tty->print_cr("rcx = 0x%08x", rcx); 5423 tty->print_cr("rdx = 0x%08x", rdx); 5424 tty->print_cr("rdi = 0x%08x", rdi); 5425 tty->print_cr("rsi = 0x%08x", rsi); 5426 tty->print_cr("rbp = 0x%08x", rbp); 5427 tty->print_cr("rsp = 0x%08x", rsp); 5428 BREAKPOINT; 5429 assert(false, "start up GDB"); 5430 } 5431 } else { 5432 ttyLocker ttyl; 5433 ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg); 5434 assert(false, err_msg("DEBUG MESSAGE: %s", msg)); 5435 } 5436 ThreadStateTransition::transition(thread, _thread_in_vm, saved_state); 5437 } 5438 5439 void MacroAssembler::stop(const char* msg) { 5440 ExternalAddress message((address)msg); 5441 // push address of message 5442 pushptr(message.addr()); 5443 { Label L; call(L, relocInfo::none); bind(L); } // push eip 5444 pusha(); // push registers 5445 call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug32))); 5446 hlt(); 5447 } 5448 5449 void MacroAssembler::warn(const char* msg) { 5450 push_CPU_state(); 5451 5452 ExternalAddress message((address) msg); 5453 // push address of message 5454 pushptr(message.addr()); 5455 5456 call(RuntimeAddress(CAST_FROM_FN_PTR(address, warning))); 5457 addl(rsp, wordSize); // discard argument 
5458 pop_CPU_state(); 5459 } 5460 5461 #else // _LP64 5462 5463 // 64 bit versions 5464 5465 Address MacroAssembler::as_Address(AddressLiteral adr) { 5466 // amd64 always does this as a pc-rel 5467 // we can be absolute or disp based on the instruction type 5468 // jmp/call are displacements others are absolute 5469 assert(!adr.is_lval(), "must be rval"); 5470 assert(reachable(adr), "must be"); 5471 return Address((int32_t)(intptr_t)(adr.target() - pc()), adr.target(), adr.reloc()); 5472 5473 } 5474 5475 Address MacroAssembler::as_Address(ArrayAddress adr) { 5476 AddressLiteral base = adr.base(); 5477 lea(rscratch1, base); 5478 Address index = adr.index(); 5479 assert(index._disp == 0, "must not have disp"); // maybe it can? 5480 Address array(rscratch1, index._index, index._scale, index._disp); 5481 return array; 5482 } 5483 5484 int MacroAssembler::biased_locking_enter(Register lock_reg, 5485 Register obj_reg, 5486 Register swap_reg, 5487 Register tmp_reg, 5488 bool swap_reg_contains_mark, 5489 Label& done, 5490 Label* slow_case, 5491 BiasedLockingCounters* counters) { 5492 assert(UseBiasedLocking, "why call this otherwise?"); 5493 assert(swap_reg == rax, "swap_reg must be rax for cmpxchgq"); 5494 assert(tmp_reg != noreg, "tmp_reg must be supplied"); 5495 assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg); 5496 assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout"); 5497 Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes()); 5498 Address saved_mark_addr(lock_reg, 0); 5499 5500 if (PrintBiasedLockingStatistics && counters == NULL) 5501 counters = BiasedLocking::counters(); 5502 5503 // Biased locking 5504 // See whether the lock is currently biased toward our thread and 5505 // whether the epoch is still valid 5506 // Note that the runtime guarantees sufficient alignment of JavaThread 5507 // pointers to allow age to be placed into low bits 5508 // First 
check to see whether biasing is even enabled for this object 5509 Label cas_label; 5510 int null_check_offset = -1; 5511 if (!swap_reg_contains_mark) { 5512 null_check_offset = offset(); 5513 movq(swap_reg, mark_addr); 5514 } 5515 movq(tmp_reg, swap_reg); 5516 andq(tmp_reg, markOopDesc::biased_lock_mask_in_place); 5517 cmpq(tmp_reg, markOopDesc::biased_lock_pattern); 5518 jcc(Assembler::notEqual, cas_label); 5519 // The bias pattern is present in the object's header. Need to check 5520 // whether the bias owner and the epoch are both still current. 5521 load_prototype_header(tmp_reg, obj_reg); 5522 orq(tmp_reg, r15_thread); 5523 xorq(tmp_reg, swap_reg); 5524 andq(tmp_reg, ~((int) markOopDesc::age_mask_in_place)); 5525 if (counters != NULL) { 5526 cond_inc32(Assembler::zero, 5527 ExternalAddress((address) counters->anonymously_biased_lock_entry_count_addr())); 5528 } 5529 jcc(Assembler::equal, done); 5530 5531 Label try_revoke_bias; 5532 Label try_rebias; 5533 5534 // At this point we know that the header has the bias pattern and 5535 // that we are not the bias owner in the current epoch. We need to 5536 // figure out more details about the state of the header in order to 5537 // know what operations can be legally performed on the object's 5538 // header. 5539 5540 // If the low three bits in the xor result aren't clear, that means 5541 // the prototype header is no longer biased and we have to revoke 5542 // the bias on this object. 5543 testq(tmp_reg, markOopDesc::biased_lock_mask_in_place); 5544 jcc(Assembler::notZero, try_revoke_bias); 5545 5546 // Biasing is still enabled for this data type. See whether the 5547 // epoch of the current bias is still valid, meaning that the epoch 5548 // bits of the mark word are equal to the epoch bits of the 5549 // prototype header. (Note that the prototype header's epoch bits 5550 // only change at a safepoint.) If not, attempt to rebias the object 5551 // toward the current thread. 
Note that we must be absolutely sure 5552 // that the current epoch is invalid in order to do this because 5553 // otherwise the manipulations it performs on the mark word are 5554 // illegal. 5555 testq(tmp_reg, markOopDesc::epoch_mask_in_place); 5556 jcc(Assembler::notZero, try_rebias); 5557 5558 // The epoch of the current bias is still valid but we know nothing 5559 // about the owner; it might be set or it might be clear. Try to 5560 // acquire the bias of the object using an atomic operation. If this 5561 // fails we will go in to the runtime to revoke the object's bias. 5562 // Note that we first construct the presumed unbiased header so we 5563 // don't accidentally blow away another thread's valid bias. 5564 andq(swap_reg, 5565 markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place); 5566 movq(tmp_reg, swap_reg); 5567 orq(tmp_reg, r15_thread); 5568 if (os::is_MP()) { 5569 lock(); 5570 } 5571 cmpxchgq(tmp_reg, Address(obj_reg, 0)); 5572 // If the biasing toward our thread failed, this means that 5573 // another thread succeeded in biasing it toward itself and we 5574 // need to revoke that bias. The revocation will occur in the 5575 // interpreter runtime in the slow case. 5576 if (counters != NULL) { 5577 cond_inc32(Assembler::zero, 5578 ExternalAddress((address) counters->anonymously_biased_lock_entry_count_addr())); 5579 } 5580 if (slow_case != NULL) { 5581 jcc(Assembler::notZero, *slow_case); 5582 } 5583 jmp(done); 5584 5585 bind(try_rebias); 5586 // At this point we know the epoch has expired, meaning that the 5587 // current "bias owner", if any, is actually invalid. Under these 5588 // circumstances _only_, we are allowed to use the current header's 5589 // value as the comparison value when doing the cas to acquire the 5590 // bias in the current epoch. In other words, we allow transfer of 5591 // the bias from one thread to another directly in this situation. 
5592 // 5593 // FIXME: due to a lack of registers we currently blow away the age 5594 // bits in this situation. Should attempt to preserve them. 5595 load_prototype_header(tmp_reg, obj_reg); 5596 orq(tmp_reg, r15_thread); 5597 if (os::is_MP()) { 5598 lock(); 5599 } 5600 cmpxchgq(tmp_reg, Address(obj_reg, 0)); 5601 // If the biasing toward our thread failed, then another thread 5602 // succeeded in biasing it toward itself and we need to revoke that 5603 // bias. The revocation will occur in the runtime in the slow case. 5604 if (counters != NULL) { 5605 cond_inc32(Assembler::zero, 5606 ExternalAddress((address) counters->rebiased_lock_entry_count_addr())); 5607 } 5608 if (slow_case != NULL) { 5609 jcc(Assembler::notZero, *slow_case); 5610 } 5611 jmp(done); 5612 5613 bind(try_revoke_bias); 5614 // The prototype mark in the klass doesn't have the bias bit set any 5615 // more, indicating that objects of this data type are not supposed 5616 // to be biased any more. We are going to try to reset the mark of 5617 // this object to the prototype value and fall through to the 5618 // CAS-based locking scheme. Note that if our CAS fails, it means 5619 // that another thread raced us for the privilege of revoking the 5620 // bias of this particular object, so it's okay to continue in the 5621 // normal locking code. 5622 // 5623 // FIXME: due to a lack of registers we currently blow away the age 5624 // bits in this situation. Should attempt to preserve them. 5625 load_prototype_header(tmp_reg, obj_reg); 5626 if (os::is_MP()) { 5627 lock(); 5628 } 5629 cmpxchgq(tmp_reg, Address(obj_reg, 0)); 5630 // Fall through to the normal CAS-based lock, because no matter what 5631 // the result of the above CAS, some thread must have succeeded in 5632 // removing the bias bit from the object's header. 
5633 if (counters != NULL) { 5634 cond_inc32(Assembler::zero, 5635 ExternalAddress((address) counters->revoked_lock_entry_count_addr())); 5636 } 5637 5638 bind(cas_label); 5639 5640 return null_check_offset; 5641 } 5642 5643 void MacroAssembler::call_VM_leaf_base(address entry_point, int num_args) { 5644 Label L, E; 5645 5646 #ifdef _WIN64 5647 // Windows always allocates space for it's register args 5648 assert(num_args <= 4, "only register arguments supported"); 5649 subq(rsp, frame::arg_reg_save_area_bytes); 5650 #endif 5651 5652 // Align stack if necessary 5653 testl(rsp, 15); 5654 jcc(Assembler::zero, L); 5655 5656 subq(rsp, 8); 5657 { 5658 call(RuntimeAddress(entry_point)); 5659 } 5660 addq(rsp, 8); 5661 jmp(E); 5662 5663 bind(L); 5664 { 5665 call(RuntimeAddress(entry_point)); 5666 } 5667 5668 bind(E); 5669 5670 #ifdef _WIN64 5671 // restore stack pointer 5672 addq(rsp, frame::arg_reg_save_area_bytes); 5673 #endif 5674 5675 } 5676 5677 void MacroAssembler::cmp64(Register src1, AddressLiteral src2) { 5678 assert(!src2.is_lval(), "should use cmpptr"); 5679 5680 if (reachable(src2)) { 5681 cmpq(src1, as_Address(src2)); 5682 } else { 5683 lea(rscratch1, src2); 5684 Assembler::cmpq(src1, Address(rscratch1, 0)); 5685 } 5686 } 5687 5688 int MacroAssembler::corrected_idivq(Register reg) { 5689 // Full implementation of Java ldiv and lrem; checks for special 5690 // case as described in JVM spec., p.243 & p.271. The function 5691 // returns the (pc) offset of the idivl instruction - may be needed 5692 // for implicit exceptions. 
5693 // 5694 // normal case special case 5695 // 5696 // input : rax: dividend min_long 5697 // reg: divisor (may not be eax/edx) -1 5698 // 5699 // output: rax: quotient (= rax idiv reg) min_long 5700 // rdx: remainder (= rax irem reg) 0 5701 assert(reg != rax && reg != rdx, "reg cannot be rax or rdx register"); 5702 static const int64_t min_long = 0x8000000000000000; 5703 Label normal_case, special_case; 5704 5705 // check for special case 5706 cmp64(rax, ExternalAddress((address) &min_long)); 5707 jcc(Assembler::notEqual, normal_case); 5708 xorl(rdx, rdx); // prepare rdx for possible special case (where 5709 // remainder = 0) 5710 cmpq(reg, -1); 5711 jcc(Assembler::equal, special_case); 5712 5713 // handle normal case 5714 bind(normal_case); 5715 cdqq(); 5716 int idivq_offset = offset(); 5717 idivq(reg); 5718 5719 // normal and special case exit 5720 bind(special_case); 5721 5722 return idivq_offset; 5723 } 5724 5725 void MacroAssembler::decrementq(Register reg, int value) { 5726 if (value == min_jint) { subq(reg, value); return; } 5727 if (value < 0) { incrementq(reg, -value); return; } 5728 if (value == 0) { ; return; } 5729 if (value == 1 && UseIncDec) { decq(reg) ; return; } 5730 /* else */ { subq(reg, value) ; return; } 5731 } 5732 5733 void MacroAssembler::decrementq(Address dst, int value) { 5734 if (value == min_jint) { subq(dst, value); return; } 5735 if (value < 0) { incrementq(dst, -value); return; } 5736 if (value == 0) { ; return; } 5737 if (value == 1 && UseIncDec) { decq(dst) ; return; } 5738 /* else */ { subq(dst, value) ; return; } 5739 } 5740 5741 void MacroAssembler::incrementq(Register reg, int value) { 5742 if (value == min_jint) { addq(reg, value); return; } 5743 if (value < 0) { decrementq(reg, -value); return; } 5744 if (value == 0) { ; return; } 5745 if (value == 1 && UseIncDec) { incq(reg) ; return; } 5746 /* else */ { addq(reg, value) ; return; } 5747 } 5748 5749 void MacroAssembler::incrementq(Address dst, int value) { 5750 if (value 
== min_jint) { addq(dst, value); return; } 5751 if (value < 0) { decrementq(dst, -value); return; } 5752 if (value == 0) { ; return; } 5753 if (value == 1 && UseIncDec) { incq(dst) ; return; } 5754 /* else */ { addq(dst, value) ; return; } 5755 } 5756 5757 // 32bit can do a case table jump in one instruction but we no longer allow the base 5758 // to be installed in the Address class 5759 void MacroAssembler::jump(ArrayAddress entry) { 5760 lea(rscratch1, entry.base()); 5761 Address dispatch = entry.index(); 5762 assert(dispatch._base == noreg, "must be"); 5763 dispatch._base = rscratch1; 5764 jmp(dispatch); 5765 } 5766 5767 void MacroAssembler::lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo) { 5768 ShouldNotReachHere(); // 64bit doesn't use two regs 5769 cmpq(x_lo, y_lo); 5770 } 5771 5772 void MacroAssembler::lea(Register dst, AddressLiteral src) { 5773 mov_literal64(dst, (intptr_t)src.target(), src.rspec()); 5774 } 5775 5776 void MacroAssembler::lea(Address dst, AddressLiteral adr) { 5777 mov_literal64(rscratch1, (intptr_t)adr.target(), adr.rspec()); 5778 movptr(dst, rscratch1); 5779 } 5780 5781 void MacroAssembler::leave() { 5782 // %%% is this really better? Why not on 32bit too? 
5783 emit_byte(0xC9); // LEAVE 5784 } 5785 5786 void MacroAssembler::lneg(Register hi, Register lo) { 5787 ShouldNotReachHere(); // 64bit doesn't use two regs 5788 negq(lo); 5789 } 5790 5791 void MacroAssembler::movoop(Register dst, jobject obj) { 5792 mov_literal64(dst, (intptr_t)obj, oop_Relocation::spec_for_immediate()); 5793 } 5794 5795 void MacroAssembler::movoop(Address dst, jobject obj) { 5796 mov_literal64(rscratch1, (intptr_t)obj, oop_Relocation::spec_for_immediate()); 5797 movq(dst, rscratch1); 5798 } 5799 5800 void MacroAssembler::movptr(Register dst, AddressLiteral src) { 5801 if (src.is_lval()) { 5802 mov_literal64(dst, (intptr_t)src.target(), src.rspec()); 5803 } else { 5804 if (reachable(src)) { 5805 movq(dst, as_Address(src)); 5806 } else { 5807 lea(rscratch1, src); 5808 movq(dst, Address(rscratch1,0)); 5809 } 5810 } 5811 } 5812 5813 void MacroAssembler::movptr(ArrayAddress dst, Register src) { 5814 movq(as_Address(dst), src); 5815 } 5816 5817 void MacroAssembler::movptr(Register dst, ArrayAddress src) { 5818 movq(dst, as_Address(src)); 5819 } 5820 5821 // src should NEVER be a real pointer. 
Use AddressLiteral for true pointers 5822 void MacroAssembler::movptr(Address dst, intptr_t src) { 5823 mov64(rscratch1, src); 5824 movq(dst, rscratch1); 5825 } 5826 5827 // These are mostly for initializing NULL 5828 void MacroAssembler::movptr(Address dst, int32_t src) { 5829 movslq(dst, src); 5830 } 5831 5832 void MacroAssembler::movptr(Register dst, int32_t src) { 5833 mov64(dst, (intptr_t)src); 5834 } 5835 5836 void MacroAssembler::pushoop(jobject obj) { 5837 movoop(rscratch1, obj); 5838 push(rscratch1); 5839 } 5840 5841 void MacroAssembler::pushptr(AddressLiteral src) { 5842 lea(rscratch1, src); 5843 if (src.is_lval()) { 5844 push(rscratch1); 5845 } else { 5846 pushq(Address(rscratch1, 0)); 5847 } 5848 } 5849 5850 void MacroAssembler::reset_last_Java_frame(bool clear_fp, 5851 bool clear_pc) { 5852 // we must set sp to zero to clear frame 5853 movptr(Address(r15_thread, JavaThread::last_Java_sp_offset()), NULL_WORD); 5854 // must clear fp, so that compiled frames are not confused; it is 5855 // possible that we need it only for debugging 5856 if (clear_fp) { 5857 movptr(Address(r15_thread, JavaThread::last_Java_fp_offset()), NULL_WORD); 5858 } 5859 5860 if (clear_pc) { 5861 movptr(Address(r15_thread, JavaThread::last_Java_pc_offset()), NULL_WORD); 5862 } 5863 } 5864 5865 void MacroAssembler::set_last_Java_frame(Register last_java_sp, 5866 Register last_java_fp, 5867 address last_java_pc) { 5868 // determine last_java_sp register 5869 if (!last_java_sp->is_valid()) { 5870 last_java_sp = rsp; 5871 } 5872 5873 // last_java_fp is optional 5874 if (last_java_fp->is_valid()) { 5875 movptr(Address(r15_thread, JavaThread::last_Java_fp_offset()), 5876 last_java_fp); 5877 } 5878 5879 // last_java_pc is optional 5880 if (last_java_pc != NULL) { 5881 Address java_pc(r15_thread, 5882 JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()); 5883 lea(rscratch1, InternalAddress(last_java_pc)); 5884 movptr(java_pc, rscratch1); 5885 } 5886 5887 
movptr(Address(r15_thread, JavaThread::last_Java_sp_offset()), last_java_sp); 5888 } 5889 5890 static void pass_arg0(MacroAssembler* masm, Register arg) { 5891 if (c_rarg0 != arg ) { 5892 masm->mov(c_rarg0, arg); 5893 } 5894 } 5895 5896 static void pass_arg1(MacroAssembler* masm, Register arg) { 5897 if (c_rarg1 != arg ) { 5898 masm->mov(c_rarg1, arg); 5899 } 5900 } 5901 5902 static void pass_arg2(MacroAssembler* masm, Register arg) { 5903 if (c_rarg2 != arg ) { 5904 masm->mov(c_rarg2, arg); 5905 } 5906 } 5907 5908 static void pass_arg3(MacroAssembler* masm, Register arg) { 5909 if (c_rarg3 != arg ) { 5910 masm->mov(c_rarg3, arg); 5911 } 5912 } 5913 5914 void MacroAssembler::stop(const char* msg) { 5915 address rip = pc(); 5916 pusha(); // get regs on stack 5917 lea(c_rarg0, ExternalAddress((address) msg)); 5918 lea(c_rarg1, InternalAddress(rip)); 5919 movq(c_rarg2, rsp); // pass pointer to regs array 5920 andq(rsp, -16); // align stack as required by ABI 5921 call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug64))); 5922 hlt(); 5923 } 5924 5925 void MacroAssembler::warn(const char* msg) { 5926 push(rsp); 5927 andq(rsp, -16); // align stack as required by push_CPU_state and call 5928 5929 push_CPU_state(); // keeps alignment at 16 bytes 5930 lea(c_rarg0, ExternalAddress((address) msg)); 5931 call_VM_leaf(CAST_FROM_FN_PTR(address, warning), c_rarg0); 5932 pop_CPU_state(); 5933 pop(rsp); 5934 } 5935 5936 #ifndef PRODUCT 5937 extern "C" void findpc(intptr_t x); 5938 #endif 5939 5940 void MacroAssembler::debug64(char* msg, int64_t pc, int64_t regs[]) { 5941 // In order to get locks to work, we need to fake a in_VM state 5942 if (ShowMessageBoxOnError ) { 5943 JavaThread* thread = JavaThread::current(); 5944 JavaThreadState saved_state = thread->thread_state(); 5945 thread->set_thread_state(_thread_in_vm); 5946 #ifndef PRODUCT 5947 if (CountBytecodes || TraceBytecodes || StopInterpreterAt) { 5948 ttyLocker ttyl; 5949 BytecodeCounter::print(); 5950 } 5951 
#endif 5952 // To see where a verify_oop failed, get $ebx+40/X for this frame. 5953 // XXX correct this offset for amd64 5954 // This is the value of eip which points to where verify_oop will return. 5955 if (os::message_box(msg, "Execution stopped, print registers?")) { 5956 ttyLocker ttyl; 5957 tty->print_cr("rip = 0x%016lx", pc); 5958 #ifndef PRODUCT 5959 tty->cr(); 5960 findpc(pc); 5961 tty->cr(); 5962 #endif 5963 tty->print_cr("rax = 0x%016lx", regs[15]); 5964 tty->print_cr("rbx = 0x%016lx", regs[12]); 5965 tty->print_cr("rcx = 0x%016lx", regs[14]); 5966 tty->print_cr("rdx = 0x%016lx", regs[13]); 5967 tty->print_cr("rdi = 0x%016lx", regs[8]); 5968 tty->print_cr("rsi = 0x%016lx", regs[9]); 5969 tty->print_cr("rbp = 0x%016lx", regs[10]); 5970 tty->print_cr("rsp = 0x%016lx", regs[11]); 5971 tty->print_cr("r8 = 0x%016lx", regs[7]); 5972 tty->print_cr("r9 = 0x%016lx", regs[6]); 5973 tty->print_cr("r10 = 0x%016lx", regs[5]); 5974 tty->print_cr("r11 = 0x%016lx", regs[4]); 5975 tty->print_cr("r12 = 0x%016lx", regs[3]); 5976 tty->print_cr("r13 = 0x%016lx", regs[2]); 5977 tty->print_cr("r14 = 0x%016lx", regs[1]); 5978 tty->print_cr("r15 = 0x%016lx", regs[0]); 5979 BREAKPOINT; 5980 } 5981 ThreadStateTransition::transition(thread, _thread_in_vm, saved_state); 5982 } else { 5983 ttyLocker ttyl; 5984 ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", 5985 msg); 5986 assert(false, err_msg("DEBUG MESSAGE: %s", msg)); 5987 } 5988 } 5989 5990 #endif // _LP64 5991 5992 // Now versions that are common to 32/64 bit 5993 5994 void MacroAssembler::addptr(Register dst, int32_t imm32) { 5995 LP64_ONLY(addq(dst, imm32)) NOT_LP64(addl(dst, imm32)); 5996 } 5997 5998 void MacroAssembler::addptr(Register dst, Register src) { 5999 LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src)); 6000 } 6001 6002 void MacroAssembler::addptr(Address dst, Register src) { 6003 LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src)); 6004 } 6005 6006 void MacroAssembler::addsd(XMMRegister dst, 
AddressLiteral src) { 6007 if (reachable(src)) { 6008 Assembler::addsd(dst, as_Address(src)); 6009 } else { 6010 lea(rscratch1, src); 6011 Assembler::addsd(dst, Address(rscratch1, 0)); 6012 } 6013 } 6014 6015 void MacroAssembler::addss(XMMRegister dst, AddressLiteral src) { 6016 if (reachable(src)) { 6017 addss(dst, as_Address(src)); 6018 } else { 6019 lea(rscratch1, src); 6020 addss(dst, Address(rscratch1, 0)); 6021 } 6022 } 6023 6024 void MacroAssembler::align(int modulus) { 6025 if (offset() % modulus != 0) { 6026 nop(modulus - (offset() % modulus)); 6027 } 6028 } 6029 6030 void MacroAssembler::andpd(XMMRegister dst, AddressLiteral src) { 6031 // Used in sign-masking with aligned address. 6032 assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes"); 6033 if (reachable(src)) { 6034 Assembler::andpd(dst, as_Address(src)); 6035 } else { 6036 lea(rscratch1, src); 6037 Assembler::andpd(dst, Address(rscratch1, 0)); 6038 } 6039 } 6040 6041 void MacroAssembler::andps(XMMRegister dst, AddressLiteral src) { 6042 // Used in sign-masking with aligned address. 6043 assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes"); 6044 if (reachable(src)) { 6045 Assembler::andps(dst, as_Address(src)); 6046 } else { 6047 lea(rscratch1, src); 6048 Assembler::andps(dst, Address(rscratch1, 0)); 6049 } 6050 } 6051 6052 void MacroAssembler::andptr(Register dst, int32_t imm32) { 6053 LP64_ONLY(andq(dst, imm32)) NOT_LP64(andl(dst, imm32)); 6054 } 6055 6056 void MacroAssembler::atomic_incl(AddressLiteral counter_addr) { 6057 pushf(); 6058 if (os::is_MP()) 6059 lock(); 6060 incrementl(counter_addr); 6061 popf(); 6062 } 6063 6064 // Writes to stack successive pages until offset reached to check for 6065 // stack overflow + shadow pages. This clobbers tmp. 
6066 void MacroAssembler::bang_stack_size(Register size, Register tmp) { 6067 movptr(tmp, rsp); 6068 // Bang stack for total size given plus shadow page size. 6069 // Bang one page at a time because large size can bang beyond yellow and 6070 // red zones. 6071 Label loop; 6072 bind(loop); 6073 movl(Address(tmp, (-os::vm_page_size())), size ); 6074 subptr(tmp, os::vm_page_size()); 6075 subl(size, os::vm_page_size()); 6076 jcc(Assembler::greater, loop); 6077 6078 // Bang down shadow pages too. 6079 // The -1 because we already subtracted 1 page. 6080 for (int i = 0; i< StackShadowPages-1; i++) { 6081 // this could be any sized move but this is can be a debugging crumb 6082 // so the bigger the better. 6083 movptr(Address(tmp, (-i*os::vm_page_size())), size ); 6084 } 6085 } 6086 6087 void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) { 6088 assert(UseBiasedLocking, "why call this otherwise?"); 6089 6090 // Check for biased locking unlock case, which is a no-op 6091 // Note: we do not have to check the thread ID for two reasons. 6092 // First, the interpreter checks for IllegalMonitorStateException at 6093 // a higher level. Second, if the bias was revoked while we held the 6094 // lock, the object could not be rebiased toward another thread, so 6095 // the bias bit would be clear. 6096 movptr(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); 6097 andptr(temp_reg, markOopDesc::biased_lock_mask_in_place); 6098 cmpptr(temp_reg, markOopDesc::biased_lock_pattern); 6099 jcc(Assembler::equal, done); 6100 } 6101 6102 void MacroAssembler::c2bool(Register x) { 6103 // implements x == 0 ? 0 : 1 6104 // note: must only look at least-significant byte of x 6105 // since C-style booleans are stored in one byte 6106 // only! 
(was bug) 6107 andl(x, 0xFF); 6108 setb(Assembler::notZero, x); 6109 } 6110 6111 // Wouldn't need if AddressLiteral version had new name 6112 void MacroAssembler::call(Label& L, relocInfo::relocType rtype) { 6113 Assembler::call(L, rtype); 6114 } 6115 6116 void MacroAssembler::call(Register entry) { 6117 Assembler::call(entry); 6118 } 6119 6120 void MacroAssembler::call(AddressLiteral entry) { 6121 if (reachable(entry)) { 6122 Assembler::call_literal(entry.target(), entry.rspec()); 6123 } else { 6124 lea(rscratch1, entry); 6125 Assembler::call(rscratch1); 6126 } 6127 } 6128 6129 // Implementation of call_VM versions 6130 6131 void MacroAssembler::call_VM(Register oop_result, 6132 address entry_point, 6133 bool check_exceptions) { 6134 Label C, E; 6135 call(C, relocInfo::none); 6136 jmp(E); 6137 6138 bind(C); 6139 call_VM_helper(oop_result, entry_point, 0, check_exceptions); 6140 ret(0); 6141 6142 bind(E); 6143 } 6144 6145 void MacroAssembler::call_VM(Register oop_result, 6146 address entry_point, 6147 Register arg_1, 6148 bool check_exceptions) { 6149 Label C, E; 6150 call(C, relocInfo::none); 6151 jmp(E); 6152 6153 bind(C); 6154 pass_arg1(this, arg_1); 6155 call_VM_helper(oop_result, entry_point, 1, check_exceptions); 6156 ret(0); 6157 6158 bind(E); 6159 } 6160 6161 void MacroAssembler::call_VM(Register oop_result, 6162 address entry_point, 6163 Register arg_1, 6164 Register arg_2, 6165 bool check_exceptions) { 6166 Label C, E; 6167 call(C, relocInfo::none); 6168 jmp(E); 6169 6170 bind(C); 6171 6172 LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg")); 6173 6174 pass_arg2(this, arg_2); 6175 pass_arg1(this, arg_1); 6176 call_VM_helper(oop_result, entry_point, 2, check_exceptions); 6177 ret(0); 6178 6179 bind(E); 6180 } 6181 6182 void MacroAssembler::call_VM(Register oop_result, 6183 address entry_point, 6184 Register arg_1, 6185 Register arg_2, 6186 Register arg_3, 6187 bool check_exceptions) { 6188 Label C, E; 6189 call(C, relocInfo::none); 6190 jmp(E); 6191 6192 
bind(C); 6193 6194 LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg")); 6195 LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg")); 6196 pass_arg3(this, arg_3); 6197 6198 LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg")); 6199 pass_arg2(this, arg_2); 6200 6201 pass_arg1(this, arg_1); 6202 call_VM_helper(oop_result, entry_point, 3, check_exceptions); 6203 ret(0); 6204 6205 bind(E); 6206 } 6207 6208 void MacroAssembler::call_VM(Register oop_result, 6209 Register last_java_sp, 6210 address entry_point, 6211 int number_of_arguments, 6212 bool check_exceptions) { 6213 Register thread = LP64_ONLY(r15_thread) NOT_LP64(noreg); 6214 call_VM_base(oop_result, thread, last_java_sp, entry_point, number_of_arguments, check_exceptions); 6215 } 6216 6217 void MacroAssembler::call_VM(Register oop_result, 6218 Register last_java_sp, 6219 address entry_point, 6220 Register arg_1, 6221 bool check_exceptions) { 6222 pass_arg1(this, arg_1); 6223 call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions); 6224 } 6225 6226 void MacroAssembler::call_VM(Register oop_result, 6227 Register last_java_sp, 6228 address entry_point, 6229 Register arg_1, 6230 Register arg_2, 6231 bool check_exceptions) { 6232 6233 LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg")); 6234 pass_arg2(this, arg_2); 6235 pass_arg1(this, arg_1); 6236 call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions); 6237 } 6238 6239 void MacroAssembler::call_VM(Register oop_result, 6240 Register last_java_sp, 6241 address entry_point, 6242 Register arg_1, 6243 Register arg_2, 6244 Register arg_3, 6245 bool check_exceptions) { 6246 LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg")); 6247 LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg")); 6248 pass_arg3(this, arg_3); 6249 LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg")); 6250 pass_arg2(this, arg_2); 6251 pass_arg1(this, arg_1); 6252 call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions); 6253 } 6254 6255 void MacroAssembler::super_call_VM(Register 
oop_result, 6256 Register last_java_sp, 6257 address entry_point, 6258 int number_of_arguments, 6259 bool check_exceptions) { 6260 Register thread = LP64_ONLY(r15_thread) NOT_LP64(noreg); 6261 MacroAssembler::call_VM_base(oop_result, thread, last_java_sp, entry_point, number_of_arguments, check_exceptions); 6262 } 6263 6264 void MacroAssembler::super_call_VM(Register oop_result, 6265 Register last_java_sp, 6266 address entry_point, 6267 Register arg_1, 6268 bool check_exceptions) { 6269 pass_arg1(this, arg_1); 6270 super_call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions); 6271 } 6272 6273 void MacroAssembler::super_call_VM(Register oop_result, 6274 Register last_java_sp, 6275 address entry_point, 6276 Register arg_1, 6277 Register arg_2, 6278 bool check_exceptions) { 6279 6280 LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg")); 6281 pass_arg2(this, arg_2); 6282 pass_arg1(this, arg_1); 6283 super_call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions); 6284 } 6285 6286 void MacroAssembler::super_call_VM(Register oop_result, 6287 Register last_java_sp, 6288 address entry_point, 6289 Register arg_1, 6290 Register arg_2, 6291 Register arg_3, 6292 bool check_exceptions) { 6293 LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg")); 6294 LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg")); 6295 pass_arg3(this, arg_3); 6296 LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg")); 6297 pass_arg2(this, arg_2); 6298 pass_arg1(this, arg_1); 6299 super_call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions); 6300 } 6301 6302 void MacroAssembler::call_VM_base(Register oop_result, 6303 Register java_thread, 6304 Register last_java_sp, 6305 address entry_point, 6306 int number_of_arguments, 6307 bool check_exceptions) { 6308 // determine java_thread register 6309 if (!java_thread->is_valid()) { 6310 #ifdef _LP64 6311 java_thread = r15_thread; 6312 #else 6313 java_thread = rdi; 6314 get_thread(java_thread); 6315 #endif // LP64 6316 } 6317 // 
determine last_java_sp register 6318 if (!last_java_sp->is_valid()) { 6319 last_java_sp = rsp; 6320 } 6321 // debugging support 6322 assert(number_of_arguments >= 0 , "cannot have negative number of arguments"); 6323 LP64_ONLY(assert(java_thread == r15_thread, "unexpected register")); 6324 #ifdef ASSERT 6325 // TraceBytecodes does not use r12 but saves it over the call, so don't verify 6326 // r12 is the heapbase. 6327 LP64_ONLY(if (UseCompressedOops && !TraceBytecodes) verify_heapbase("call_VM_base");) 6328 #endif // ASSERT 6329 6330 assert(java_thread != oop_result , "cannot use the same register for java_thread & oop_result"); 6331 assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp"); 6332 6333 // push java thread (becomes first argument of C function) 6334 6335 NOT_LP64(push(java_thread); number_of_arguments++); 6336 LP64_ONLY(mov(c_rarg0, r15_thread)); 6337 6338 // set last Java frame before call 6339 assert(last_java_sp != rbp, "can't use ebp/rbp"); 6340 6341 // Only interpreter should have to set fp 6342 set_last_Java_frame(java_thread, last_java_sp, rbp, NULL); 6343 6344 // do the call, remove parameters 6345 MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments); 6346 6347 // restore the thread (cannot use the pushed argument since arguments 6348 // may be overwritten by C code generated by an optimizing compiler); 6349 // however can use the register value directly if it is callee saved. 
6350 if (LP64_ONLY(true ||) java_thread == rdi || java_thread == rsi) { 6351 // rdi & rsi (also r15) are callee saved -> nothing to do 6352 #ifdef ASSERT 6353 guarantee(java_thread != rax, "change this code"); 6354 push(rax); 6355 { Label L; 6356 get_thread(rax); 6357 cmpptr(java_thread, rax); 6358 jcc(Assembler::equal, L); 6359 stop("MacroAssembler::call_VM_base: rdi not callee saved?"); 6360 bind(L); 6361 } 6362 pop(rax); 6363 #endif 6364 } else { 6365 get_thread(java_thread); 6366 } 6367 // reset last Java frame 6368 // Only interpreter should have to clear fp 6369 reset_last_Java_frame(java_thread, true, false); 6370 6371 #ifndef CC_INTERP 6372 // C++ interp handles this in the interpreter 6373 check_and_handle_popframe(java_thread); 6374 check_and_handle_earlyret(java_thread); 6375 #endif /* CC_INTERP */ 6376 6377 if (check_exceptions) { 6378 // check for pending exceptions (java_thread is set upon return) 6379 cmpptr(Address(java_thread, Thread::pending_exception_offset()), (int32_t) NULL_WORD); 6380 #ifndef _LP64 6381 jump_cc(Assembler::notEqual, 6382 RuntimeAddress(StubRoutines::forward_exception_entry())); 6383 #else 6384 // This used to conditionally jump to forward_exception however it is 6385 // possible if we relocate that the branch will not reach. 
// So we must jump
    // around so we can always reach

    Label ok;
    jcc(Assembler::equal, ok);
    jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
    bind(ok);
#endif // LP64
  }

  // get oop result if there is one and reset the value in the thread
  if (oop_result->is_valid()) {
    movptr(oop_result, Address(java_thread, JavaThread::vm_result_offset()));
    movptr(Address(java_thread, JavaThread::vm_result_offset()), NULL_WORD);
    verify_oop(oop_result, "broken oop in call_VM_base");
  }
}

// Computes the last_Java_sp value into rax and forwards to call_VM_base
// with noreg as the java_thread argument.
void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) {

  // Calculate the value for last_Java_sp
  // somewhat subtle. call_VM does an intermediate call
  // which places a return address on the stack just under the
  // stack pointer as the user finished with it. This allows
  // us to retrieve last_Java_pc from last_Java_sp[-1].
  // On 32bit we then have to push additional args on the stack to accomplish
  // the actual requested call. On 64bit call_VM only can use register args
  // so the only extra space is the return address that call_VM created.
  // This hopefully explains the calculations here.

#ifdef _LP64
  // We've pushed one address, correct last_Java_sp
  lea(rax, Address(rsp, wordSize));
#else
  // skip the return address plus the pushed arguments
  lea(rax, Address(rsp, (1 + number_of_arguments) * wordSize));
#endif // LP64

  call_VM_base(oop_result, noreg, rax, entry_point, number_of_arguments, check_exceptions);

}

// call_VM_leaf family: set up 0..3 register arguments with pass_argN and
// delegate to call_VM_leaf_base.
//
// Arguments are passed last-to-first. On LP64 each assert checks that an
// argument still waiting to be passed does not live in the register that is
// about to be written ("smashed arg").
// NOTE(review): presumably pass_argN moves its operand into c_rargN on LP64
// and pushes it on 32-bit; confirm against the pass_argN definitions.
void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) {
  call_VM_leaf_base(entry_point, number_of_arguments);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) {
  pass_arg0(this, arg_0);
  call_VM_leaf(entry_point, 1);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {

  LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
  pass_arg1(this, arg_1);
  pass_arg0(this, arg_0);
  call_VM_leaf(entry_point, 2);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) {
  LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg"));
  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);
  LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
  pass_arg1(this, arg_1);
  pass_arg0(this, arg_0);
  call_VM_leaf(entry_point, 3);
}

// super_call_VM_leaf family: same argument set-up as call_VM_leaf, but the
// call is explicitly qualified as MacroAssembler::call_VM_leaf_base.
// NOTE(review): the qualification looks intended to bypass an override in a
// derived assembler; confirm against the class declaration.
void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0) {
  pass_arg0(this, arg_0);
  MacroAssembler::call_VM_leaf_base(entry_point, 1);
}

void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {

  LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
  pass_arg1(this, arg_1);
  pass_arg0(this, arg_0);
  MacroAssembler::call_VM_leaf_base(entry_point, 2);
}

void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) {
  LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg"));
  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);
  LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
  pass_arg1(this, arg_1);
  pass_arg0(this, arg_0);
  MacroAssembler::call_VM_leaf_base(entry_point, 3);
}

void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3) {
  LP64_ONLY(assert(arg_0 != c_rarg3, "smashed arg"));
  LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"));
  LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"));
  pass_arg3(this, arg_3);
  LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg"));
  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);
  LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
  pass_arg1(this, arg_1);
  pass_arg0(this, arg_0);
  MacroAssembler::call_VM_leaf_base(entry_point, 4);
}

// Emits nothing in this class.
// NOTE(review): presumably a hook for a derived assembler; confirm.
void MacroAssembler::check_and_handle_earlyret(Register java_thread) {
}

// Emits nothing in this class.
// NOTE(review): presumably a hook for a derived assembler; confirm.
void MacroAssembler::check_and_handle_popframe(Register java_thread) {
}

// 32-bit compare of the memory at literal address src1 with imm.
// If the address is not reachable it is first loaded into rscratch1.
void MacroAssembler::cmp32(AddressLiteral src1, int32_t imm) {
  if (reachable(src1)) {
    cmpl(as_Address(src1), imm);
  } else {
    lea(rscratch1, src1);
    cmpl(Address(rscratch1, 0), imm);
  }
}

// 32-bit compare of src1 with the memory at literal address src2.
// src2 must not be an lval literal (use cmpptr for those). If the address is
// not reachable it is first loaded into rscratch1.
void MacroAssembler::cmp32(Register src1, AddressLiteral src2) {
  assert(!src2.is_lval(), "use cmpptr");
  if (reachable(src2)) {
    cmpl(src1, as_Address(src2));
  } else {
    lea(rscratch1, src2);
    cmpl(src1, Address(rscratch1, 0));
  }
}

// 32-bit compare of a register with an immediate.
void MacroAssembler::cmp32(Register src1, int32_t imm) {
  Assembler::cmpl(src1, imm);
}

// 32-bit compare of a register with a memory operand.
void MacroAssembler::cmp32(Register src1, Address src2) {
  Assembler::cmpl(src1, src2);
}

// Compares two doubles with ucomisd and leaves -1, 0 or 1 in dst.
// An unordered result (parity set) produces -1 when unordered_is_less is
// true and 1 otherwise. The movl instructions sit between the compare and
// the branches; the branches still test the flags set by ucomisd.
void MacroAssembler::cmpsd2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less) {
  ucomisd(opr1, opr2);

  Label L;
  if (unordered_is_less) {
    movl(dst, -1);
    jcc(Assembler::parity, L);
    jcc(Assembler::below , L);
    movl(dst, 0);
    jcc(Assembler::equal , L);
    increment(dst);
  } else { // unordered is greater
    movl(dst, 1);
    jcc(Assembler::parity, L);
    jcc(Assembler::above , L);
    movl(dst, 0);
    jcc(Assembler::equal , L);
    decrementl(dst);
  }
  bind(L);
}

// Single-precision counterpart of cmpsd2int: compares with ucomiss and leaves
// -1, 0 or 1 in dst, with the same treatment of unordered results.
void MacroAssembler::cmpss2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less) {
  ucomiss(opr1, opr2);

  Label L;
  if (unordered_is_less) {
    movl(dst, -1);
    jcc(Assembler::parity, L);
    jcc(Assembler::below , L);
    movl(dst, 0);
    jcc(Assembler::equal , L);
    increment(dst);
  } else { // unordered is greater
    movl(dst, 1);
    jcc(Assembler::parity, L);
    jcc(Assembler::above , L);
    movl(dst, 0);
    jcc(Assembler::equal , L);
    decrementl(dst);
  }
  bind(L);
}


// Byte compare of the memory at literal address src1 with imm.
// If the address is not reachable it is first loaded into rscratch1.
void MacroAssembler::cmp8(AddressLiteral src1, int imm) {
  if (reachable(src1)) {
    cmpb(as_Address(src1), imm);
  } else {
    lea(rscratch1, src1);
    cmpb(Address(rscratch1, 0), imm);
  }
}

// Pointer-sized compare of src1 with a literal.
// An lval literal is compared as an address value; otherwise the memory at
// that address is compared. The LP64 paths may use rscratch1.
void MacroAssembler::cmpptr(Register src1, AddressLiteral src2) {
#ifdef _LP64
  if (src2.is_lval()) {
    movptr(rscratch1, src2);
    Assembler::cmpq(src1, rscratch1);
  } else if (reachable(src2)) {
    cmpq(src1, as_Address(src2));
  } else {
    lea(rscratch1, src2);
    Assembler::cmpq(src1, Address(rscratch1, 0));
  }
#else
  if (src2.is_lval()) {
    cmp_literal32(src1, (int32_t) src2.target(), src2.rspec());
  } else {
    cmpl(src1, as_Address(src2));
  }
#endif // _LP64
}

// Pointer-sized compare of memory operand src1 with the address value of
// src2. src2 must be an lval literal. Uses rscratch1 on LP64.
void MacroAssembler::cmpptr(Address src1, AddressLiteral src2) {
  assert(src2.is_lval(), "not a mem-mem compare");
#ifdef _LP64
  // moves src2's literal address
  movptr(rscratch1, src2);
  Assembler::cmpq(src1, rscratch1);
#else
  cmp_literal32(src1, (int32_t) src2.target(), src2.rspec());
#endif // _LP64
}

// Pointer-sized cmpxchg of reg against the memory at literal address adr.
// A lock prefix is emitted when os::is_MP() is true. If the address is not
// reachable it is first loaded into rscratch1.
void MacroAssembler::locked_cmpxchgptr(Register reg, AddressLiteral adr) {
  if (reachable(adr)) {
    if (os::is_MP())
      lock();
    cmpxchgptr(reg, as_Address(adr));
  } else {
    lea(rscratch1, adr);
    if (os::is_MP())
      lock();
    cmpxchgptr(reg, Address(rscratch1, 0));
  }
}

// Pointer-sized cmpxchg: cmpxchgq on LP64, cmpxchgl otherwise.
void MacroAssembler::cmpxchgptr(Register reg, Address adr) {
  LP64_ONLY(cmpxchgq(reg, adr)) NOT_LP64(cmpxchgl(reg, adr));
}

// comisd of dst against the memory at literal address src.
// If the address is not reachable it is first loaded into rscratch1.
void MacroAssembler::comisd(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    Assembler::comisd(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::comisd(dst, Address(rscratch1, 0));
  }
}

// comiss of dst against the memory at literal address src.
// If the address is not reachable it is first loaded into rscratch1.
void MacroAssembler::comiss(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    Assembler::comiss(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::comiss(dst, Address(rscratch1, 0));
  }
}


// Emits atomic_incl(counter_addr) guarded by cond: a branch on the negated
// condition skips the increment.
void MacroAssembler::cond_inc32(Condition cond, AddressLiteral counter_addr) {
  Condition negated_cond = negate_condition(cond);
  Label L;
  jcc(negated_cond, L);
  atomic_incl(counter_addr);
  bind(L);
}

int MacroAssembler::corrected_idivl(Register reg) {
  // Full implementation of Java idiv and irem; checks for
  // special case as described in JVM spec., p.243 & p.271.
  // The function returns the (pc) offset of the idivl
  // instruction - may be needed for implicit exceptions.
  //
  //         normal case                           special case
  //
  // input : rax: dividend                         min_int
  //         reg: divisor   (may not be rax/rdx)   -1
  //
  // output: rax: quotient  (= rax idiv reg)       min_int
  //         rdx: remainder (= rax irem reg)       0
  assert(reg != rax && reg != rdx, "reg cannot be rax, or rdx register");
  const int min_int = 0x80000000;
  Label normal_case, special_case;

  // check for special case
  cmpl(rax, min_int);
  jcc(Assembler::notEqual, normal_case);
  xorl(rdx, rdx); // prepare rdx for possible special case (where remainder = 0)
  cmpl(reg, -1);
  jcc(Assembler::equal, special_case);

  // handle normal case
  bind(normal_case);
  cdql();
  int idivl_offset = offset();
  idivl(reg);

  // normal and special case exit
  bind(special_case);

  return idivl_offset;
}



// Emits code that subtracts value from reg.
// min_jint is handled first because it cannot be negated; other negative
// values become an incrementl of -value; zero emits nothing; 1 uses decl
// when UseIncDec is set.
void MacroAssembler::decrementl(Register reg, int value) {
  if (value == min_jint) {subl(reg, value) ; return; }
  if (value <  0) { incrementl(reg, -value); return; }
  if (value == 0) {                        ; return; }
  if (value == 1 && UseIncDec) { decl(reg) ; return; }
  /* else */      { subl(reg, value)       ; return; }
}

// Memory-operand variant of decrementl(Register, int); same case analysis.
void MacroAssembler::decrementl(Address dst, int value) {
  if (value == min_jint) {subl(dst, value) ; return; }
  if (value <  0) { incrementl(dst, -value); return; }
  if (value == 0) {                        ; return; }
  if (value == 1 && UseIncDec) { decl(dst) ; return; }
  /* else */      { subl(dst, value)       ; return; }
}

// Emits a signed divide of reg by 2^shift_value using an arithmetic shift.
// When the sign test takes the non-positive path, (2^shift_value - 1) is
// added first, so the shift truncates toward zero instead of rounding
// toward minus infinity.
void MacroAssembler::division_with_shift (Register reg, int shift_value) {
  assert (shift_value > 0, "illegal shift value");
  Label _is_positive;
  testl (reg, reg);
  jcc (Assembler::positive, _is_positive);
  int offset = (1 << shift_value) - 1 ;

  if (offset == 1) {
    incrementl(reg);
  } else {
    addl(reg, offset);
  }

  bind (_is_positive);
  sarl(reg, shift_value);
}

void
MacroAssembler::divsd(XMMRegister dst, AddressLiteral src) { 6724 if (reachable(src)) { 6725 Assembler::divsd(dst, as_Address(src)); 6726 } else { 6727 lea(rscratch1, src); 6728 Assembler::divsd(dst, Address(rscratch1, 0)); 6729 } 6730 } 6731 6732 void MacroAssembler::divss(XMMRegister dst, AddressLiteral src) { 6733 if (reachable(src)) { 6734 Assembler::divss(dst, as_Address(src)); 6735 } else { 6736 lea(rscratch1, src); 6737 Assembler::divss(dst, Address(rscratch1, 0)); 6738 } 6739 } 6740 6741 // !defined(COMPILER2) is because of stupid core builds 6742 #if !defined(_LP64) || defined(COMPILER1) || !defined(COMPILER2) 6743 void MacroAssembler::empty_FPU_stack() { 6744 if (VM_Version::supports_mmx()) { 6745 emms(); 6746 } else { 6747 for (int i = 8; i-- > 0; ) ffree(i); 6748 } 6749 } 6750 #endif // !LP64 || C1 || !C2 6751 6752 6753 // Defines obj, preserves var_size_in_bytes 6754 void MacroAssembler::eden_allocate(Register obj, 6755 Register var_size_in_bytes, 6756 int con_size_in_bytes, 6757 Register t1, 6758 Label& slow_case) { 6759 assert(obj == rax, "obj must be in rax, for cmpxchg"); 6760 assert_different_registers(obj, var_size_in_bytes, t1); 6761 if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) { 6762 jmp(slow_case); 6763 } else { 6764 Register end = t1; 6765 Label retry; 6766 bind(retry); 6767 ExternalAddress heap_top((address) Universe::heap()->top_addr()); 6768 movptr(obj, heap_top); 6769 if (var_size_in_bytes == noreg) { 6770 lea(end, Address(obj, con_size_in_bytes)); 6771 } else { 6772 lea(end, Address(obj, var_size_in_bytes, Address::times_1)); 6773 } 6774 // if end < obj then we wrapped around => object too long => slow case 6775 cmpptr(end, obj); 6776 jcc(Assembler::below, slow_case); 6777 cmpptr(end, ExternalAddress((address) Universe::heap()->end_addr())); 6778 jcc(Assembler::above, slow_case); 6779 // Compare obj with the top addr, and if still equal, store the new top addr in 6780 // end at the address of the top addr 
pointer. Sets ZF if was equal, and clears 6781 // it otherwise. Use lock prefix for atomicity on MPs. 6782 locked_cmpxchgptr(end, heap_top); 6783 jcc(Assembler::notEqual, retry); 6784 } 6785 } 6786 6787 void MacroAssembler::enter() { 6788 push(rbp); 6789 mov(rbp, rsp); 6790 } 6791 6792 // A 5 byte nop that is safe for patching (see patch_verified_entry) 6793 void MacroAssembler::fat_nop() { 6794 if (UseAddressNop) { 6795 addr_nop_5(); 6796 } else { 6797 emit_byte(0x26); // es: 6798 emit_byte(0x2e); // cs: 6799 emit_byte(0x64); // fs: 6800 emit_byte(0x65); // gs: 6801 emit_byte(0x90); 6802 } 6803 } 6804 6805 void MacroAssembler::fcmp(Register tmp) { 6806 fcmp(tmp, 1, true, true); 6807 } 6808 6809 void MacroAssembler::fcmp(Register tmp, int index, bool pop_left, bool pop_right) { 6810 assert(!pop_right || pop_left, "usage error"); 6811 if (VM_Version::supports_cmov()) { 6812 assert(tmp == noreg, "unneeded temp"); 6813 if (pop_left) { 6814 fucomip(index); 6815 } else { 6816 fucomi(index); 6817 } 6818 if (pop_right) { 6819 fpop(); 6820 } 6821 } else { 6822 assert(tmp != noreg, "need temp"); 6823 if (pop_left) { 6824 if (pop_right) { 6825 fcompp(); 6826 } else { 6827 fcomp(index); 6828 } 6829 } else { 6830 fcom(index); 6831 } 6832 // convert FPU condition into eflags condition via rax, 6833 save_rax(tmp); 6834 fwait(); fnstsw_ax(); 6835 sahf(); 6836 restore_rax(tmp); 6837 } 6838 // condition codes set as follows: 6839 // 6840 // CF (corresponds to C0) if x < y 6841 // PF (corresponds to C2) if unordered 6842 // ZF (corresponds to C3) if x = y 6843 } 6844 6845 void MacroAssembler::fcmp2int(Register dst, bool unordered_is_less) { 6846 fcmp2int(dst, unordered_is_less, 1, true, true); 6847 } 6848 6849 void MacroAssembler::fcmp2int(Register dst, bool unordered_is_less, int index, bool pop_left, bool pop_right) { 6850 fcmp(VM_Version::supports_cmov() ? 
noreg : dst, index, pop_left, pop_right); 6851 Label L; 6852 if (unordered_is_less) { 6853 movl(dst, -1); 6854 jcc(Assembler::parity, L); 6855 jcc(Assembler::below , L); 6856 movl(dst, 0); 6857 jcc(Assembler::equal , L); 6858 increment(dst); 6859 } else { // unordered is greater 6860 movl(dst, 1); 6861 jcc(Assembler::parity, L); 6862 jcc(Assembler::above , L); 6863 movl(dst, 0); 6864 jcc(Assembler::equal , L); 6865 decrementl(dst); 6866 } 6867 bind(L); 6868 } 6869 6870 void MacroAssembler::fld_d(AddressLiteral src) { 6871 fld_d(as_Address(src)); 6872 } 6873 6874 void MacroAssembler::fld_s(AddressLiteral src) { 6875 fld_s(as_Address(src)); 6876 } 6877 6878 void MacroAssembler::fld_x(AddressLiteral src) { 6879 Assembler::fld_x(as_Address(src)); 6880 } 6881 6882 void MacroAssembler::fldcw(AddressLiteral src) { 6883 Assembler::fldcw(as_Address(src)); 6884 } 6885 6886 void MacroAssembler::pow_exp_core_encoding() { 6887 // kills rax, rcx, rdx 6888 subptr(rsp,sizeof(jdouble)); 6889 // computes 2^X. Stack: X ... 6890 // f2xm1 computes 2^X-1 but only operates on -1<=X<=1. Get int(X) and 6891 // keep it on the thread's stack to compute 2^int(X) later 6892 // then compute 2^(X-int(X)) as (2^(X-int(X)-1+1) 6893 // final result is obtained with: 2^X = 2^int(X) * 2^(X-int(X)) 6894 fld_s(0); // Stack: X X ... 6895 frndint(); // Stack: int(X) X ... 6896 fsuba(1); // Stack: int(X) X-int(X) ... 6897 fistp_s(Address(rsp,0)); // move int(X) as integer to thread's stack. Stack: X-int(X) ... 6898 f2xm1(); // Stack: 2^(X-int(X))-1 ... 6899 fld1(); // Stack: 1 2^(X-int(X))-1 ... 6900 faddp(1); // Stack: 2^(X-int(X)) 6901 // computes 2^(int(X)): add exponent bias (1023) to int(X), then 6902 // shift int(X)+1023 to exponent position. 6903 // Exponent is limited to 11 bits if int(X)+1023 does not fit in 11 6904 // bits, set result to NaN. 0x000 and 0x7FF are reserved exponent 6905 // values so detect them and set result to NaN. 
6906 movl(rax,Address(rsp,0)); 6907 movl(rcx, -2048); // 11 bit mask and valid NaN binary encoding 6908 addl(rax, 1023); 6909 movl(rdx,rax); 6910 shll(rax,20); 6911 // Check that 0 < int(X)+1023 < 2047. Otherwise set rax to NaN. 6912 addl(rdx,1); 6913 // Check that 1 < int(X)+1023+1 < 2048 6914 // in 3 steps: 6915 // 1- (int(X)+1023+1)&-2048 == 0 => 0 <= int(X)+1023+1 < 2048 6916 // 2- (int(X)+1023+1)&-2048 != 0 6917 // 3- (int(X)+1023+1)&-2048 != 1 6918 // Do 2- first because addl just updated the flags. 6919 cmov32(Assembler::equal,rax,rcx); 6920 cmpl(rdx,1); 6921 cmov32(Assembler::equal,rax,rcx); 6922 testl(rdx,rcx); 6923 cmov32(Assembler::notEqual,rax,rcx); 6924 movl(Address(rsp,4),rax); 6925 movl(Address(rsp,0),0); 6926 fmul_d(Address(rsp,0)); // Stack: 2^X ... 6927 addptr(rsp,sizeof(jdouble)); 6928 } 6929 6930 void MacroAssembler::fast_pow() { 6931 // computes X^Y = 2^(Y * log2(X)) 6932 // if fast computation is not possible, result is NaN. Requires 6933 // fallback from user of this macro. 6934 fyl2x(); // Stack: (Y*log2(X)) ... 6935 pow_exp_core_encoding(); // Stack: exp(X) ... 6936 } 6937 6938 void MacroAssembler::fast_exp() { 6939 // computes exp(X) = 2^(X * log2(e)) 6940 // if fast computation is not possible, result is NaN. Requires 6941 // fallback from user of this macro. 6942 fldl2e(); // Stack: log2(e) X ... 6943 fmulp(1); // Stack: (X*log2(e)) ... 6944 pow_exp_core_encoding(); // Stack: exp(X) ... 6945 } 6946 6947 void MacroAssembler::pow_or_exp(bool is_exp, int num_fpu_regs_in_use) { 6948 // kills rax, rcx, rdx 6949 // pow and exp needs 2 extra registers on the fpu stack. 6950 Label slow_case, done; 6951 Register tmp = noreg; 6952 if (!VM_Version::supports_cmov()) { 6953 // fcmp needs a temporary so preserve rdx, 6954 tmp = rdx; 6955 } 6956 Register tmp2 = rax; 6957 NOT_LP64(Register tmp3 = rcx;) 6958 6959 if (is_exp) { 6960 // Stack: X 6961 fld_s(0); // duplicate argument for runtime call. 
Stack: X X 6962 fast_exp(); // Stack: exp(X) X 6963 fcmp(tmp, 0, false, false); // Stack: exp(X) X 6964 // exp(X) not equal to itself: exp(X) is NaN go to slow case. 6965 jcc(Assembler::parity, slow_case); 6966 // get rid of duplicate argument. Stack: exp(X) 6967 if (num_fpu_regs_in_use > 0) { 6968 fxch(); 6969 fpop(); 6970 } else { 6971 ffree(1); 6972 } 6973 jmp(done); 6974 } else { 6975 // Stack: X Y 6976 Label x_negative, y_odd; 6977 6978 fldz(); // Stack: 0 X Y 6979 fcmp(tmp, 1, true, false); // Stack: X Y 6980 jcc(Assembler::above, x_negative); 6981 6982 // X >= 0 6983 6984 fld_s(1); // duplicate arguments for runtime call. Stack: Y X Y 6985 fld_s(1); // Stack: X Y X Y 6986 fast_pow(); // Stack: X^Y X Y 6987 fcmp(tmp, 0, false, false); // Stack: X^Y X Y 6988 // X^Y not equal to itself: X^Y is NaN go to slow case. 6989 jcc(Assembler::parity, slow_case); 6990 // get rid of duplicate arguments. Stack: X^Y 6991 if (num_fpu_regs_in_use > 0) { 6992 fxch(); fpop(); 6993 fxch(); fpop(); 6994 } else { 6995 ffree(2); 6996 ffree(1); 6997 } 6998 jmp(done); 6999 7000 // X <= 0 7001 bind(x_negative); 7002 7003 fld_s(1); // Stack: Y X Y 7004 frndint(); // Stack: int(Y) X Y 7005 fcmp(tmp, 2, false, false); // Stack: int(Y) X Y 7006 jcc(Assembler::notEqual, slow_case); 7007 7008 subptr(rsp, 8); 7009 7010 // For X^Y, when X < 0, Y has to be an integer and the final 7011 // result depends on whether it's odd or even. We just checked 7012 // that int(Y) == Y. We move int(Y) to gp registers as a 64 bit 7013 // integer to test its parity. If int(Y) is huge and doesn't fit 7014 // in the 64 bit integer range, the integer indefinite value will 7015 // end up in the gp registers. Huge numbers are all even, the 7016 // integer indefinite number is even so it's fine. 7017 7018 #ifdef ASSERT 7019 // Let's check we don't end up with an integer indefinite number 7020 // when not expected. 
First test for huge numbers: check whether 7021 // int(Y)+1 == int(Y) which is true for very large numbers and 7022 // those are all even. A 64 bit integer is guaranteed to not 7023 // overflow for numbers where y+1 != y (when precision is set to 7024 // double precision). 7025 Label y_not_huge; 7026 7027 fld1(); // Stack: 1 int(Y) X Y 7028 fadd(1); // Stack: 1+int(Y) int(Y) X Y 7029 7030 #ifdef _LP64 7031 // trip to memory to force the precision down from double extended 7032 // precision 7033 fstp_d(Address(rsp, 0)); 7034 fld_d(Address(rsp, 0)); 7035 #endif 7036 7037 fcmp(tmp, 1, true, false); // Stack: int(Y) X Y 7038 #endif 7039 7040 // move int(Y) as 64 bit integer to thread's stack 7041 fistp_d(Address(rsp,0)); // Stack: X Y 7042 7043 #ifdef ASSERT 7044 jcc(Assembler::notEqual, y_not_huge); 7045 7046 // Y is huge so we know it's even. It may not fit in a 64 bit 7047 // integer and we don't want the debug code below to see the 7048 // integer indefinite value so overwrite int(Y) on the thread's 7049 // stack with 0. 7050 movl(Address(rsp, 0), 0); 7051 movl(Address(rsp, 4), 0); 7052 7053 bind(y_not_huge); 7054 #endif 7055 7056 fld_s(1); // duplicate arguments for runtime call. Stack: Y X Y 7057 fld_s(1); // Stack: X Y X Y 7058 fabs(); // Stack: abs(X) Y X Y 7059 fast_pow(); // Stack: abs(X)^Y X Y 7060 fcmp(tmp, 0, false, false); // Stack: abs(X)^Y X Y 7061 // abs(X)^Y not equal to itself: abs(X)^Y is NaN go to slow case. 7062 7063 pop(tmp2); 7064 NOT_LP64(pop(tmp3)); 7065 jcc(Assembler::parity, slow_case); 7066 7067 #ifdef ASSERT 7068 // Check that int(Y) is not integer indefinite value (int 7069 // overflow). Shouldn't happen because for values that would 7070 // overflow, 1+int(Y)==Y which was tested earlier. 
7071 #ifndef _LP64 7072 { 7073 Label integer; 7074 testl(tmp2, tmp2); 7075 jcc(Assembler::notZero, integer); 7076 cmpl(tmp3, 0x80000000); 7077 jcc(Assembler::notZero, integer); 7078 stop("integer indefinite value shouldn't be seen here"); 7079 bind(integer); 7080 } 7081 #else 7082 { 7083 Label integer; 7084 shlq(tmp2, 1); 7085 jcc(Assembler::carryClear, integer); 7086 jcc(Assembler::notZero, integer); 7087 stop("integer indefinite value shouldn't be seen here"); 7088 bind(integer); 7089 } 7090 #endif 7091 #endif 7092 7093 // get rid of duplicate arguments. Stack: X^Y 7094 if (num_fpu_regs_in_use > 0) { 7095 fxch(); fpop(); 7096 fxch(); fpop(); 7097 } else { 7098 ffree(2); 7099 ffree(1); 7100 } 7101 7102 testl(tmp2, 1); 7103 jcc(Assembler::zero, done); // X <= 0, Y even: X^Y = abs(X)^Y 7104 // X <= 0, Y even: X^Y = -abs(X)^Y 7105 7106 fchs(); // Stack: -abs(X)^Y Y 7107 jmp(done); 7108 } 7109 7110 // slow case: runtime call 7111 bind(slow_case); 7112 7113 fpop(); // pop incorrect result or int(Y) 7114 7115 fp_runtime_fallback(is_exp ? CAST_FROM_FN_PTR(address, SharedRuntime::dexp) : CAST_FROM_FN_PTR(address, SharedRuntime::dpow), 7116 is_exp ? 1 : 2, num_fpu_regs_in_use); 7117 7118 // Come here with result in F-TOS 7119 bind(done); 7120 } 7121 7122 void MacroAssembler::fpop() { 7123 ffree(); 7124 fincstp(); 7125 } 7126 7127 void MacroAssembler::fremr(Register tmp) { 7128 save_rax(tmp); 7129 { Label L; 7130 bind(L); 7131 fprem(); 7132 fwait(); fnstsw_ax(); 7133 #ifdef _LP64 7134 testl(rax, 0x400); 7135 jcc(Assembler::notEqual, L); 7136 #else 7137 sahf(); 7138 jcc(Assembler::parity, L); 7139 #endif // _LP64 7140 } 7141 restore_rax(tmp); 7142 // Result is in ST0. 
7143 // Note: fxch & fpop to get rid of ST1 7144 // (otherwise FPU stack could overflow eventually) 7145 fxch(1); 7146 fpop(); 7147 } 7148 7149 7150 void MacroAssembler::incrementl(AddressLiteral dst) { 7151 if (reachable(dst)) { 7152 incrementl(as_Address(dst)); 7153 } else { 7154 lea(rscratch1, dst); 7155 incrementl(Address(rscratch1, 0)); 7156 } 7157 } 7158 7159 void MacroAssembler::incrementl(ArrayAddress dst) { 7160 incrementl(as_Address(dst)); 7161 } 7162 7163 void MacroAssembler::incrementl(Register reg, int value) { 7164 if (value == min_jint) {addl(reg, value) ; return; } 7165 if (value < 0) { decrementl(reg, -value); return; } 7166 if (value == 0) { ; return; } 7167 if (value == 1 && UseIncDec) { incl(reg) ; return; } 7168 /* else */ { addl(reg, value) ; return; } 7169 } 7170 7171 void MacroAssembler::incrementl(Address dst, int value) { 7172 if (value == min_jint) {addl(dst, value) ; return; } 7173 if (value < 0) { decrementl(dst, -value); return; } 7174 if (value == 0) { ; return; } 7175 if (value == 1 && UseIncDec) { incl(dst) ; return; } 7176 /* else */ { addl(dst, value) ; return; } 7177 } 7178 7179 void MacroAssembler::jump(AddressLiteral dst) { 7180 if (reachable(dst)) { 7181 jmp_literal(dst.target(), dst.rspec()); 7182 } else { 7183 lea(rscratch1, dst); 7184 jmp(rscratch1); 7185 } 7186 } 7187 7188 void MacroAssembler::jump_cc(Condition cc, AddressLiteral dst) { 7189 if (reachable(dst)) { 7190 InstructionMark im(this); 7191 relocate(dst.reloc()); 7192 const int short_size = 2; 7193 const int long_size = 6; 7194 int offs = (intptr_t)dst.target() - ((intptr_t)_code_pos); 7195 if (dst.reloc() == relocInfo::none && is8bit(offs - short_size)) { 7196 // 0111 tttn #8-bit disp 7197 emit_byte(0x70 | cc); 7198 emit_byte((offs - short_size) & 0xFF); 7199 } else { 7200 // 0000 1111 1000 tttn #32-bit disp 7201 emit_byte(0x0F); 7202 emit_byte(0x80 | cc); 7203 emit_long(offs - long_size); 7204 } 7205 } else { 7206 #ifdef ASSERT 7207 warning("reversing conditional 
branch"); 7208 #endif /* ASSERT */ 7209 Label skip; 7210 jccb(reverse[cc], skip); 7211 lea(rscratch1, dst); 7212 Assembler::jmp(rscratch1); 7213 bind(skip); 7214 } 7215 } 7216 7217 void MacroAssembler::ldmxcsr(AddressLiteral src) { 7218 if (reachable(src)) { 7219 Assembler::ldmxcsr(as_Address(src)); 7220 } else { 7221 lea(rscratch1, src); 7222 Assembler::ldmxcsr(Address(rscratch1, 0)); 7223 } 7224 } 7225 7226 int MacroAssembler::load_signed_byte(Register dst, Address src) { 7227 int off; 7228 if (LP64_ONLY(true ||) VM_Version::is_P6()) { 7229 off = offset(); 7230 movsbl(dst, src); // movsxb 7231 } else { 7232 off = load_unsigned_byte(dst, src); 7233 shll(dst, 24); 7234 sarl(dst, 24); 7235 } 7236 return off; 7237 } 7238 7239 // Note: load_signed_short used to be called load_signed_word. 7240 // Although the 'w' in x86 opcodes refers to the term "word" in the assembler 7241 // manual, which means 16 bits, that usage is found nowhere in HotSpot code. 7242 // The term "word" in HotSpot means a 32- or 64-bit machine word. 7243 int MacroAssembler::load_signed_short(Register dst, Address src) { 7244 int off; 7245 if (LP64_ONLY(true ||) VM_Version::is_P6()) { 7246 // This is dubious to me since it seems safe to do a signed 16 => 64 bit 7247 // version but this is what 64bit has always done. This seems to imply 7248 // that users are only using 32bits worth. 7249 off = offset(); 7250 movswl(dst, src); // movsxw 7251 } else { 7252 off = load_unsigned_short(dst, src); 7253 shll(dst, 16); 7254 sarl(dst, 16); 7255 } 7256 return off; 7257 } 7258 7259 int MacroAssembler::load_unsigned_byte(Register dst, Address src) { 7260 // According to Intel Doc. AP-526, "Zero-Extension of Short", p.16, 7261 // and "3.9 Partial Register Penalties", p. 22). 
7262 int off; 7263 if (LP64_ONLY(true || ) VM_Version::is_P6() || src.uses(dst)) { 7264 off = offset(); 7265 movzbl(dst, src); // movzxb 7266 } else { 7267 xorl(dst, dst); 7268 off = offset(); 7269 movb(dst, src); 7270 } 7271 return off; 7272 } 7273 7274 // Note: load_unsigned_short used to be called load_unsigned_word. 7275 int MacroAssembler::load_unsigned_short(Register dst, Address src) { 7276 // According to Intel Doc. AP-526, "Zero-Extension of Short", p.16, 7277 // and "3.9 Partial Register Penalties", p. 22). 7278 int off; 7279 if (LP64_ONLY(true ||) VM_Version::is_P6() || src.uses(dst)) { 7280 off = offset(); 7281 movzwl(dst, src); // movzxw 7282 } else { 7283 xorl(dst, dst); 7284 off = offset(); 7285 movw(dst, src); 7286 } 7287 return off; 7288 } 7289 7290 void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2) { 7291 switch (size_in_bytes) { 7292 #ifndef _LP64 7293 case 8: 7294 assert(dst2 != noreg, "second dest register required"); 7295 movl(dst, src); 7296 movl(dst2, src.plus_disp(BytesPerInt)); 7297 break; 7298 #else 7299 case 8: movq(dst, src); break; 7300 #endif 7301 case 4: movl(dst, src); break; 7302 case 2: is_signed ? load_signed_short(dst, src) : load_unsigned_short(dst, src); break; 7303 case 1: is_signed ? 
load_signed_byte( dst, src) : load_unsigned_byte( dst, src); break; 7304 default: ShouldNotReachHere(); 7305 } 7306 } 7307 7308 void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2) { 7309 switch (size_in_bytes) { 7310 #ifndef _LP64 7311 case 8: 7312 assert(src2 != noreg, "second source register required"); 7313 movl(dst, src); 7314 movl(dst.plus_disp(BytesPerInt), src2); 7315 break; 7316 #else 7317 case 8: movq(dst, src); break; 7318 #endif 7319 case 4: movl(dst, src); break; 7320 case 2: movw(dst, src); break; 7321 case 1: movb(dst, src); break; 7322 default: ShouldNotReachHere(); 7323 } 7324 } 7325 7326 void MacroAssembler::mov32(AddressLiteral dst, Register src) { 7327 if (reachable(dst)) { 7328 movl(as_Address(dst), src); 7329 } else { 7330 lea(rscratch1, dst); 7331 movl(Address(rscratch1, 0), src); 7332 } 7333 } 7334 7335 void MacroAssembler::mov32(Register dst, AddressLiteral src) { 7336 if (reachable(src)) { 7337 movl(dst, as_Address(src)); 7338 } else { 7339 lea(rscratch1, src); 7340 movl(dst, Address(rscratch1, 0)); 7341 } 7342 } 7343 7344 // C++ bool manipulation 7345 7346 void MacroAssembler::movbool(Register dst, Address src) { 7347 if(sizeof(bool) == 1) 7348 movb(dst, src); 7349 else if(sizeof(bool) == 2) 7350 movw(dst, src); 7351 else if(sizeof(bool) == 4) 7352 movl(dst, src); 7353 else 7354 // unsupported 7355 ShouldNotReachHere(); 7356 } 7357 7358 void MacroAssembler::movbool(Address dst, bool boolconst) { 7359 if(sizeof(bool) == 1) 7360 movb(dst, (int) boolconst); 7361 else if(sizeof(bool) == 2) 7362 movw(dst, (int) boolconst); 7363 else if(sizeof(bool) == 4) 7364 movl(dst, (int) boolconst); 7365 else 7366 // unsupported 7367 ShouldNotReachHere(); 7368 } 7369 7370 void MacroAssembler::movbool(Address dst, Register src) { 7371 if(sizeof(bool) == 1) 7372 movb(dst, src); 7373 else if(sizeof(bool) == 2) 7374 movw(dst, src); 7375 else if(sizeof(bool) == 4) 7376 movl(dst, src); 7377 else 7378 // 
unsupported 7379 ShouldNotReachHere(); 7380 } 7381 7382 void MacroAssembler::movbyte(ArrayAddress dst, int src) { 7383 movb(as_Address(dst), src); 7384 } 7385 7386 void MacroAssembler::movdbl(XMMRegister dst, AddressLiteral src) { 7387 if (reachable(src)) { 7388 if (UseXmmLoadAndClearUpper) { 7389 movsd (dst, as_Address(src)); 7390 } else { 7391 movlpd(dst, as_Address(src)); 7392 } 7393 } else { 7394 lea(rscratch1, src); 7395 if (UseXmmLoadAndClearUpper) { 7396 movsd (dst, Address(rscratch1, 0)); 7397 } else { 7398 movlpd(dst, Address(rscratch1, 0)); 7399 } 7400 } 7401 } 7402 7403 void MacroAssembler::movflt(XMMRegister dst, AddressLiteral src) { 7404 if (reachable(src)) { 7405 movss(dst, as_Address(src)); 7406 } else { 7407 lea(rscratch1, src); 7408 movss(dst, Address(rscratch1, 0)); 7409 } 7410 } 7411 7412 void MacroAssembler::movptr(Register dst, Register src) { 7413 LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src)); 7414 } 7415 7416 void MacroAssembler::movptr(Register dst, Address src) { 7417 LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src)); 7418 } 7419 7420 // src should NEVER be a real pointer. 
Use AddressLiteral for true pointers 7421 void MacroAssembler::movptr(Register dst, intptr_t src) { 7422 LP64_ONLY(mov64(dst, src)) NOT_LP64(movl(dst, src)); 7423 } 7424 7425 void MacroAssembler::movptr(Address dst, Register src) { 7426 LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src)); 7427 } 7428 7429 void MacroAssembler::movsd(XMMRegister dst, AddressLiteral src) { 7430 if (reachable(src)) { 7431 Assembler::movsd(dst, as_Address(src)); 7432 } else { 7433 lea(rscratch1, src); 7434 Assembler::movsd(dst, Address(rscratch1, 0)); 7435 } 7436 } 7437 7438 void MacroAssembler::movss(XMMRegister dst, AddressLiteral src) { 7439 if (reachable(src)) { 7440 Assembler::movss(dst, as_Address(src)); 7441 } else { 7442 lea(rscratch1, src); 7443 Assembler::movss(dst, Address(rscratch1, 0)); 7444 } 7445 } 7446 7447 void MacroAssembler::mulsd(XMMRegister dst, AddressLiteral src) { 7448 if (reachable(src)) { 7449 Assembler::mulsd(dst, as_Address(src)); 7450 } else { 7451 lea(rscratch1, src); 7452 Assembler::mulsd(dst, Address(rscratch1, 0)); 7453 } 7454 } 7455 7456 void MacroAssembler::mulss(XMMRegister dst, AddressLiteral src) { 7457 if (reachable(src)) { 7458 Assembler::mulss(dst, as_Address(src)); 7459 } else { 7460 lea(rscratch1, src); 7461 Assembler::mulss(dst, Address(rscratch1, 0)); 7462 } 7463 } 7464 7465 void MacroAssembler::null_check(Register reg, int offset) { 7466 if (needs_explicit_null_check(offset)) { 7467 // provoke OS NULL exception if reg = NULL by 7468 // accessing M[reg] w/o changing any (non-CC) registers 7469 // NOTE: cmpl is plenty here to provoke a segv 7470 cmpptr(rax, Address(reg, 0)); 7471 // Note: should probably use testl(rax, Address(reg, 0)); 7472 // may be shorter code (however, this version of 7473 // testl needs to be implemented first) 7474 } else { 7475 // nothing to do, (later) access of M[reg + offset] 7476 // will provoke OS NULL exception if reg = NULL 7477 } 7478 } 7479 7480 void MacroAssembler::os_breakpoint() { 7481 // instead of directly 
emitting a breakpoint, call os:breakpoint for better debugability 7482 // (e.g., MSVC can't call ps() otherwise) 7483 call(RuntimeAddress(CAST_FROM_FN_PTR(address, os::breakpoint))); 7484 } 7485 7486 void MacroAssembler::pop_CPU_state() { 7487 pop_FPU_state(); 7488 pop_IU_state(); 7489 } 7490 7491 void MacroAssembler::pop_FPU_state() { 7492 NOT_LP64(frstor(Address(rsp, 0));) 7493 LP64_ONLY(fxrstor(Address(rsp, 0));) 7494 addptr(rsp, FPUStateSizeInWords * wordSize); 7495 } 7496 7497 void MacroAssembler::pop_IU_state() { 7498 popa(); 7499 LP64_ONLY(addq(rsp, 8)); 7500 popf(); 7501 } 7502 7503 // Save Integer and Float state 7504 // Warning: Stack must be 16 byte aligned (64bit) 7505 void MacroAssembler::push_CPU_state() { 7506 push_IU_state(); 7507 push_FPU_state(); 7508 } 7509 7510 void MacroAssembler::push_FPU_state() { 7511 subptr(rsp, FPUStateSizeInWords * wordSize); 7512 #ifndef _LP64 7513 fnsave(Address(rsp, 0)); 7514 fwait(); 7515 #else 7516 fxsave(Address(rsp, 0)); 7517 #endif // LP64 7518 } 7519 7520 void MacroAssembler::push_IU_state() { 7521 // Push flags first because pusha kills them 7522 pushf(); 7523 // Make sure rsp stays 16-byte aligned 7524 LP64_ONLY(subq(rsp, 8)); 7525 pusha(); 7526 } 7527 7528 void MacroAssembler::reset_last_Java_frame(Register java_thread, bool clear_fp, bool clear_pc) { 7529 // determine java_thread register 7530 if (!java_thread->is_valid()) { 7531 java_thread = rdi; 7532 get_thread(java_thread); 7533 } 7534 // we must set sp to zero to clear frame 7535 movptr(Address(java_thread, JavaThread::last_Java_sp_offset()), NULL_WORD); 7536 if (clear_fp) { 7537 movptr(Address(java_thread, JavaThread::last_Java_fp_offset()), NULL_WORD); 7538 } 7539 7540 if (clear_pc) 7541 movptr(Address(java_thread, JavaThread::last_Java_pc_offset()), NULL_WORD); 7542 7543 } 7544 7545 void MacroAssembler::restore_rax(Register tmp) { 7546 if (tmp == noreg) pop(rax); 7547 else if (tmp != rax) mov(rax, tmp); 7548 } 7549 7550 void 
MacroAssembler::round_to(Register reg, int modulus) { 7551 addptr(reg, modulus - 1); 7552 andptr(reg, -modulus); 7553 } 7554 7555 void MacroAssembler::save_rax(Register tmp) { 7556 if (tmp == noreg) push(rax); 7557 else if (tmp != rax) mov(tmp, rax); 7558 } 7559 7560 // Write serialization page so VM thread can do a pseudo remote membar. 7561 // We use the current thread pointer to calculate a thread specific 7562 // offset to write to within the page. This minimizes bus traffic 7563 // due to cache line collision. 7564 void MacroAssembler::serialize_memory(Register thread, Register tmp) { 7565 movl(tmp, thread); 7566 shrl(tmp, os::get_serialize_page_shift_count()); 7567 andl(tmp, (os::vm_page_size() - sizeof(int))); 7568 7569 Address index(noreg, tmp, Address::times_1); 7570 ExternalAddress page(os::get_memory_serialize_page()); 7571 7572 // Size of store must match masking code above 7573 movl(as_Address(ArrayAddress(page, index)), tmp); 7574 } 7575 7576 // Calls to C land 7577 // 7578 // When entering C land, the rbp, & rsp of the last Java frame have to be recorded 7579 // in the (thread-local) JavaThread object. When leaving C land, the last Java fp 7580 // has to be reset to 0. This is required to allow proper stack traversal. 
7581 void MacroAssembler::set_last_Java_frame(Register java_thread, 7582 Register last_java_sp, 7583 Register last_java_fp, 7584 address last_java_pc) { 7585 // determine java_thread register 7586 if (!java_thread->is_valid()) { 7587 java_thread = rdi; 7588 get_thread(java_thread); 7589 } 7590 // determine last_java_sp register 7591 if (!last_java_sp->is_valid()) { 7592 last_java_sp = rsp; 7593 } 7594 7595 // last_java_fp is optional 7596 7597 if (last_java_fp->is_valid()) { 7598 movptr(Address(java_thread, JavaThread::last_Java_fp_offset()), last_java_fp); 7599 } 7600 7601 // last_java_pc is optional 7602 7603 if (last_java_pc != NULL) { 7604 lea(Address(java_thread, 7605 JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()), 7606 InternalAddress(last_java_pc)); 7607 7608 } 7609 movptr(Address(java_thread, JavaThread::last_Java_sp_offset()), last_java_sp); 7610 } 7611 7612 void MacroAssembler::shlptr(Register dst, int imm8) { 7613 LP64_ONLY(shlq(dst, imm8)) NOT_LP64(shll(dst, imm8)); 7614 } 7615 7616 void MacroAssembler::shrptr(Register dst, int imm8) { 7617 LP64_ONLY(shrq(dst, imm8)) NOT_LP64(shrl(dst, imm8)); 7618 } 7619 7620 void MacroAssembler::sign_extend_byte(Register reg) { 7621 if (LP64_ONLY(true ||) (VM_Version::is_P6() && reg->has_byte_register())) { 7622 movsbl(reg, reg); // movsxb 7623 } else { 7624 shll(reg, 24); 7625 sarl(reg, 24); 7626 } 7627 } 7628 7629 void MacroAssembler::sign_extend_short(Register reg) { 7630 if (LP64_ONLY(true ||) VM_Version::is_P6()) { 7631 movswl(reg, reg); // movsxw 7632 } else { 7633 shll(reg, 16); 7634 sarl(reg, 16); 7635 } 7636 } 7637 7638 void MacroAssembler::testl(Register dst, AddressLiteral src) { 7639 assert(reachable(src), "Address should be reachable"); 7640 testl(dst, as_Address(src)); 7641 } 7642 7643 void MacroAssembler::sqrtsd(XMMRegister dst, AddressLiteral src) { 7644 if (reachable(src)) { 7645 Assembler::sqrtsd(dst, as_Address(src)); 7646 } else { 7647 lea(rscratch1, src); 7648 
Assembler::sqrtsd(dst, Address(rscratch1, 0)); 7649 } 7650 } 7651 7652 void MacroAssembler::sqrtss(XMMRegister dst, AddressLiteral src) { 7653 if (reachable(src)) { 7654 Assembler::sqrtss(dst, as_Address(src)); 7655 } else { 7656 lea(rscratch1, src); 7657 Assembler::sqrtss(dst, Address(rscratch1, 0)); 7658 } 7659 } 7660 7661 void MacroAssembler::subsd(XMMRegister dst, AddressLiteral src) { 7662 if (reachable(src)) { 7663 Assembler::subsd(dst, as_Address(src)); 7664 } else { 7665 lea(rscratch1, src); 7666 Assembler::subsd(dst, Address(rscratch1, 0)); 7667 } 7668 } 7669 7670 void MacroAssembler::subss(XMMRegister dst, AddressLiteral src) { 7671 if (reachable(src)) { 7672 Assembler::subss(dst, as_Address(src)); 7673 } else { 7674 lea(rscratch1, src); 7675 Assembler::subss(dst, Address(rscratch1, 0)); 7676 } 7677 } 7678 7679 void MacroAssembler::ucomisd(XMMRegister dst, AddressLiteral src) { 7680 if (reachable(src)) { 7681 Assembler::ucomisd(dst, as_Address(src)); 7682 } else { 7683 lea(rscratch1, src); 7684 Assembler::ucomisd(dst, Address(rscratch1, 0)); 7685 } 7686 } 7687 7688 void MacroAssembler::ucomiss(XMMRegister dst, AddressLiteral src) { 7689 if (reachable(src)) { 7690 Assembler::ucomiss(dst, as_Address(src)); 7691 } else { 7692 lea(rscratch1, src); 7693 Assembler::ucomiss(dst, Address(rscratch1, 0)); 7694 } 7695 } 7696 7697 void MacroAssembler::xorpd(XMMRegister dst, AddressLiteral src) { 7698 // Used in sign-bit flipping with aligned address. 7699 assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes"); 7700 if (reachable(src)) { 7701 Assembler::xorpd(dst, as_Address(src)); 7702 } else { 7703 lea(rscratch1, src); 7704 Assembler::xorpd(dst, Address(rscratch1, 0)); 7705 } 7706 } 7707 7708 void MacroAssembler::xorps(XMMRegister dst, AddressLiteral src) { 7709 // Used in sign-bit flipping with aligned address. 
7710 assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes"); 7711 if (reachable(src)) { 7712 Assembler::xorps(dst, as_Address(src)); 7713 } else { 7714 lea(rscratch1, src); 7715 Assembler::xorps(dst, Address(rscratch1, 0)); 7716 } 7717 } 7718 7719 // AVX 3-operands instructions 7720 7721 void MacroAssembler::vaddsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) { 7722 if (reachable(src)) { 7723 vaddsd(dst, nds, as_Address(src)); 7724 } else { 7725 lea(rscratch1, src); 7726 vaddsd(dst, nds, Address(rscratch1, 0)); 7727 } 7728 } 7729 7730 void MacroAssembler::vaddss(XMMRegister dst, XMMRegister nds, AddressLiteral src) { 7731 if (reachable(src)) { 7732 vaddss(dst, nds, as_Address(src)); 7733 } else { 7734 lea(rscratch1, src); 7735 vaddss(dst, nds, Address(rscratch1, 0)); 7736 } 7737 } 7738 7739 void MacroAssembler::vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src) { 7740 if (reachable(src)) { 7741 vandpd(dst, nds, as_Address(src)); 7742 } else { 7743 lea(rscratch1, src); 7744 vandpd(dst, nds, Address(rscratch1, 0)); 7745 } 7746 } 7747 7748 void MacroAssembler::vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src) { 7749 if (reachable(src)) { 7750 vandps(dst, nds, as_Address(src)); 7751 } else { 7752 lea(rscratch1, src); 7753 vandps(dst, nds, Address(rscratch1, 0)); 7754 } 7755 } 7756 7757 void MacroAssembler::vdivsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) { 7758 if (reachable(src)) { 7759 vdivsd(dst, nds, as_Address(src)); 7760 } else { 7761 lea(rscratch1, src); 7762 vdivsd(dst, nds, Address(rscratch1, 0)); 7763 } 7764 } 7765 7766 void MacroAssembler::vdivss(XMMRegister dst, XMMRegister nds, AddressLiteral src) { 7767 if (reachable(src)) { 7768 vdivss(dst, nds, as_Address(src)); 7769 } else { 7770 lea(rscratch1, src); 7771 vdivss(dst, nds, Address(rscratch1, 0)); 7772 } 7773 } 7774 7775 void MacroAssembler::vmulsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) { 7776 
if (reachable(src)) { 7777 vmulsd(dst, nds, as_Address(src)); 7778 } else { 7779 lea(rscratch1, src); 7780 vmulsd(dst, nds, Address(rscratch1, 0)); 7781 } 7782 } 7783 7784 void MacroAssembler::vmulss(XMMRegister dst, XMMRegister nds, AddressLiteral src) { 7785 if (reachable(src)) { 7786 vmulss(dst, nds, as_Address(src)); 7787 } else { 7788 lea(rscratch1, src); 7789 vmulss(dst, nds, Address(rscratch1, 0)); 7790 } 7791 } 7792 7793 void MacroAssembler::vsubsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) { 7794 if (reachable(src)) { 7795 vsubsd(dst, nds, as_Address(src)); 7796 } else { 7797 lea(rscratch1, src); 7798 vsubsd(dst, nds, Address(rscratch1, 0)); 7799 } 7800 } 7801 7802 void MacroAssembler::vsubss(XMMRegister dst, XMMRegister nds, AddressLiteral src) { 7803 if (reachable(src)) { 7804 vsubss(dst, nds, as_Address(src)); 7805 } else { 7806 lea(rscratch1, src); 7807 vsubss(dst, nds, Address(rscratch1, 0)); 7808 } 7809 } 7810 7811 void MacroAssembler::vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src) { 7812 if (reachable(src)) { 7813 vxorpd(dst, nds, as_Address(src)); 7814 } else { 7815 lea(rscratch1, src); 7816 vxorpd(dst, nds, Address(rscratch1, 0)); 7817 } 7818 } 7819 7820 void MacroAssembler::vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src) { 7821 if (reachable(src)) { 7822 vxorps(dst, nds, as_Address(src)); 7823 } else { 7824 lea(rscratch1, src); 7825 vxorps(dst, nds, Address(rscratch1, 0)); 7826 } 7827 } 7828 7829 7830 ////////////////////////////////////////////////////////////////////////////////// 7831 #ifndef SERIALGC 7832 7833 void MacroAssembler::g1_write_barrier_pre(Register obj, 7834 Register pre_val, 7835 Register thread, 7836 Register tmp, 7837 bool tosca_live, 7838 bool expand_call) { 7839 7840 // If expand_call is true then we expand the call_VM_leaf macro 7841 // directly to skip generating the check by 7842 // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp. 
7843 7844 #ifdef _LP64 7845 assert(thread == r15_thread, "must be"); 7846 #endif // _LP64 7847 7848 Label done; 7849 Label runtime; 7850 7851 assert(pre_val != noreg, "check this code"); 7852 7853 if (obj != noreg) { 7854 assert_different_registers(obj, pre_val, tmp); 7855 assert(pre_val != rax, "check this code"); 7856 } 7857 7858 Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() + 7859 PtrQueue::byte_offset_of_active())); 7860 Address index(thread, in_bytes(JavaThread::satb_mark_queue_offset() + 7861 PtrQueue::byte_offset_of_index())); 7862 Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() + 7863 PtrQueue::byte_offset_of_buf())); 7864 7865 7866 // Is marking active? 7867 if (in_bytes(PtrQueue::byte_width_of_active()) == 4) { 7868 cmpl(in_progress, 0); 7869 } else { 7870 assert(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption"); 7871 cmpb(in_progress, 0); 7872 } 7873 jcc(Assembler::equal, done); 7874 7875 // Do we need to load the previous value? 7876 if (obj != noreg) { 7877 load_heap_oop(pre_val, Address(obj, 0)); 7878 } 7879 7880 // Is the previous value null? 7881 cmpptr(pre_val, (int32_t) NULL_WORD); 7882 jcc(Assembler::equal, done); 7883 7884 // Can we store original value in the thread's buffer? 7885 // Is index == 0? 7886 // (The index field is typed as size_t.) 7887 7888 movptr(tmp, index); // tmp := *index_adr 7889 cmpptr(tmp, 0); // tmp == 0? 
7890 jcc(Assembler::equal, runtime); // If yes, goto runtime 7891 7892 subptr(tmp, wordSize); // tmp := tmp - wordSize 7893 movptr(index, tmp); // *index_adr := tmp 7894 addptr(tmp, buffer); // tmp := tmp + *buffer_adr 7895 7896 // Record the previous value 7897 movptr(Address(tmp, 0), pre_val); 7898 jmp(done); 7899 7900 bind(runtime); 7901 // save the live input values 7902 if(tosca_live) push(rax); 7903 7904 if (obj != noreg && obj != rax) 7905 push(obj); 7906 7907 if (pre_val != rax) 7908 push(pre_val); 7909 7910 // Calling the runtime using the regular call_VM_leaf mechanism generates 7911 // code (generated by InterpreterMacroAssember::call_VM_leaf_base) 7912 // that checks that the *(ebp+frame::interpreter_frame_last_sp) == NULL. 7913 // 7914 // If we care generating the pre-barrier without a frame (e.g. in the 7915 // intrinsified Reference.get() routine) then ebp might be pointing to 7916 // the caller frame and so this check will most likely fail at runtime. 7917 // 7918 // Expanding the call directly bypasses the generation of the check. 7919 // So when we do not have have a full interpreter frame on the stack 7920 // expand_call should be passed true. 
7921 7922 NOT_LP64( push(thread); ) 7923 7924 if (expand_call) { 7925 LP64_ONLY( assert(pre_val != c_rarg1, "smashed arg"); ) 7926 pass_arg1(this, thread); 7927 pass_arg0(this, pre_val); 7928 MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), 2); 7929 } else { 7930 call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), pre_val, thread); 7931 } 7932 7933 NOT_LP64( pop(thread); ) 7934 7935 // save the live input values 7936 if (pre_val != rax) 7937 pop(pre_val); 7938 7939 if (obj != noreg && obj != rax) 7940 pop(obj); 7941 7942 if(tosca_live) pop(rax); 7943 7944 bind(done); 7945 } 7946 7947 void MacroAssembler::g1_write_barrier_post(Register store_addr, 7948 Register new_val, 7949 Register thread, 7950 Register tmp, 7951 Register tmp2) { 7952 #ifdef _LP64 7953 assert(thread == r15_thread, "must be"); 7954 #endif // _LP64 7955 7956 Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() + 7957 PtrQueue::byte_offset_of_index())); 7958 Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() + 7959 PtrQueue::byte_offset_of_buf())); 7960 7961 BarrierSet* bs = Universe::heap()->barrier_set(); 7962 CardTableModRefBS* ct = (CardTableModRefBS*)bs; 7963 Label done; 7964 Label runtime; 7965 7966 // Does store cross heap regions? 7967 7968 movptr(tmp, store_addr); 7969 xorptr(tmp, new_val); 7970 shrptr(tmp, HeapRegion::LogOfHRGrainBytes); 7971 jcc(Assembler::equal, done); 7972 7973 // crosses regions, storing NULL? 7974 7975 cmpptr(new_val, (int32_t) NULL_WORD); 7976 jcc(Assembler::equal, done); 7977 7978 // storing region crossing non-NULL, is card already dirty? 
7979 7980 ExternalAddress cardtable((address) ct->byte_map_base); 7981 assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code"); 7982 #ifdef _LP64 7983 const Register card_addr = tmp; 7984 7985 movq(card_addr, store_addr); 7986 shrq(card_addr, CardTableModRefBS::card_shift); 7987 7988 lea(tmp2, cardtable); 7989 7990 // get the address of the card 7991 addq(card_addr, tmp2); 7992 #else 7993 const Register card_index = tmp; 7994 7995 movl(card_index, store_addr); 7996 shrl(card_index, CardTableModRefBS::card_shift); 7997 7998 Address index(noreg, card_index, Address::times_1); 7999 const Register card_addr = tmp; 8000 lea(card_addr, as_Address(ArrayAddress(cardtable, index))); 8001 #endif 8002 cmpb(Address(card_addr, 0), 0); 8003 jcc(Assembler::equal, done); 8004 8005 // storing a region crossing, non-NULL oop, card is clean. 8006 // dirty card and log. 8007 8008 movb(Address(card_addr, 0), 0); 8009 8010 cmpl(queue_index, 0); 8011 jcc(Assembler::equal, runtime); 8012 subl(queue_index, wordSize); 8013 movptr(tmp2, buffer); 8014 #ifdef _LP64 8015 movslq(rscratch1, queue_index); 8016 addq(tmp2, rscratch1); 8017 movq(Address(tmp2, 0), card_addr); 8018 #else 8019 addl(tmp2, queue_index); 8020 movl(Address(tmp2, 0), card_index); 8021 #endif 8022 jmp(done); 8023 8024 bind(runtime); 8025 // save the live input values 8026 push(store_addr); 8027 push(new_val); 8028 #ifdef _LP64 8029 call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, r15_thread); 8030 #else 8031 push(thread); 8032 call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, thread); 8033 pop(thread); 8034 #endif 8035 pop(new_val); 8036 pop(store_addr); 8037 8038 bind(done); 8039 } 8040 8041 #endif // SERIALGC 8042 ////////////////////////////////////////////////////////////////////////////////// 8043 8044 8045 void MacroAssembler::store_check(Register obj) { 8046 // Does a store check for the oop in register obj. 
The content of 8047 // register obj is destroyed afterwards. 8048 store_check_part_1(obj); 8049 store_check_part_2(obj); 8050 } 8051 8052 void MacroAssembler::store_check(Register obj, Address dst) { 8053 store_check(obj); 8054 } 8055 8056 8057 // split the store check operation so that other instructions can be scheduled inbetween 8058 void MacroAssembler::store_check_part_1(Register obj) { 8059 BarrierSet* bs = Universe::heap()->barrier_set(); 8060 assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind"); 8061 shrptr(obj, CardTableModRefBS::card_shift); 8062 } 8063 8064 void MacroAssembler::store_check_part_2(Register obj) { 8065 BarrierSet* bs = Universe::heap()->barrier_set(); 8066 assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind"); 8067 CardTableModRefBS* ct = (CardTableModRefBS*)bs; 8068 assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code"); 8069 8070 // The calculation for byte_map_base is as follows: 8071 // byte_map_base = _byte_map - (uintptr_t(low_bound) >> card_shift); 8072 // So this essentially converts an address to a displacement and 8073 // it will never need to be relocated. On 64bit however the value may be too 8074 // large for a 32bit displacement 8075 8076 intptr_t disp = (intptr_t) ct->byte_map_base; 8077 if (is_simm32(disp)) { 8078 Address cardtable(noreg, obj, Address::times_1, disp); 8079 movb(cardtable, 0); 8080 } else { 8081 // By doing it as an ExternalAddress disp could be converted to a rip-relative 8082 // displacement and done in a single instruction given favorable mapping and 8083 // a smarter version of as_Address. Worst case it is two instructions which 8084 // is no worse off then loading disp into a register and doing as a simple 8085 // Address() as above. 8086 // We can't do as ExternalAddress as the only style since if disp == 0 we'll 8087 // assert since NULL isn't acceptable in a reloci (see 6644928). 
In any case 8088 // in some cases we'll get a single instruction version. 8089 8090 ExternalAddress cardtable((address)disp); 8091 Address index(noreg, obj, Address::times_1); 8092 movb(as_Address(ArrayAddress(cardtable, index)), 0); 8093 } 8094 } 8095 8096 void MacroAssembler::subptr(Register dst, int32_t imm32) { 8097 LP64_ONLY(subq(dst, imm32)) NOT_LP64(subl(dst, imm32)); 8098 } 8099 8100 // Force generation of a 4 byte immediate value even if it fits into 8bit 8101 void MacroAssembler::subptr_imm32(Register dst, int32_t imm32) { 8102 LP64_ONLY(subq_imm32(dst, imm32)) NOT_LP64(subl_imm32(dst, imm32)); 8103 } 8104 8105 void MacroAssembler::subptr(Register dst, Register src) { 8106 LP64_ONLY(subq(dst, src)) NOT_LP64(subl(dst, src)); 8107 } 8108 8109 // C++ bool manipulation 8110 void MacroAssembler::testbool(Register dst) { 8111 if(sizeof(bool) == 1) 8112 testb(dst, 0xff); 8113 else if(sizeof(bool) == 2) { 8114 // testw implementation needed for two byte bools 8115 ShouldNotReachHere(); 8116 } else if(sizeof(bool) == 4) 8117 testl(dst, dst); 8118 else 8119 // unsupported 8120 ShouldNotReachHere(); 8121 } 8122 8123 void MacroAssembler::testptr(Register dst, Register src) { 8124 LP64_ONLY(testq(dst, src)) NOT_LP64(testl(dst, src)); 8125 } 8126 8127 // Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes. 
8128 void MacroAssembler::tlab_allocate(Register obj, 8129 Register var_size_in_bytes, 8130 int con_size_in_bytes, 8131 Register t1, 8132 Register t2, 8133 Label& slow_case) { 8134 assert_different_registers(obj, t1, t2); 8135 assert_different_registers(obj, var_size_in_bytes, t1); 8136 Register end = t2; 8137 Register thread = NOT_LP64(t1) LP64_ONLY(r15_thread); 8138 8139 verify_tlab(); 8140 8141 NOT_LP64(get_thread(thread)); 8142 8143 movptr(obj, Address(thread, JavaThread::tlab_top_offset())); 8144 if (var_size_in_bytes == noreg) { 8145 lea(end, Address(obj, con_size_in_bytes)); 8146 } else { 8147 lea(end, Address(obj, var_size_in_bytes, Address::times_1)); 8148 } 8149 cmpptr(end, Address(thread, JavaThread::tlab_end_offset())); 8150 jcc(Assembler::above, slow_case); 8151 8152 // update the tlab top pointer 8153 movptr(Address(thread, JavaThread::tlab_top_offset()), end); 8154 8155 // recover var_size_in_bytes if necessary 8156 if (var_size_in_bytes == end) { 8157 subptr(var_size_in_bytes, obj); 8158 } 8159 verify_tlab(); 8160 } 8161 8162 // Preserves rbx, and rdx. 8163 Register MacroAssembler::tlab_refill(Label& retry, 8164 Label& try_eden, 8165 Label& slow_case) { 8166 Register top = rax; 8167 Register t1 = rcx; 8168 Register t2 = rsi; 8169 Register thread_reg = NOT_LP64(rdi) LP64_ONLY(r15_thread); 8170 assert_different_registers(top, thread_reg, t1, t2, /* preserve: */ rbx, rdx); 8171 Label do_refill, discard_tlab; 8172 8173 if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) { 8174 // No allocation in the shared eden. 
8175 jmp(slow_case); 8176 } 8177 8178 NOT_LP64(get_thread(thread_reg)); 8179 8180 movptr(top, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset()))); 8181 movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_end_offset()))); 8182 8183 // calculate amount of free space 8184 subptr(t1, top); 8185 shrptr(t1, LogHeapWordSize); 8186 8187 // Retain tlab and allocate object in shared space if 8188 // the amount free in the tlab is too large to discard. 8189 cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset()))); 8190 jcc(Assembler::lessEqual, discard_tlab); 8191 8192 // Retain 8193 // %%% yuck as movptr... 8194 movptr(t2, (int32_t) ThreadLocalAllocBuffer::refill_waste_limit_increment()); 8195 addptr(Address(thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset())), t2); 8196 if (TLABStats) { 8197 // increment number of slow_allocations 8198 addl(Address(thread_reg, in_bytes(JavaThread::tlab_slow_allocations_offset())), 1); 8199 } 8200 jmp(try_eden); 8201 8202 bind(discard_tlab); 8203 if (TLABStats) { 8204 // increment number of refills 8205 addl(Address(thread_reg, in_bytes(JavaThread::tlab_number_of_refills_offset())), 1); 8206 // accumulate wastage -- t1 is amount free in tlab 8207 addl(Address(thread_reg, in_bytes(JavaThread::tlab_fast_refill_waste_offset())), t1); 8208 } 8209 8210 // if tlab is currently allocated (top or end != null) then 8211 // fill [top, end + alignment_reserve) with array object 8212 testptr(top, top); 8213 jcc(Assembler::zero, do_refill); 8214 8215 // set up the mark word 8216 movptr(Address(top, oopDesc::mark_offset_in_bytes()), (intptr_t)markOopDesc::prototype()->copy_set_hash(0x2)); 8217 // set the length to the remaining space 8218 subptr(t1, typeArrayOopDesc::header_size(T_INT)); 8219 addptr(t1, (int32_t)ThreadLocalAllocBuffer::alignment_reserve()); 8220 shlptr(t1, log2_intptr(HeapWordSize/sizeof(jint))); 8221 movl(Address(top, arrayOopDesc::length_offset_in_bytes()), t1); 8222 // set 
klass to intArrayKlass 8223 // dubious reloc why not an oop reloc? 8224 movptr(t1, ExternalAddress((address)Universe::intArrayKlassObj_addr())); 8225 // store klass last. concurrent gcs assumes klass length is valid if 8226 // klass field is not null. 8227 store_klass(top, t1); 8228 8229 movptr(t1, top); 8230 subptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_start_offset()))); 8231 incr_allocated_bytes(thread_reg, t1, 0); 8232 8233 // refill the tlab with an eden allocation 8234 bind(do_refill); 8235 movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_size_offset()))); 8236 shlptr(t1, LogHeapWordSize); 8237 // allocate new tlab, address returned in top 8238 eden_allocate(top, t1, 0, t2, slow_case); 8239 8240 // Check that t1 was preserved in eden_allocate. 8241 #ifdef ASSERT 8242 if (UseTLAB) { 8243 Label ok; 8244 Register tsize = rsi; 8245 assert_different_registers(tsize, thread_reg, t1); 8246 push(tsize); 8247 movptr(tsize, Address(thread_reg, in_bytes(JavaThread::tlab_size_offset()))); 8248 shlptr(tsize, LogHeapWordSize); 8249 cmpptr(t1, tsize); 8250 jcc(Assembler::equal, ok); 8251 stop("assert(t1 != tlab size)"); 8252 should_not_reach_here(); 8253 8254 bind(ok); 8255 pop(tsize); 8256 } 8257 #endif 8258 movptr(Address(thread_reg, in_bytes(JavaThread::tlab_start_offset())), top); 8259 movptr(Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())), top); 8260 addptr(top, t1); 8261 subptr(top, (int32_t)ThreadLocalAllocBuffer::alignment_reserve_in_bytes()); 8262 movptr(Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())), top); 8263 verify_tlab(); 8264 jmp(retry); 8265 8266 return thread_reg; // for use by caller 8267 } 8268 8269 void MacroAssembler::incr_allocated_bytes(Register thread, 8270 Register var_size_in_bytes, 8271 int con_size_in_bytes, 8272 Register t1) { 8273 if (!thread->is_valid()) { 8274 #ifdef _LP64 8275 thread = r15_thread; 8276 #else 8277 assert(t1->is_valid(), "need temp reg"); 8278 thread = t1; 8279 get_thread(thread); 
8280 #endif 8281 } 8282 8283 #ifdef _LP64 8284 if (var_size_in_bytes->is_valid()) { 8285 addq(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), var_size_in_bytes); 8286 } else { 8287 addq(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), con_size_in_bytes); 8288 } 8289 #else 8290 if (var_size_in_bytes->is_valid()) { 8291 addl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), var_size_in_bytes); 8292 } else { 8293 addl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), con_size_in_bytes); 8294 } 8295 adcl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())+4), 0); 8296 #endif 8297 } 8298 8299 void MacroAssembler::fp_runtime_fallback(address runtime_entry, int nb_args, int num_fpu_regs_in_use) { 8300 pusha(); 8301 8302 // if we are coming from c1, xmm registers may be live 8303 if (UseSSE >= 1) { 8304 subptr(rsp, sizeof(jdouble)* LP64_ONLY(16) NOT_LP64(8)); 8305 } 8306 int off = 0; 8307 if (UseSSE == 1) { 8308 movflt(Address(rsp,off++*sizeof(jdouble)),xmm0); 8309 movflt(Address(rsp,off++*sizeof(jdouble)),xmm1); 8310 movflt(Address(rsp,off++*sizeof(jdouble)),xmm2); 8311 movflt(Address(rsp,off++*sizeof(jdouble)),xmm3); 8312 movflt(Address(rsp,off++*sizeof(jdouble)),xmm4); 8313 movflt(Address(rsp,off++*sizeof(jdouble)),xmm5); 8314 movflt(Address(rsp,off++*sizeof(jdouble)),xmm6); 8315 movflt(Address(rsp,off++*sizeof(jdouble)),xmm7); 8316 } else if (UseSSE >= 2) { 8317 movdbl(Address(rsp,off++*sizeof(jdouble)),xmm0); 8318 movdbl(Address(rsp,off++*sizeof(jdouble)),xmm1); 8319 movdbl(Address(rsp,off++*sizeof(jdouble)),xmm2); 8320 movdbl(Address(rsp,off++*sizeof(jdouble)),xmm3); 8321 movdbl(Address(rsp,off++*sizeof(jdouble)),xmm4); 8322 movdbl(Address(rsp,off++*sizeof(jdouble)),xmm5); 8323 movdbl(Address(rsp,off++*sizeof(jdouble)),xmm6); 8324 movdbl(Address(rsp,off++*sizeof(jdouble)),xmm7); 8325 #ifdef _LP64 8326 movdbl(Address(rsp,off++*sizeof(jdouble)),xmm8); 8327 
movdbl(Address(rsp,off++*sizeof(jdouble)),xmm9); 8328 movdbl(Address(rsp,off++*sizeof(jdouble)),xmm10); 8329 movdbl(Address(rsp,off++*sizeof(jdouble)),xmm11); 8330 movdbl(Address(rsp,off++*sizeof(jdouble)),xmm12); 8331 movdbl(Address(rsp,off++*sizeof(jdouble)),xmm13); 8332 movdbl(Address(rsp,off++*sizeof(jdouble)),xmm14); 8333 movdbl(Address(rsp,off++*sizeof(jdouble)),xmm15); 8334 #endif 8335 } 8336 8337 // Preserve registers across runtime call 8338 int incoming_argument_and_return_value_offset = -1; 8339 if (num_fpu_regs_in_use > 1) { 8340 // Must preserve all other FPU regs (could alternatively convert 8341 // SharedRuntime::dsin, dcos etc. into assembly routines known not to trash 8342 // FPU state, but can not trust C compiler) 8343 NEEDS_CLEANUP; 8344 // NOTE that in this case we also push the incoming argument(s) to 8345 // the stack and restore it later; we also use this stack slot to 8346 // hold the return value from dsin, dcos etc. 8347 for (int i = 0; i < num_fpu_regs_in_use; i++) { 8348 subptr(rsp, sizeof(jdouble)); 8349 fstp_d(Address(rsp, 0)); 8350 } 8351 incoming_argument_and_return_value_offset = sizeof(jdouble)*(num_fpu_regs_in_use-1); 8352 for (int i = nb_args-1; i >= 0; i--) { 8353 fld_d(Address(rsp, incoming_argument_and_return_value_offset-i*sizeof(jdouble))); 8354 } 8355 } 8356 8357 subptr(rsp, nb_args*sizeof(jdouble)); 8358 for (int i = 0; i < nb_args; i++) { 8359 fstp_d(Address(rsp, i*sizeof(jdouble))); 8360 } 8361 8362 #ifdef _LP64 8363 if (nb_args > 0) { 8364 movdbl(xmm0, Address(rsp, 0)); 8365 } 8366 if (nb_args > 1) { 8367 movdbl(xmm1, Address(rsp, sizeof(jdouble))); 8368 } 8369 assert(nb_args <= 2, "unsupported number of args"); 8370 #endif // _LP64 8371 8372 // NOTE: we must not use call_VM_leaf here because that requires a 8373 // complete interpreter frame in debug mode -- same bug as 4387334 8374 // MacroAssembler::call_VM_leaf_base is perfectly safe and will 8375 // do proper 64bit abi 8376 8377 NEEDS_CLEANUP; 8378 // Need to add 
stack banging before this runtime call if it needs to 8379 // be taken; however, there is no generic stack banging routine at 8380 // the MacroAssembler level 8381 8382 MacroAssembler::call_VM_leaf_base(runtime_entry, 0); 8383 8384 #ifdef _LP64 8385 movsd(Address(rsp, 0), xmm0); 8386 fld_d(Address(rsp, 0)); 8387 #endif // _LP64 8388 addptr(rsp, sizeof(jdouble) * nb_args); 8389 if (num_fpu_regs_in_use > 1) { 8390 // Must save return value to stack and then restore entire FPU 8391 // stack except incoming arguments 8392 fstp_d(Address(rsp, incoming_argument_and_return_value_offset)); 8393 for (int i = 0; i < num_fpu_regs_in_use - nb_args; i++) { 8394 fld_d(Address(rsp, 0)); 8395 addptr(rsp, sizeof(jdouble)); 8396 } 8397 fld_d(Address(rsp, (nb_args-1)*sizeof(jdouble))); 8398 addptr(rsp, sizeof(jdouble) * nb_args); 8399 } 8400 8401 off = 0; 8402 if (UseSSE == 1) { 8403 movflt(xmm0, Address(rsp,off++*sizeof(jdouble))); 8404 movflt(xmm1, Address(rsp,off++*sizeof(jdouble))); 8405 movflt(xmm2, Address(rsp,off++*sizeof(jdouble))); 8406 movflt(xmm3, Address(rsp,off++*sizeof(jdouble))); 8407 movflt(xmm4, Address(rsp,off++*sizeof(jdouble))); 8408 movflt(xmm5, Address(rsp,off++*sizeof(jdouble))); 8409 movflt(xmm6, Address(rsp,off++*sizeof(jdouble))); 8410 movflt(xmm7, Address(rsp,off++*sizeof(jdouble))); 8411 } else if (UseSSE >= 2) { 8412 movdbl(xmm0, Address(rsp,off++*sizeof(jdouble))); 8413 movdbl(xmm1, Address(rsp,off++*sizeof(jdouble))); 8414 movdbl(xmm2, Address(rsp,off++*sizeof(jdouble))); 8415 movdbl(xmm3, Address(rsp,off++*sizeof(jdouble))); 8416 movdbl(xmm4, Address(rsp,off++*sizeof(jdouble))); 8417 movdbl(xmm5, Address(rsp,off++*sizeof(jdouble))); 8418 movdbl(xmm6, Address(rsp,off++*sizeof(jdouble))); 8419 movdbl(xmm7, Address(rsp,off++*sizeof(jdouble))); 8420 #ifdef _LP64 8421 movdbl(xmm8, Address(rsp,off++*sizeof(jdouble))); 8422 movdbl(xmm9, Address(rsp,off++*sizeof(jdouble))); 8423 movdbl(xmm10, Address(rsp,off++*sizeof(jdouble))); 8424 movdbl(xmm11, 
Address(rsp,off++*sizeof(jdouble))); 8425 movdbl(xmm12, Address(rsp,off++*sizeof(jdouble))); 8426 movdbl(xmm13, Address(rsp,off++*sizeof(jdouble))); 8427 movdbl(xmm14, Address(rsp,off++*sizeof(jdouble))); 8428 movdbl(xmm15, Address(rsp,off++*sizeof(jdouble))); 8429 #endif 8430 } 8431 if (UseSSE >= 1) { 8432 addptr(rsp, sizeof(jdouble)* LP64_ONLY(16) NOT_LP64(8)); 8433 } 8434 popa(); 8435 } 8436 8437 static const double pi_4 = 0.7853981633974483; 8438 8439 void MacroAssembler::trigfunc(char trig, int num_fpu_regs_in_use) { 8440 // A hand-coded argument reduction for values in fabs(pi/4, pi/2) 8441 // was attempted in this code; unfortunately it appears that the 8442 // switch to 80-bit precision and back causes this to be 8443 // unprofitable compared with simply performing a runtime call if 8444 // the argument is out of the (-pi/4, pi/4) range. 8445 8446 Register tmp = noreg; 8447 if (!VM_Version::supports_cmov()) { 8448 // fcmp needs a temporary so preserve rbx, 8449 tmp = rbx; 8450 push(tmp); 8451 } 8452 8453 Label slow_case, done; 8454 8455 ExternalAddress pi4_adr = (address)&pi_4; 8456 if (reachable(pi4_adr)) { 8457 // x ?<= pi/4 8458 fld_d(pi4_adr); 8459 fld_s(1); // Stack: X PI/4 X 8460 fabs(); // Stack: |X| PI/4 X 8461 fcmp(tmp); 8462 jcc(Assembler::above, slow_case); 8463 8464 // fastest case: -pi/4 <= x <= pi/4 8465 switch(trig) { 8466 case 's': 8467 fsin(); 8468 break; 8469 case 'c': 8470 fcos(); 8471 break; 8472 case 't': 8473 ftan(); 8474 break; 8475 default: 8476 assert(false, "bad intrinsic"); 8477 break; 8478 } 8479 jmp(done); 8480 } 8481 8482 // slow case: runtime call 8483 bind(slow_case); 8484 8485 switch(trig) { 8486 case 's': 8487 { 8488 fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dsin), 1, num_fpu_regs_in_use); 8489 } 8490 break; 8491 case 'c': 8492 { 8493 fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dcos), 1, num_fpu_regs_in_use); 8494 } 8495 break; 8496 case 't': 8497 { 8498 
fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dtan), 1, num_fpu_regs_in_use); 8499 } 8500 break; 8501 default: 8502 assert(false, "bad intrinsic"); 8503 break; 8504 } 8505 8506 // Come here with result in F-TOS 8507 bind(done); 8508 8509 if (tmp != noreg) { 8510 pop(tmp); 8511 } 8512 } 8513 8514 8515 // Look up the method for a megamorphic invokeinterface call. 8516 // The target method is determined by <intf_klass, itable_index>. 8517 // The receiver klass is in recv_klass. 8518 // On success, the result will be in method_result, and execution falls through. 8519 // On failure, execution transfers to the given label. 8520 void MacroAssembler::lookup_interface_method(Register recv_klass, 8521 Register intf_klass, 8522 RegisterOrConstant itable_index, 8523 Register method_result, 8524 Register scan_temp, 8525 Label& L_no_such_interface) { 8526 assert_different_registers(recv_klass, intf_klass, method_result, scan_temp); 8527 assert(itable_index.is_constant() || itable_index.as_register() == method_result, 8528 "caller must use same register for non-constant itable index as for method"); 8529 8530 // Compute start of first itableOffsetEntry (which is at the end of the vtable) 8531 int vtable_base = instanceKlass::vtable_start_offset() * wordSize; 8532 int itentry_off = itableMethodEntry::method_offset_in_bytes(); 8533 int scan_step = itableOffsetEntry::size() * wordSize; 8534 int vte_size = vtableEntry::size() * wordSize; 8535 Address::ScaleFactor times_vte_scale = Address::times_ptr; 8536 assert(vte_size == wordSize, "else adjust times_vte_scale"); 8537 8538 movl(scan_temp, Address(recv_klass, instanceKlass::vtable_length_offset() * wordSize)); 8539 8540 // %%% Could store the aligned, prescaled offset in the klassoop. 8541 lea(scan_temp, Address(recv_klass, scan_temp, times_vte_scale, vtable_base)); 8542 if (HeapWordsPerLong > 1) { 8543 // Round up to align_object_offset boundary 8544 // see code for instanceKlass::start_of_itable! 
8545 round_to(scan_temp, BytesPerLong); 8546 } 8547 8548 // Adjust recv_klass by scaled itable_index, so we can free itable_index. 8549 assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below"); 8550 lea(recv_klass, Address(recv_klass, itable_index, Address::times_ptr, itentry_off)); 8551 8552 // for (scan = klass->itable(); scan->interface() != NULL; scan += scan_step) { 8553 // if (scan->interface() == intf) { 8554 // result = (klass + scan->offset() + itable_index); 8555 // } 8556 // } 8557 Label search, found_method; 8558 8559 for (int peel = 1; peel >= 0; peel--) { 8560 movptr(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes())); 8561 cmpptr(intf_klass, method_result); 8562 8563 if (peel) { 8564 jccb(Assembler::equal, found_method); 8565 } else { 8566 jccb(Assembler::notEqual, search); 8567 // (invert the test to fall through to found_method...) 8568 } 8569 8570 if (!peel) break; 8571 8572 bind(search); 8573 8574 // Check that the previous entry is non-null. A null entry means that 8575 // the receiver class doesn't implement the interface, and wasn't the 8576 // same as when the caller was compiled. 8577 testptr(method_result, method_result); 8578 jcc(Assembler::zero, L_no_such_interface); 8579 addptr(scan_temp, scan_step); 8580 } 8581 8582 bind(found_method); 8583 8584 // Got a hit. 
8585 movl(scan_temp, Address(scan_temp, itableOffsetEntry::offset_offset_in_bytes())); 8586 movptr(method_result, Address(recv_klass, scan_temp, Address::times_1)); 8587 } 8588 8589 8590 void MacroAssembler::check_klass_subtype(Register sub_klass, 8591 Register super_klass, 8592 Register temp_reg, 8593 Label& L_success) { 8594 Label L_failure; 8595 check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg, &L_success, &L_failure, NULL); 8596 check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, noreg, &L_success, NULL); 8597 bind(L_failure); 8598 } 8599 8600 8601 void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass, 8602 Register super_klass, 8603 Register temp_reg, 8604 Label* L_success, 8605 Label* L_failure, 8606 Label* L_slow_path, 8607 RegisterOrConstant super_check_offset) { 8608 assert_different_registers(sub_klass, super_klass, temp_reg); 8609 bool must_load_sco = (super_check_offset.constant_or_zero() == -1); 8610 if (super_check_offset.is_register()) { 8611 assert_different_registers(sub_klass, super_klass, 8612 super_check_offset.as_register()); 8613 } else if (must_load_sco) { 8614 assert(temp_reg != noreg, "supply either a temp or a register offset"); 8615 } 8616 8617 Label L_fallthrough; 8618 int label_nulls = 0; 8619 if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } 8620 if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } 8621 if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; } 8622 assert(label_nulls <= 1, "at most one NULL in the batch"); 8623 8624 int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); 8625 int sco_offset = in_bytes(Klass::super_check_offset_offset()); 8626 Address super_check_offset_addr(super_klass, sco_offset); 8627 8628 // Hacked jcc, which "knows" that L_fallthrough, at least, is in 8629 // range of a jccb. If this routine grows larger, reconsider at 8630 // least some of these. 
8631 #define local_jcc(assembler_cond, label) \ 8632 if (&(label) == &L_fallthrough) jccb(assembler_cond, label); \ 8633 else jcc( assembler_cond, label) /*omit semi*/ 8634 8635 // Hacked jmp, which may only be used just before L_fallthrough. 8636 #define final_jmp(label) \ 8637 if (&(label) == &L_fallthrough) { /*do nothing*/ } \ 8638 else jmp(label) /*omit semi*/ 8639 8640 // If the pointers are equal, we are done (e.g., String[] elements). 8641 // This self-check enables sharing of secondary supertype arrays among 8642 // non-primary types such as array-of-interface. Otherwise, each such 8643 // type would need its own customized SSA. 8644 // We move this check to the front of the fast path because many 8645 // type checks are in fact trivially successful in this manner, 8646 // so we get a nicely predicted branch right at the start of the check. 8647 cmpptr(sub_klass, super_klass); 8648 local_jcc(Assembler::equal, *L_success); 8649 8650 // Check the supertype display: 8651 if (must_load_sco) { 8652 // Positive movl does right thing on LP64. 8653 movl(temp_reg, super_check_offset_addr); 8654 super_check_offset = RegisterOrConstant(temp_reg); 8655 } 8656 Address super_check_addr(sub_klass, super_check_offset, Address::times_1, 0); 8657 cmpptr(super_klass, super_check_addr); // load displayed supertype 8658 8659 // This check has worked decisively for primary supers. 8660 // Secondary supers are sought in the super_cache ('super_cache_addr'). 8661 // (Secondary supers are interfaces and very deeply nested subtypes.) 8662 // This works in the same check above because of a tricky aliasing 8663 // between the super_cache and the primary super display elements. 8664 // (The 'super_check_addr' can address either, as the case requires.) 8665 // Note that the cache is updated below if it does not help us find 8666 // what we need immediately. 8667 // So if it was a primary super, we can just fail immediately. 
8668 // Otherwise, it's the slow path for us (no success at this point). 8669 8670 if (super_check_offset.is_register()) { 8671 local_jcc(Assembler::equal, *L_success); 8672 cmpl(super_check_offset.as_register(), sc_offset); 8673 if (L_failure == &L_fallthrough) { 8674 local_jcc(Assembler::equal, *L_slow_path); 8675 } else { 8676 local_jcc(Assembler::notEqual, *L_failure); 8677 final_jmp(*L_slow_path); 8678 } 8679 } else if (super_check_offset.as_constant() == sc_offset) { 8680 // Need a slow path; fast failure is impossible. 8681 if (L_slow_path == &L_fallthrough) { 8682 local_jcc(Assembler::equal, *L_success); 8683 } else { 8684 local_jcc(Assembler::notEqual, *L_slow_path); 8685 final_jmp(*L_success); 8686 } 8687 } else { 8688 // No slow path; it's a fast decision. 8689 if (L_failure == &L_fallthrough) { 8690 local_jcc(Assembler::equal, *L_success); 8691 } else { 8692 local_jcc(Assembler::notEqual, *L_failure); 8693 final_jmp(*L_success); 8694 } 8695 } 8696 8697 bind(L_fallthrough); 8698 8699 #undef local_jcc 8700 #undef final_jmp 8701 } 8702 8703 8704 void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass, 8705 Register super_klass, 8706 Register temp_reg, 8707 Register temp2_reg, 8708 Label* L_success, 8709 Label* L_failure, 8710 bool set_cond_codes) { 8711 assert_different_registers(sub_klass, super_klass, temp_reg); 8712 if (temp2_reg != noreg) 8713 assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg); 8714 #define IS_A_TEMP(reg) ((reg) == temp_reg || (reg) == temp2_reg) 8715 8716 Label L_fallthrough; 8717 int label_nulls = 0; 8718 if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } 8719 if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } 8720 assert(label_nulls <= 1, "at most one NULL in the batch"); 8721 8722 // a couple of useful fields in sub_klass: 8723 int ss_offset = in_bytes(Klass::secondary_supers_offset()); 8724 int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); 8725 
Address secondary_supers_addr(sub_klass, ss_offset); 8726 Address super_cache_addr( sub_klass, sc_offset); 8727 8728 // Do a linear scan of the secondary super-klass chain. 8729 // This code is rarely used, so simplicity is a virtue here. 8730 // The repne_scan instruction uses fixed registers, which we must spill. 8731 // Don't worry too much about pre-existing connections with the input regs. 8732 8733 assert(sub_klass != rax, "killed reg"); // killed by mov(rax, super) 8734 assert(sub_klass != rcx, "killed reg"); // killed by lea(rcx, &pst_counter) 8735 8736 // Get super_klass value into rax (even if it was in rdi or rcx). 8737 bool pushed_rax = false, pushed_rcx = false, pushed_rdi = false; 8738 if (super_klass != rax || UseCompressedOops) { 8739 if (!IS_A_TEMP(rax)) { push(rax); pushed_rax = true; } 8740 mov(rax, super_klass); 8741 } 8742 if (!IS_A_TEMP(rcx)) { push(rcx); pushed_rcx = true; } 8743 if (!IS_A_TEMP(rdi)) { push(rdi); pushed_rdi = true; } 8744 8745 #ifndef PRODUCT 8746 int* pst_counter = &SharedRuntime::_partial_subtype_ctr; 8747 ExternalAddress pst_counter_addr((address) pst_counter); 8748 NOT_LP64( incrementl(pst_counter_addr) ); 8749 LP64_ONLY( lea(rcx, pst_counter_addr) ); 8750 LP64_ONLY( incrementl(Address(rcx, 0)) ); 8751 #endif //PRODUCT 8752 8753 // We will consult the secondary-super array. 8754 movptr(rdi, secondary_supers_addr); 8755 // Load the array length. (Positive movl does right thing on LP64.) 8756 movl(rcx, Address(rdi, arrayOopDesc::length_offset_in_bytes())); 8757 // Skip to start of data. 8758 addptr(rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); 8759 8760 // Scan RCX words at [RDI] for an occurrence of RAX. 8761 // Set NZ/Z based on last compare. 8762 // Z flag value will not be set by 'repne' if RCX == 0 since 'repne' does 8763 // not change flags (only scas instruction which is repeated sets flags). 8764 // Set Z = 0 (not equal) before 'repne' to indicate that class was not found. 
8765 #ifdef _LP64 8766 // This part is tricky, as values in supers array could be 32 or 64 bit wide 8767 // and we store values in objArrays always encoded, thus we need to encode 8768 // the value of rax before repne. Note that rax is dead after the repne. 8769 if (UseCompressedOops) { 8770 encode_heap_oop_not_null(rax); // Changes flags. 8771 // The superclass is never null; it would be a basic system error if a null 8772 // pointer were to sneak in here. Note that we have already loaded the 8773 // Klass::super_check_offset from the super_klass in the fast path, 8774 // so if there is a null in that register, we are already in the afterlife. 8775 testl(rax,rax); // Set Z = 0 8776 repne_scanl(); 8777 } else 8778 #endif // _LP64 8779 { 8780 testptr(rax,rax); // Set Z = 0 8781 repne_scan(); 8782 } 8783 // Unspill the temp. registers: 8784 if (pushed_rdi) pop(rdi); 8785 if (pushed_rcx) pop(rcx); 8786 if (pushed_rax) pop(rax); 8787 8788 if (set_cond_codes) { 8789 // Special hack for the AD files: rdi is guaranteed non-zero. 8790 assert(!pushed_rdi, "rdi must be left non-NULL"); 8791 // Also, the condition codes are properly set Z/NZ on succeed/failure. 8792 } 8793 8794 if (L_failure == &L_fallthrough) 8795 jccb(Assembler::notEqual, *L_failure); 8796 else jcc(Assembler::notEqual, *L_failure); 8797 8798 // Success. Cache the super we found and proceed in triumph. 
8799 movptr(super_cache_addr, super_klass); 8800 8801 if (L_success != &L_fallthrough) { 8802 jmp(*L_success); 8803 } 8804 8805 #undef IS_A_TEMP 8806 8807 bind(L_fallthrough); 8808 } 8809 8810 8811 void MacroAssembler::cmov32(Condition cc, Register dst, Address src) { 8812 if (VM_Version::supports_cmov()) { 8813 cmovl(cc, dst, src); 8814 } else { 8815 Label L; 8816 jccb(negate_condition(cc), L); 8817 movl(dst, src); 8818 bind(L); 8819 } 8820 } 8821 8822 void MacroAssembler::cmov32(Condition cc, Register dst, Register src) { 8823 if (VM_Version::supports_cmov()) { 8824 cmovl(cc, dst, src); 8825 } else { 8826 Label L; 8827 jccb(negate_condition(cc), L); 8828 movl(dst, src); 8829 bind(L); 8830 } 8831 } 8832 8833 void MacroAssembler::verify_oop(Register reg, const char* s) { 8834 if (!VerifyOops) return; 8835 8836 // Pass register number to verify_oop_subroutine 8837 char* b = new char[strlen(s) + 50]; 8838 sprintf(b, "verify_oop: %s: %s", reg->name(), s); 8839 #ifdef _LP64 8840 push(rscratch1); // save r10, trashed by movptr() 8841 #endif 8842 push(rax); // save rax, 8843 push(reg); // pass register argument 8844 ExternalAddress buffer((address) b); 8845 // avoid using pushptr, as it modifies scratch registers 8846 // and our contract is not to modify anything 8847 movptr(rax, buffer.addr()); 8848 push(rax); 8849 // call indirectly to solve generation ordering problem 8850 movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address())); 8851 call(rax); 8852 // Caller pops the arguments (oop, message) and restores rax, r10 8853 } 8854 8855 8856 RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr, 8857 Register tmp, 8858 int offset) { 8859 intptr_t value = *delayed_value_addr; 8860 if (value != 0) 8861 return RegisterOrConstant(value + offset); 8862 8863 // load indirectly to solve generation ordering problem 8864 movptr(tmp, ExternalAddress((address) delayed_value_addr)); 8865 8866 #ifdef ASSERT 8867 { Label L; 8868 
testptr(tmp, tmp); 8869 if (WizardMode) { 8870 jcc(Assembler::notZero, L); 8871 char* buf = new char[40]; 8872 sprintf(buf, "DelayedValue="INTPTR_FORMAT, delayed_value_addr[1]); 8873 stop(buf); 8874 } else { 8875 jccb(Assembler::notZero, L); 8876 hlt(); 8877 } 8878 bind(L); 8879 } 8880 #endif 8881 8882 if (offset != 0) 8883 addptr(tmp, offset); 8884 8885 return RegisterOrConstant(tmp); 8886 } 8887 8888 8889 // registers on entry: 8890 // - rax ('check' register): required MethodType 8891 // - rcx: method handle 8892 // - rdx, rsi, or ?: killable temp 8893 void MacroAssembler::check_method_handle_type(Register mtype_reg, Register mh_reg, 8894 Register temp_reg, 8895 Label& wrong_method_type) { 8896 Address type_addr(mh_reg, delayed_value(java_lang_invoke_MethodHandle::type_offset_in_bytes, temp_reg)); 8897 // compare method type against that of the receiver 8898 if (UseCompressedOops) { 8899 load_heap_oop(temp_reg, type_addr); 8900 cmpptr(mtype_reg, temp_reg); 8901 } else { 8902 cmpptr(mtype_reg, type_addr); 8903 } 8904 jcc(Assembler::notEqual, wrong_method_type); 8905 } 8906 8907 8908 // A method handle has a "vmslots" field which gives the size of its 8909 // argument list in JVM stack slots. This field is either located directly 8910 // in every method handle, or else is indirectly accessed through the 8911 // method handle's MethodType. This macro hides the distinction. 
8912 void MacroAssembler::load_method_handle_vmslots(Register vmslots_reg, Register mh_reg, 8913 Register temp_reg) { 8914 assert_different_registers(vmslots_reg, mh_reg, temp_reg); 8915 // load mh.type.form.vmslots 8916 Register temp2_reg = vmslots_reg; 8917 load_heap_oop(temp2_reg, Address(mh_reg, delayed_value(java_lang_invoke_MethodHandle::type_offset_in_bytes, temp_reg))); 8918 load_heap_oop(temp2_reg, Address(temp2_reg, delayed_value(java_lang_invoke_MethodType::form_offset_in_bytes, temp_reg))); 8919 movl(vmslots_reg, Address(temp2_reg, delayed_value(java_lang_invoke_MethodTypeForm::vmslots_offset_in_bytes, temp_reg))); 8920 } 8921 8922 8923 // registers on entry: 8924 // - rcx: method handle 8925 // - rdx: killable temp (interpreted only) 8926 // - rax: killable temp (compiled only) 8927 void MacroAssembler::jump_to_method_handle_entry(Register mh_reg, Register temp_reg) { 8928 assert(mh_reg == rcx, "caller must put MH object in rcx"); 8929 assert_different_registers(mh_reg, temp_reg); 8930 8931 // pick out the interpreted side of the handler 8932 // NOTE: vmentry is not an oop! 8933 movptr(temp_reg, Address(mh_reg, delayed_value(java_lang_invoke_MethodHandle::vmentry_offset_in_bytes, temp_reg))); 8934 8935 // off we go... 8936 jmp(Address(temp_reg, MethodHandleEntry::from_interpreted_entry_offset_in_bytes())); 8937 8938 // for the various stubs which take control at this point, 8939 // see MethodHandles::generate_method_handle_stub 8940 } 8941 8942 8943 Address MacroAssembler::argument_address(RegisterOrConstant arg_slot, 8944 int extra_slot_offset) { 8945 // cf. TemplateTable::prepare_invoke(), if (load_receiver). 
8946 int stackElementSize = Interpreter::stackElementSize; 8947 int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0); 8948 #ifdef ASSERT 8949 int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1); 8950 assert(offset1 - offset == stackElementSize, "correct arithmetic"); 8951 #endif 8952 Register scale_reg = noreg; 8953 Address::ScaleFactor scale_factor = Address::no_scale; 8954 if (arg_slot.is_constant()) { 8955 offset += arg_slot.as_constant() * stackElementSize; 8956 } else { 8957 scale_reg = arg_slot.as_register(); 8958 scale_factor = Address::times(stackElementSize); 8959 } 8960 offset += wordSize; // return PC is on stack 8961 return Address(rsp, scale_reg, scale_factor, offset); 8962 } 8963 8964 8965 void MacroAssembler::verify_oop_addr(Address addr, const char* s) { 8966 if (!VerifyOops) return; 8967 8968 // Address adjust(addr.base(), addr.index(), addr.scale(), addr.disp() + BytesPerWord); 8969 // Pass register number to verify_oop_subroutine 8970 char* b = new char[strlen(s) + 50]; 8971 sprintf(b, "verify_oop_addr: %s", s); 8972 8973 #ifdef _LP64 8974 push(rscratch1); // save r10, trashed by movptr() 8975 #endif 8976 push(rax); // save rax, 8977 // addr may contain rsp so we will have to adjust it based on the push 8978 // we just did (and on 64 bit we do two pushes) 8979 // NOTE: 64bit seemed to have had a bug in that it did movq(addr, rax); which 8980 // stores rax into addr which is backwards of what was intended. 
8981 if (addr.uses(rsp)) { 8982 lea(rax, addr); 8983 pushptr(Address(rax, LP64_ONLY(2 *) BytesPerWord)); 8984 } else { 8985 pushptr(addr); 8986 } 8987 8988 ExternalAddress buffer((address) b); 8989 // pass msg argument 8990 // avoid using pushptr, as it modifies scratch registers 8991 // and our contract is not to modify anything 8992 movptr(rax, buffer.addr()); 8993 push(rax); 8994 8995 // call indirectly to solve generation ordering problem 8996 movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address())); 8997 call(rax); 8998 // Caller pops the arguments (addr, message) and restores rax, r10. 8999 } 9000 9001 void MacroAssembler::verify_tlab() { 9002 #ifdef ASSERT 9003 if (UseTLAB && VerifyOops) { 9004 Label next, ok; 9005 Register t1 = rsi; 9006 Register thread_reg = NOT_LP64(rbx) LP64_ONLY(r15_thread); 9007 9008 push(t1); 9009 NOT_LP64(push(thread_reg)); 9010 NOT_LP64(get_thread(thread_reg)); 9011 9012 movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset()))); 9013 cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_start_offset()))); 9014 jcc(Assembler::aboveEqual, next); 9015 stop("assert(top >= start)"); 9016 should_not_reach_here(); 9017 9018 bind(next); 9019 movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_end_offset()))); 9020 cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset()))); 9021 jcc(Assembler::aboveEqual, ok); 9022 stop("assert(top <= end)"); 9023 should_not_reach_here(); 9024 9025 bind(ok); 9026 NOT_LP64(pop(thread_reg)); 9027 pop(t1); 9028 } 9029 #endif 9030 } 9031 9032 class ControlWord { 9033 public: 9034 int32_t _value; 9035 9036 int rounding_control() const { return (_value >> 10) & 3 ; } 9037 int precision_control() const { return (_value >> 8) & 3 ; } 9038 bool precision() const { return ((_value >> 5) & 1) != 0; } 9039 bool underflow() const { return ((_value >> 4) & 1) != 0; } 9040 bool overflow() const { return ((_value >> 3) & 1) != 0; } 9041 bool zero_divide() const { 
return ((_value >> 2) & 1) != 0; } 9042 bool denormalized() const { return ((_value >> 1) & 1) != 0; } 9043 bool invalid() const { return ((_value >> 0) & 1) != 0; } 9044 9045 void print() const { 9046 // rounding control 9047 const char* rc; 9048 switch (rounding_control()) { 9049 case 0: rc = "round near"; break; 9050 case 1: rc = "round down"; break; 9051 case 2: rc = "round up "; break; 9052 case 3: rc = "chop "; break; 9053 }; 9054 // precision control 9055 const char* pc; 9056 switch (precision_control()) { 9057 case 0: pc = "24 bits "; break; 9058 case 1: pc = "reserved"; break; 9059 case 2: pc = "53 bits "; break; 9060 case 3: pc = "64 bits "; break; 9061 }; 9062 // flags 9063 char f[9]; 9064 f[0] = ' '; 9065 f[1] = ' '; 9066 f[2] = (precision ()) ? 'P' : 'p'; 9067 f[3] = (underflow ()) ? 'U' : 'u'; 9068 f[4] = (overflow ()) ? 'O' : 'o'; 9069 f[5] = (zero_divide ()) ? 'Z' : 'z'; 9070 f[6] = (denormalized()) ? 'D' : 'd'; 9071 f[7] = (invalid ()) ? 'I' : 'i'; 9072 f[8] = '\x0'; 9073 // output 9074 printf("%04x masks = %s, %s, %s", _value & 0xFFFF, f, rc, pc); 9075 } 9076 9077 }; 9078 9079 class StatusWord { 9080 public: 9081 int32_t _value; 9082 9083 bool busy() const { return ((_value >> 15) & 1) != 0; } 9084 bool C3() const { return ((_value >> 14) & 1) != 0; } 9085 bool C2() const { return ((_value >> 10) & 1) != 0; } 9086 bool C1() const { return ((_value >> 9) & 1) != 0; } 9087 bool C0() const { return ((_value >> 8) & 1) != 0; } 9088 int top() const { return (_value >> 11) & 7 ; } 9089 bool error_status() const { return ((_value >> 7) & 1) != 0; } 9090 bool stack_fault() const { return ((_value >> 6) & 1) != 0; } 9091 bool precision() const { return ((_value >> 5) & 1) != 0; } 9092 bool underflow() const { return ((_value >> 4) & 1) != 0; } 9093 bool overflow() const { return ((_value >> 3) & 1) != 0; } 9094 bool zero_divide() const { return ((_value >> 2) & 1) != 0; } 9095 bool denormalized() const { return ((_value >> 1) & 1) != 0; } 9096 bool 
invalid() const { return ((_value >> 0) & 1) != 0; } 9097 9098 void print() const { 9099 // condition codes 9100 char c[5]; 9101 c[0] = (C3()) ? '3' : '-'; 9102 c[1] = (C2()) ? '2' : '-'; 9103 c[2] = (C1()) ? '1' : '-'; 9104 c[3] = (C0()) ? '0' : '-'; 9105 c[4] = '\x0'; 9106 // flags 9107 char f[9]; 9108 f[0] = (error_status()) ? 'E' : '-'; 9109 f[1] = (stack_fault ()) ? 'S' : '-'; 9110 f[2] = (precision ()) ? 'P' : '-'; 9111 f[3] = (underflow ()) ? 'U' : '-'; 9112 f[4] = (overflow ()) ? 'O' : '-'; 9113 f[5] = (zero_divide ()) ? 'Z' : '-'; 9114 f[6] = (denormalized()) ? 'D' : '-'; 9115 f[7] = (invalid ()) ? 'I' : '-'; 9116 f[8] = '\x0'; 9117 // output 9118 printf("%04x flags = %s, cc = %s, top = %d", _value & 0xFFFF, f, c, top()); 9119 } 9120 9121 }; 9122 9123 class TagWord { 9124 public: 9125 int32_t _value; 9126 9127 int tag_at(int i) const { return (_value >> (i*2)) & 3; } 9128 9129 void print() const { 9130 printf("%04x", _value & 0xFFFF); 9131 } 9132 9133 }; 9134 9135 class FPU_Register { 9136 public: 9137 int32_t _m0; 9138 int32_t _m1; 9139 int16_t _ex; 9140 9141 bool is_indefinite() const { 9142 return _ex == -1 && _m1 == (int32_t)0xC0000000 && _m0 == 0; 9143 } 9144 9145 void print() const { 9146 char sign = (_ex < 0) ? '-' : '+'; 9147 const char* kind = (_ex == 0x7FFF || _ex == (int16_t)-1) ? 
"NaN" : " "; 9148 printf("%c%04hx.%08x%08x %s", sign, _ex, _m1, _m0, kind); 9149 }; 9150 9151 }; 9152 9153 class FPU_State { 9154 public: 9155 enum { 9156 register_size = 10, 9157 number_of_registers = 8, 9158 register_mask = 7 9159 }; 9160 9161 ControlWord _control_word; 9162 StatusWord _status_word; 9163 TagWord _tag_word; 9164 int32_t _error_offset; 9165 int32_t _error_selector; 9166 int32_t _data_offset; 9167 int32_t _data_selector; 9168 int8_t _register[register_size * number_of_registers]; 9169 9170 int tag_for_st(int i) const { return _tag_word.tag_at((_status_word.top() + i) & register_mask); } 9171 FPU_Register* st(int i) const { return (FPU_Register*)&_register[register_size * i]; } 9172 9173 const char* tag_as_string(int tag) const { 9174 switch (tag) { 9175 case 0: return "valid"; 9176 case 1: return "zero"; 9177 case 2: return "special"; 9178 case 3: return "empty"; 9179 } 9180 ShouldNotReachHere(); 9181 return NULL; 9182 } 9183 9184 void print() const { 9185 // print computation registers 9186 { int t = _status_word.top(); 9187 for (int i = 0; i < number_of_registers; i++) { 9188 int j = (i - t) & register_mask; 9189 printf("%c r%d = ST%d = ", (j == 0 ? 
'*' : ' '), i, j); 9190 st(j)->print(); 9191 printf(" %s\n", tag_as_string(_tag_word.tag_at(i))); 9192 } 9193 } 9194 printf("\n"); 9195 // print control registers 9196 printf("ctrl = "); _control_word.print(); printf("\n"); 9197 printf("stat = "); _status_word .print(); printf("\n"); 9198 printf("tags = "); _tag_word .print(); printf("\n"); 9199 } 9200 9201 }; 9202 9203 class Flag_Register { 9204 public: 9205 int32_t _value; 9206 9207 bool overflow() const { return ((_value >> 11) & 1) != 0; } 9208 bool direction() const { return ((_value >> 10) & 1) != 0; } 9209 bool sign() const { return ((_value >> 7) & 1) != 0; } 9210 bool zero() const { return ((_value >> 6) & 1) != 0; } 9211 bool auxiliary_carry() const { return ((_value >> 4) & 1) != 0; } 9212 bool parity() const { return ((_value >> 2) & 1) != 0; } 9213 bool carry() const { return ((_value >> 0) & 1) != 0; } 9214 9215 void print() const { 9216 // flags 9217 char f[8]; 9218 f[0] = (overflow ()) ? 'O' : '-'; 9219 f[1] = (direction ()) ? 'D' : '-'; 9220 f[2] = (sign ()) ? 'S' : '-'; 9221 f[3] = (zero ()) ? 'Z' : '-'; 9222 f[4] = (auxiliary_carry()) ? 'A' : '-'; 9223 f[5] = (parity ()) ? 'P' : '-'; 9224 f[6] = (carry ()) ? 
'C' : '-'; 9225 f[7] = '\x0'; 9226 // output 9227 printf("%08x flags = %s", _value, f); 9228 } 9229 9230 }; 9231 9232 class IU_Register { 9233 public: 9234 int32_t _value; 9235 9236 void print() const { 9237 printf("%08x %11d", _value, _value); 9238 } 9239 9240 }; 9241 9242 class IU_State { 9243 public: 9244 Flag_Register _eflags; 9245 IU_Register _rdi; 9246 IU_Register _rsi; 9247 IU_Register _rbp; 9248 IU_Register _rsp; 9249 IU_Register _rbx; 9250 IU_Register _rdx; 9251 IU_Register _rcx; 9252 IU_Register _rax; 9253 9254 void print() const { 9255 // computation registers 9256 printf("rax, = "); _rax.print(); printf("\n"); 9257 printf("rbx, = "); _rbx.print(); printf("\n"); 9258 printf("rcx = "); _rcx.print(); printf("\n"); 9259 printf("rdx = "); _rdx.print(); printf("\n"); 9260 printf("rdi = "); _rdi.print(); printf("\n"); 9261 printf("rsi = "); _rsi.print(); printf("\n"); 9262 printf("rbp, = "); _rbp.print(); printf("\n"); 9263 printf("rsp = "); _rsp.print(); printf("\n"); 9264 printf("\n"); 9265 // control registers 9266 printf("flgs = "); _eflags.print(); printf("\n"); 9267 } 9268 }; 9269 9270 9271 class CPU_State { 9272 public: 9273 FPU_State _fpu_state; 9274 IU_State _iu_state; 9275 9276 void print() const { 9277 printf("--------------------------------------------------\n"); 9278 _iu_state .print(); 9279 printf("\n"); 9280 _fpu_state.print(); 9281 printf("--------------------------------------------------\n"); 9282 } 9283 9284 }; 9285 9286 9287 static void _print_CPU_state(CPU_State* state) { 9288 state->print(); 9289 }; 9290 9291 9292 void MacroAssembler::print_CPU_state() { 9293 push_CPU_state(); 9294 push(rsp); // pass CPU state 9295 call(RuntimeAddress(CAST_FROM_FN_PTR(address, _print_CPU_state))); 9296 addptr(rsp, wordSize); // discard argument 9297 pop_CPU_state(); 9298 } 9299 9300 9301 static bool _verify_FPU(int stack_depth, char* s, CPU_State* state) { 9302 static int counter = 0; 9303 FPU_State* fs = &state->_fpu_state; 9304 counter++; 9305 // For 
leaf calls, only verify that the top few elements remain empty. 9306 // We only need 1 empty at the top for C2 code. 9307 if( stack_depth < 0 ) { 9308 if( fs->tag_for_st(7) != 3 ) { 9309 printf("FPR7 not empty\n"); 9310 state->print(); 9311 assert(false, "error"); 9312 return false; 9313 } 9314 return true; // All other stack states do not matter 9315 } 9316 9317 assert((fs->_control_word._value & 0xffff) == StubRoutines::_fpu_cntrl_wrd_std, 9318 "bad FPU control word"); 9319 9320 // compute stack depth 9321 int i = 0; 9322 while (i < FPU_State::number_of_registers && fs->tag_for_st(i) < 3) i++; 9323 int d = i; 9324 while (i < FPU_State::number_of_registers && fs->tag_for_st(i) == 3) i++; 9325 // verify findings 9326 if (i != FPU_State::number_of_registers) { 9327 // stack not contiguous 9328 printf("%s: stack not contiguous at ST%d\n", s, i); 9329 state->print(); 9330 assert(false, "error"); 9331 return false; 9332 } 9333 // check if computed stack depth corresponds to expected stack depth 9334 if (stack_depth < 0) { 9335 // expected stack depth is -stack_depth or less 9336 if (d > -stack_depth) { 9337 // too many elements on the stack 9338 printf("%s: <= %d stack elements expected but found %d\n", s, -stack_depth, d); 9339 state->print(); 9340 assert(false, "error"); 9341 return false; 9342 } 9343 } else { 9344 // expected stack depth is stack_depth 9345 if (d != stack_depth) { 9346 // wrong stack depth 9347 printf("%s: %d stack elements expected but found %d\n", s, stack_depth, d); 9348 state->print(); 9349 assert(false, "error"); 9350 return false; 9351 } 9352 } 9353 // everything is cool 9354 return true; 9355 } 9356 9357 9358 void MacroAssembler::verify_FPU(int stack_depth, const char* s) { 9359 if (!VerifyFPU) return; 9360 push_CPU_state(); 9361 push(rsp); // pass CPU state 9362 ExternalAddress msg((address) s); 9363 // pass message string s 9364 pushptr(msg.addr()); 9365 push(stack_depth); // pass stack depth 9366 
call(RuntimeAddress(CAST_FROM_FN_PTR(address, _verify_FPU))); 9367 addptr(rsp, 3 * wordSize); // discard arguments 9368 // check for error 9369 { Label L; 9370 testl(rax, rax); 9371 jcc(Assembler::notZero, L); 9372 int3(); // break if error condition 9373 bind(L); 9374 } 9375 pop_CPU_state(); 9376 } 9377 9378 void MacroAssembler::load_klass(Register dst, Register src) { 9379 #ifdef _LP64 9380 if (UseCompressedOops) { 9381 movl(dst, Address(src, oopDesc::klass_offset_in_bytes())); 9382 decode_heap_oop_not_null(dst); 9383 } else 9384 #endif 9385 movptr(dst, Address(src, oopDesc::klass_offset_in_bytes())); 9386 } 9387 9388 void MacroAssembler::load_prototype_header(Register dst, Register src) { 9389 #ifdef _LP64 9390 if (UseCompressedOops) { 9391 assert (Universe::heap() != NULL, "java heap should be initialized"); 9392 movl(dst, Address(src, oopDesc::klass_offset_in_bytes())); 9393 if (Universe::narrow_oop_shift() != 0) { 9394 assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); 9395 if (LogMinObjAlignmentInBytes == Address::times_8) { 9396 movq(dst, Address(r12_heapbase, dst, Address::times_8, Klass::prototype_header_offset())); 9397 } else { 9398 // OK to use shift since we don't need to preserve flags. 
9399 shlq(dst, LogMinObjAlignmentInBytes); 9400 movq(dst, Address(r12_heapbase, dst, Address::times_1, Klass::prototype_header_offset())); 9401 } 9402 } else { 9403 movq(dst, Address(dst, Klass::prototype_header_offset())); 9404 } 9405 } else 9406 #endif 9407 { 9408 movptr(dst, Address(src, oopDesc::klass_offset_in_bytes())); 9409 movptr(dst, Address(dst, Klass::prototype_header_offset())); 9410 } 9411 } 9412 9413 void MacroAssembler::store_klass(Register dst, Register src) { 9414 #ifdef _LP64 9415 if (UseCompressedOops) { 9416 encode_heap_oop_not_null(src); 9417 movl(Address(dst, oopDesc::klass_offset_in_bytes()), src); 9418 } else 9419 #endif 9420 movptr(Address(dst, oopDesc::klass_offset_in_bytes()), src); 9421 } 9422 9423 void MacroAssembler::load_heap_oop(Register dst, Address src) { 9424 #ifdef _LP64 9425 if (UseCompressedOops) { 9426 movl(dst, src); 9427 decode_heap_oop(dst); 9428 } else 9429 #endif 9430 movptr(dst, src); 9431 } 9432 9433 // Doesn't do verfication, generates fixed size code 9434 void MacroAssembler::load_heap_oop_not_null(Register dst, Address src) { 9435 #ifdef _LP64 9436 if (UseCompressedOops) { 9437 movl(dst, src); 9438 decode_heap_oop_not_null(dst); 9439 } else 9440 #endif 9441 movptr(dst, src); 9442 } 9443 9444 void MacroAssembler::store_heap_oop(Address dst, Register src) { 9445 #ifdef _LP64 9446 if (UseCompressedOops) { 9447 assert(!dst.uses(src), "not enough registers"); 9448 encode_heap_oop(src); 9449 movl(dst, src); 9450 } else 9451 #endif 9452 movptr(dst, src); 9453 } 9454 9455 // Used for storing NULLs. 
9456 void MacroAssembler::store_heap_oop_null(Address dst) { 9457 #ifdef _LP64 9458 if (UseCompressedOops) { 9459 movl(dst, (int32_t)NULL_WORD); 9460 } else { 9461 movslq(dst, (int32_t)NULL_WORD); 9462 } 9463 #else 9464 movl(dst, (int32_t)NULL_WORD); 9465 #endif 9466 } 9467 9468 #ifdef _LP64 9469 void MacroAssembler::store_klass_gap(Register dst, Register src) { 9470 if (UseCompressedOops) { 9471 // Store to klass gap in destination 9472 movl(Address(dst, oopDesc::klass_gap_offset_in_bytes()), src); 9473 } 9474 } 9475 9476 #ifdef ASSERT 9477 void MacroAssembler::verify_heapbase(const char* msg) { 9478 assert (UseCompressedOops, "should be compressed"); 9479 assert (Universe::heap() != NULL, "java heap should be initialized"); 9480 if (CheckCompressedOops) { 9481 Label ok; 9482 push(rscratch1); // cmpptr trashes rscratch1 9483 cmpptr(r12_heapbase, ExternalAddress((address)Universe::narrow_oop_base_addr())); 9484 jcc(Assembler::equal, ok); 9485 stop(msg); 9486 bind(ok); 9487 pop(rscratch1); 9488 } 9489 } 9490 #endif 9491 9492 // Algorithm must match oop.inline.hpp encode_heap_oop. 
9493 void MacroAssembler::encode_heap_oop(Register r) { 9494 #ifdef ASSERT 9495 verify_heapbase("MacroAssembler::encode_heap_oop: heap base corrupted?"); 9496 #endif 9497 verify_oop(r, "broken oop in encode_heap_oop"); 9498 if (Universe::narrow_oop_base() == NULL) { 9499 if (Universe::narrow_oop_shift() != 0) { 9500 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); 9501 shrq(r, LogMinObjAlignmentInBytes); 9502 } 9503 return; 9504 } 9505 testq(r, r); 9506 cmovq(Assembler::equal, r, r12_heapbase); 9507 subq(r, r12_heapbase); 9508 shrq(r, LogMinObjAlignmentInBytes); 9509 } 9510 9511 void MacroAssembler::encode_heap_oop_not_null(Register r) { 9512 #ifdef ASSERT 9513 verify_heapbase("MacroAssembler::encode_heap_oop_not_null: heap base corrupted?"); 9514 if (CheckCompressedOops) { 9515 Label ok; 9516 testq(r, r); 9517 jcc(Assembler::notEqual, ok); 9518 stop("null oop passed to encode_heap_oop_not_null"); 9519 bind(ok); 9520 } 9521 #endif 9522 verify_oop(r, "broken oop in encode_heap_oop_not_null"); 9523 if (Universe::narrow_oop_base() != NULL) { 9524 subq(r, r12_heapbase); 9525 } 9526 if (Universe::narrow_oop_shift() != 0) { 9527 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); 9528 shrq(r, LogMinObjAlignmentInBytes); 9529 } 9530 } 9531 9532 void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) { 9533 #ifdef ASSERT 9534 verify_heapbase("MacroAssembler::encode_heap_oop_not_null2: heap base corrupted?"); 9535 if (CheckCompressedOops) { 9536 Label ok; 9537 testq(src, src); 9538 jcc(Assembler::notEqual, ok); 9539 stop("null oop passed to encode_heap_oop_not_null2"); 9540 bind(ok); 9541 } 9542 #endif 9543 verify_oop(src, "broken oop in encode_heap_oop_not_null2"); 9544 if (dst != src) { 9545 movq(dst, src); 9546 } 9547 if (Universe::narrow_oop_base() != NULL) { 9548 subq(dst, r12_heapbase); 9549 } 9550 if (Universe::narrow_oop_shift() != 0) { 9551 assert (LogMinObjAlignmentInBytes 
== Universe::narrow_oop_shift(), "decode alg wrong"); 9552 shrq(dst, LogMinObjAlignmentInBytes); 9553 } 9554 } 9555 9556 void MacroAssembler::decode_heap_oop(Register r) { 9557 #ifdef ASSERT 9558 verify_heapbase("MacroAssembler::decode_heap_oop: heap base corrupted?"); 9559 #endif 9560 if (Universe::narrow_oop_base() == NULL) { 9561 if (Universe::narrow_oop_shift() != 0) { 9562 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); 9563 shlq(r, LogMinObjAlignmentInBytes); 9564 } 9565 } else { 9566 Label done; 9567 shlq(r, LogMinObjAlignmentInBytes); 9568 jccb(Assembler::equal, done); 9569 addq(r, r12_heapbase); 9570 bind(done); 9571 } 9572 verify_oop(r, "broken oop in decode_heap_oop"); 9573 } 9574 9575 void MacroAssembler::decode_heap_oop_not_null(Register r) { 9576 // Note: it will change flags 9577 assert (UseCompressedOops, "should only be used for compressed headers"); 9578 assert (Universe::heap() != NULL, "java heap should be initialized"); 9579 // Cannot assert, unverified entry point counts instructions (see .ad file) 9580 // vtableStubs also counts instructions in pd_code_size_limit. 9581 // Also do not verify_oop as this is called by verify_oop. 9582 if (Universe::narrow_oop_shift() != 0) { 9583 assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); 9584 shlq(r, LogMinObjAlignmentInBytes); 9585 if (Universe::narrow_oop_base() != NULL) { 9586 addq(r, r12_heapbase); 9587 } 9588 } else { 9589 assert (Universe::narrow_oop_base() == NULL, "sanity"); 9590 } 9591 } 9592 9593 void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) { 9594 // Note: it will change flags 9595 assert (UseCompressedOops, "should only be used for compressed headers"); 9596 assert (Universe::heap() != NULL, "java heap should be initialized"); 9597 // Cannot assert, unverified entry point counts instructions (see .ad file) 9598 // vtableStubs also counts instructions in pd_code_size_limit. 
9599 // Also do not verify_oop as this is called by verify_oop. 9600 if (Universe::narrow_oop_shift() != 0) { 9601 assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); 9602 if (LogMinObjAlignmentInBytes == Address::times_8) { 9603 leaq(dst, Address(r12_heapbase, src, Address::times_8, 0)); 9604 } else { 9605 if (dst != src) { 9606 movq(dst, src); 9607 } 9608 shlq(dst, LogMinObjAlignmentInBytes); 9609 if (Universe::narrow_oop_base() != NULL) { 9610 addq(dst, r12_heapbase); 9611 } 9612 } 9613 } else { 9614 assert (Universe::narrow_oop_base() == NULL, "sanity"); 9615 if (dst != src) { 9616 movq(dst, src); 9617 } 9618 } 9619 } 9620 9621 void MacroAssembler::set_narrow_oop(Register dst, jobject obj) { 9622 assert (UseCompressedOops, "should only be used for compressed headers"); 9623 assert (Universe::heap() != NULL, "java heap should be initialized"); 9624 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); 9625 int oop_index = oop_recorder()->find_index(obj); 9626 RelocationHolder rspec = oop_Relocation::spec(oop_index); 9627 mov_narrow_oop(dst, oop_index, rspec); 9628 } 9629 9630 void MacroAssembler::set_narrow_oop(Address dst, jobject obj) { 9631 assert (UseCompressedOops, "should only be used for compressed headers"); 9632 assert (Universe::heap() != NULL, "java heap should be initialized"); 9633 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); 9634 int oop_index = oop_recorder()->find_index(obj); 9635 RelocationHolder rspec = oop_Relocation::spec(oop_index); 9636 mov_narrow_oop(dst, oop_index, rspec); 9637 } 9638 9639 void MacroAssembler::cmp_narrow_oop(Register dst, jobject obj) { 9640 assert (UseCompressedOops, "should only be used for compressed headers"); 9641 assert (Universe::heap() != NULL, "java heap should be initialized"); 9642 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); 9643 int oop_index = oop_recorder()->find_index(obj); 9644 RelocationHolder rspec 
= oop_Relocation::spec(oop_index); 9645 Assembler::cmp_narrow_oop(dst, oop_index, rspec); 9646 } 9647 9648 void MacroAssembler::cmp_narrow_oop(Address dst, jobject obj) { 9649 assert (UseCompressedOops, "should only be used for compressed headers"); 9650 assert (Universe::heap() != NULL, "java heap should be initialized"); 9651 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); 9652 int oop_index = oop_recorder()->find_index(obj); 9653 RelocationHolder rspec = oop_Relocation::spec(oop_index); 9654 Assembler::cmp_narrow_oop(dst, oop_index, rspec); 9655 } 9656 9657 void MacroAssembler::reinit_heapbase() { 9658 if (UseCompressedOops) { 9659 movptr(r12_heapbase, ExternalAddress((address)Universe::narrow_oop_base_addr())); 9660 } 9661 } 9662 #endif // _LP64 9663 9664 9665 // C2 compiled method's prolog code. 9666 void MacroAssembler::verified_entry(int framesize, bool stack_bang, bool fp_mode_24b) { 9667 9668 // WARNING: Initial instruction MUST be 5 bytes or longer so that 9669 // NativeJump::patch_verified_entry will be able to patch out the entry 9670 // code safely. The push to verify stack depth is ok at 5 bytes, 9671 // the frame allocation can be either 3 or 6 bytes. So if we don't do 9672 // stack bang then we must use the 6 byte frame allocation even if 9673 // we have no frame. :-( 9674 9675 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); 9676 // Remove word for return addr 9677 framesize -= wordSize; 9678 9679 // Calls to C2R adapters often do not accept exceptional returns. 9680 // We require that their callers must bang for them. But be careful, because 9681 // some VM calls (such as call site linkage) can use several kilobytes of 9682 // stack. But the stack safety zone should account for that. 9683 // See bugs 4446381, 4468289, 4497237. 
9684 if (stack_bang) { 9685 generate_stack_overflow_check(framesize); 9686 9687 // We always push rbp, so that on return to interpreter rbp, will be 9688 // restored correctly and we can correct the stack. 9689 push(rbp); 9690 // Remove word for ebp 9691 framesize -= wordSize; 9692 9693 // Create frame 9694 if (framesize) { 9695 subptr(rsp, framesize); 9696 } 9697 } else { 9698 // Create frame (force generation of a 4 byte immediate value) 9699 subptr_imm32(rsp, framesize); 9700 9701 // Save RBP register now. 9702 framesize -= wordSize; 9703 movptr(Address(rsp, framesize), rbp); 9704 } 9705 9706 if (VerifyStackAtCalls) { // Majik cookie to verify stack depth 9707 framesize -= wordSize; 9708 movptr(Address(rsp, framesize), (int32_t)0xbadb100d); 9709 } 9710 9711 #ifndef _LP64 9712 // If method sets FPU control word do it now 9713 if (fp_mode_24b) { 9714 fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24())); 9715 } 9716 if (UseSSE >= 2 && VerifyFPU) { 9717 verify_FPU(0, "FPU stack must be clean on entry"); 9718 } 9719 #endif 9720 9721 #ifdef ASSERT 9722 if (VerifyStackAtCalls) { 9723 Label L; 9724 push(rax); 9725 mov(rax, rsp); 9726 andptr(rax, StackAlignmentInBytes-1); 9727 cmpptr(rax, StackAlignmentInBytes-wordSize); 9728 pop(rax); 9729 jcc(Assembler::equal, L); 9730 stop("Stack is not properly aligned!"); 9731 bind(L); 9732 } 9733 #endif 9734 9735 } 9736 9737 9738 // IndexOf for constant substrings with size >= 8 chars 9739 // which don't need to be loaded through stack. 
// Emits code that searches the string at str1 (cnt1 elements) for the
// constant-length substring at str2 (int_cnt2 elements, int_cnt2 >= 8).
// On exit from the generated code, result holds the element index of the
// match, or -1 when no match was found.
//
// Register binding is fixed by pcmpestri: cnt1 must be rdx, cnt2 must be rax
// and tmp must be rcx (asserted below). The generated code overwrites cnt1,
// cnt2, tmp, result and vec.
void MacroAssembler::string_indexofC8(Register str1, Register str2,
                                      Register cnt1, Register cnt2,
                                      int int_cnt2,  Register result,
                                      XMMRegister vec, Register tmp) {
  ShortBranchVerifier sbv(this);
  assert(UseSSE42Intrinsics, "SSE4.2 is required");

  // This method uses pcmpestri instruction with bound registers
  //   inputs:
  //     xmm - substring
  //     rax - substring length (elements count)
  //     mem - scanned string
  //     rdx - string length (elements count)
  //     0xd - mode: 1100 (substring search) + 01 (unsigned shorts)
  //   outputs:
  //     rcx - matched index in string
  assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri");

  Label RELOAD_SUBSTR, SCAN_TO_SUBSTR, SCAN_SUBSTR,
        RET_FOUND, RET_NOT_FOUND, EXIT, FOUND_SUBSTR,
        MATCH_SUBSTR_HEAD, RELOAD_STR, FOUND_CANDIDATE;

  // Note, inline_string_indexOf() generates checks:
  // if (substr.count > string.count) return -1;
  // if (substr.count == 0) return 0;
  assert(int_cnt2 >= 8, "this code isused only for cnt2 >= 8 chars");

  // Load substring.
  movdqu(vec, Address(str2, 0));
  movl(cnt2, int_cnt2);
  movptr(result, str1); // string addr

  if (int_cnt2 > 8) {
    // Skip over the rescan entry points on the first pass.
    jmpb(SCAN_TO_SUBSTR);

    // Reload substr for rescan, this code
    // is executed only for large substrings (> 8 chars)
    bind(RELOAD_SUBSTR);
    movdqu(vec, Address(str2, 0));
    negptr(cnt2); // Jumped here with negative cnt2, convert to positive

    bind(RELOAD_STR);
    // We came here after the beginning of the substring was
    // matched but the rest of it was not so we need to search
    // again. Start from the next element after the previous match.

    // cnt2 is number of substring remaining elements and
    // cnt1 is number of string remaining elements when cmp failed.
    // Restored cnt1 = cnt1 - cnt2 + int_cnt2
    subl(cnt1, cnt2);
    addl(cnt1, int_cnt2);
    movl(cnt2, int_cnt2); // Now restore cnt2

    decrementl(cnt1);     // Shift to next element
    cmpl(cnt1, cnt2);
    jccb(Assembler::negative, RET_NOT_FOUND);  // Left less than substring

    addptr(result, 2);

  } // (int_cnt2 > 8)

  // Scan string for start of substr in 16-byte vectors
  bind(SCAN_TO_SUBSTR);
  pcmpestri(vec, Address(result, 0), 0x0d);
  jccb(Assembler::below, FOUND_CANDIDATE);   // CF == 1
  subl(cnt1, 8);
  jccb(Assembler::lessEqual, RET_NOT_FOUND); // Scanned full string
  cmpl(cnt1, cnt2);
  jccb(Assembler::negative, RET_NOT_FOUND);  // Left less than substring
  addptr(result, 16);
  jmpb(SCAN_TO_SUBSTR);

  // Found a potential substr
  bind(FOUND_CANDIDATE);
  // Matched whole vector if first element matched (tmp(rcx) == 0).
  if (int_cnt2 == 8) {
    jccb(Assembler::overflow, RET_FOUND);    // OF == 1
  } else { // int_cnt2 > 8
    jccb(Assembler::overflow, FOUND_SUBSTR);
  }
  // After pcmpestri tmp(rcx) contains matched element index
  // Compute start addr of substr
  lea(result, Address(result, tmp, Address::times_2));

  // Make sure string is still long enough
  subl(cnt1, tmp);
  cmpl(cnt1, cnt2);
  if (int_cnt2 == 8) {
    jccb(Assembler::greaterEqual, SCAN_TO_SUBSTR);
  } else { // int_cnt2 > 8
    jccb(Assembler::greaterEqual, MATCH_SUBSTR_HEAD);
  }
  // Left less than substring.

  bind(RET_NOT_FOUND);
  movl(result, -1);
  jmpb(EXIT);

  if (int_cnt2 > 8) {
    // This code is optimized for the case when whole substring
    // is matched if its head is matched.
    bind(MATCH_SUBSTR_HEAD);
    pcmpestri(vec, Address(result, 0), 0x0d);
    // Reload only string if does not match
    jccb(Assembler::noOverflow, RELOAD_STR); // OF == 0

    Label CONT_SCAN_SUBSTR;
    // Compare the rest of substring (> 8 chars).
    bind(FOUND_SUBSTR);
    // First 8 chars are already matched.
    // From here cnt2 is kept negative: it becomes 8 - int_cnt2 and is
    // stepped towards zero by 8 per compared vector.
    negptr(cnt2);
    addptr(cnt2, 8);

    bind(SCAN_SUBSTR);
    subl(cnt1, 8);
    cmpl(cnt2, -8); // Do not read beyond substring
    jccb(Assembler::lessEqual, CONT_SCAN_SUBSTR);
    // Back-up strings to avoid reading beyond substring:
    // cnt1 = cnt1 - cnt2 + 8
    addl(cnt1, cnt2); // cnt2 is negative
    addl(cnt1, 8);
    movl(cnt2, 8); negptr(cnt2);
    bind(CONT_SCAN_SUBSTR);
    if (int_cnt2 < (int)G) {
      // Displacement int_cnt2*2 plus the negative index cnt2 addresses the
      // next 16-byte chunk of both the substring and the string.
      movdqu(vec, Address(str2, cnt2, Address::times_2, int_cnt2*2));
      pcmpestri(vec, Address(result, cnt2, Address::times_2, int_cnt2*2), 0x0d);
    } else {
      // calculate index in register to avoid integer overflow (int_cnt2*2)
      movl(tmp, int_cnt2);
      addptr(tmp, cnt2);
      movdqu(vec, Address(str2, tmp, Address::times_2, 0));
      pcmpestri(vec, Address(result, tmp, Address::times_2, 0), 0x0d);
    }
    // Need to reload strings pointers if not matched whole vector
    jcc(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0
    addptr(cnt2, 8);
    jcc(Assembler::negative, SCAN_SUBSTR);
    // Fall through if found full substring

  } // (int_cnt2 > 8)

  bind(RET_FOUND);
  // Found result if we matched full small substring.
  // Compute substr offset: byte distance from the string start, halved to
  // get an element index.
  subptr(result, str1);
  shrl(result, 1); // index
  bind(EXIT);

} // string_indexofC8

// Small strings are loaded through stack if they cross page boundary.
// Emits code that searches the string at str1 (cnt1 elements) for the
// substring at str2. int_cnt2 is either the length of a small (1..7 chars)
// constant substring, or -1 when the length is only known at run time and is
// passed in cnt2. On exit from the generated code, result holds the element
// index of the match, or -1 when no match was found.
//
// Register binding is fixed by pcmpestri: cnt1 must be rdx, cnt2 must be rax
// and tmp must be rcx (asserted below). The generated code saves the incoming
// rsp on the stack and restores it at CLEANUP, and it overwrites str1, str2,
// cnt1, cnt2, tmp, result and vec.
void MacroAssembler::string_indexof(Register str1, Register str2,
                                    Register cnt1, Register cnt2,
                                    int int_cnt2,  Register result,
                                    XMMRegister vec, Register tmp) {
  ShortBranchVerifier sbv(this);
  assert(UseSSE42Intrinsics, "SSE4.2 is required");
  //
  // int_cnt2 is length of small (< 8 chars) constant substring
  // or (-1) for non constant substring in which case its length
  // is in cnt2 register.
  //
  // Note, inline_string_indexOf() generates checks:
  // if (substr.count > string.count) return -1;
  // if (substr.count == 0) return 0;
  //
  assert(int_cnt2 == -1 || (0 < int_cnt2 && int_cnt2 < 8), "should be != 0");

  // This method uses pcmpestri instruction with bound registers
  //   inputs:
  //     xmm - substring
  //     rax - substring length (elements count)
  //     mem - scanned string
  //     rdx - string length (elements count)
  //     0xd - mode: 1100 (substring search) + 01 (unsigned shorts)
  //   outputs:
  //     rcx - matched index in string
  assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri");

  Label RELOAD_SUBSTR, SCAN_TO_SUBSTR, SCAN_SUBSTR, ADJUST_STR,
        RET_FOUND, RET_NOT_FOUND, CLEANUP, FOUND_SUBSTR,
        FOUND_CANDIDATE;

  { //========================================================
    // We don't know where these strings are located
    // and we can't read beyond them. Load them through stack.
    Label BIG_STRINGS, CHECK_STR, COPY_SUBSTR, COPY_STR;

    movptr(tmp, rsp); // save old SP

    if (int_cnt2 > 0) {     // small (< 8 chars) constant substring
      if (int_cnt2 == 1) {  // One char
        load_unsigned_short(result, Address(str2, 0));
        movdl(vec, result); // move 32 bits
      } else if (int_cnt2 == 2) { // Two chars
        movdl(vec, Address(str2, 0)); // move 32 bits
      } else if (int_cnt2 == 4) { // Four chars
        movq(vec, Address(str2, 0));  // move 64 bits
      } else { // cnt2 = { 3, 5, 6, 7 }
        // Array header size is 12 bytes in 32-bit VM
        // + 6 bytes for 3 chars == 18 bytes,
        // enough space to load vec and shift.
        assert(HeapWordSize*typeArrayKlass::header_size() >= 12,"sanity");
        // Load the 16 bytes that end at the end of the substring, then shift
        // the leading bytes out so the substring starts at byte 0 of vec.
        movdqu(vec, Address(str2, (int_cnt2*2)-16));
        psrldq(vec, 16-(int_cnt2*2));
      }
    } else { // not constant substring
      cmpl(cnt2, 8);
      jccb(Assembler::aboveEqual, BIG_STRINGS); // Both strings are big enough

      // We can read beyond string if str+16 does not cross page boundary
      // since heaps are aligned and mapped by pages.
      assert(os::vm_page_size() < (int)G, "default page should be small");
      movl(result, str2); // We need only low 32 bits
      andl(result, (os::vm_page_size()-1));
      cmpl(result, (os::vm_page_size()-16));
      jccb(Assembler::belowEqual, CHECK_STR);

      // Move small strings to stack to allow load 16 bytes into vec.
      subptr(rsp, 16);
      // The copy loop indexes with cnt2 = count..1, hence the -2; wordSize
      // skips the cnt2 value pushed just below.
      int stk_offset = wordSize-2;
      push(cnt2);

      bind(COPY_SUBSTR);
      load_unsigned_short(result, Address(str2, cnt2, Address::times_2, -2));
      movw(Address(rsp, cnt2, Address::times_2, stk_offset), result);
      decrement(cnt2);
      jccb(Assembler::notZero, COPY_SUBSTR);

      pop(cnt2);
      movptr(str2, rsp);  // New substring address
    } // non constant

    bind(CHECK_STR);
    cmpl(cnt1, 8);
    jccb(Assembler::aboveEqual, BIG_STRINGS);

    // Check cross page boundary.
    movl(result, str1); // We need only low 32 bits
    andl(result, (os::vm_page_size()-1));
    cmpl(result, (os::vm_page_size()-16));
    jccb(Assembler::belowEqual, BIG_STRINGS);

    subptr(rsp, 16);
    // Same indexing scheme as the substring copy above; cnt2 is only saved
    // (and skipped over) when it carries the run-time substring length.
    int stk_offset = -2;
    if (int_cnt2 < 0) { // not constant
      push(cnt2);
      stk_offset += wordSize;
    }
    movl(cnt2, cnt1);

    bind(COPY_STR);
    load_unsigned_short(result, Address(str1, cnt2, Address::times_2, -2));
    movw(Address(rsp, cnt2, Address::times_2, stk_offset), result);
    decrement(cnt2);
    jccb(Assembler::notZero, COPY_STR);

    if (int_cnt2 < 0) { // not constant
      pop(cnt2);
    }
    movptr(str1, rsp);  // New string address

    bind(BIG_STRINGS);
    // Load substring.
    if (int_cnt2 < 0) { // -1
      movdqu(vec, Address(str2, 0));
      push(cnt2);       // substr count
      push(str2);       // substr addr
      push(str1);       // string addr
    } else {
      // Small (< 8 chars) constant substrings are loaded already.
      movl(cnt2, int_cnt2);
    }
    push(tmp);  // original SP

  } // Finished loading

  //========================================================
  // Start search
  //

  movptr(result, str1); // string addr

  if (int_cnt2  < 0) {  // Only for non constant substring
    // Skip over the rescan entry point on the first pass.
    jmpb(SCAN_TO_SUBSTR);

    // SP saved at sp+0
    // String saved at sp+1*wordSize
    // Substr saved at sp+2*wordSize
    // Substr count saved at sp+3*wordSize

    // Reload substr for rescan, this code
    // is executed only for large substrings (> 8 chars)
    bind(RELOAD_SUBSTR);
    movptr(str2, Address(rsp, 2*wordSize));
    movl(cnt2, Address(rsp, 3*wordSize));
    movdqu(vec, Address(str2, 0));
    // We came here after the beginning of the substring was
    // matched but the rest of it was not so we need to search
    // again. Start from the next element after the previous match.
    subptr(str1, result); // Restore counter
    shrl(str1, 1);
    addl(cnt1, str1);
    decrementl(cnt1);   // Shift to next element
    cmpl(cnt1, cnt2);
    jccb(Assembler::negative, RET_NOT_FOUND);  // Left less than substring

    addptr(result, 2);
  } // non constant

  // Scan string for start of substr in 16-byte vectors
  bind(SCAN_TO_SUBSTR);
  assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri");
  pcmpestri(vec, Address(result, 0), 0x0d);
  jccb(Assembler::below, FOUND_CANDIDATE);   // CF == 1
  subl(cnt1, 8);
  jccb(Assembler::lessEqual, RET_NOT_FOUND); // Scanned full string
  cmpl(cnt1, cnt2);
  jccb(Assembler::negative, RET_NOT_FOUND);  // Left less than substring
  addptr(result, 16);

  bind(ADJUST_STR);
  cmpl(cnt1, 8); // Do not read beyond string
  jccb(Assembler::greaterEqual, SCAN_TO_SUBSTR);
  // Back-up string to avoid reading beyond string.
  lea(result, Address(result, cnt1, Address::times_2, -16));
  movl(cnt1, 8);
  jmpb(SCAN_TO_SUBSTR);

  // Found a potential substr
  bind(FOUND_CANDIDATE);
  // After pcmpestri tmp(rcx) contains matched element index

  // Make sure string is still long enough
  subl(cnt1, tmp);
  cmpl(cnt1, cnt2);
  jccb(Assembler::greaterEqual, FOUND_SUBSTR);
  // Left less than substring.

  bind(RET_NOT_FOUND);
  movl(result, -1);
  jmpb(CLEANUP);

  bind(FOUND_SUBSTR);
  // Compute start addr of substr
  lea(result, Address(result, tmp, Address::times_2));

  if (int_cnt2 > 0) { // Constant substring
    // Repeat search for small substring (< 8 chars)
    // from new point without reloading substring.
    // Have to check that we don't read beyond string.
    cmpl(tmp, 8-int_cnt2);
    jccb(Assembler::greater, ADJUST_STR);
    // Fall through if matched whole substring.
  } else { // non constant
    assert(int_cnt2 == -1, "should be != 0");

    addl(tmp, cnt2);
    // Found result if we matched whole substring.
    cmpl(tmp, 8);
    jccb(Assembler::lessEqual, RET_FOUND);

    // Repeat search for small substring (<= 8 chars)
    // from new point 'str1' without reloading substring.
    cmpl(cnt2, 8);
    // Have to check that we don't read beyond string.
    jccb(Assembler::lessEqual, ADJUST_STR);

    Label CHECK_NEXT, CONT_SCAN_SUBSTR, RET_FOUND_LONG;
    // Compare the rest of substring (> 8 chars).
    movptr(str1, result);

    cmpl(tmp, cnt2);
    // First 8 chars are already matched.
    jccb(Assembler::equal, CHECK_NEXT);

    bind(SCAN_SUBSTR);
    pcmpestri(vec, Address(str1, 0), 0x0d);
    // Need to reload strings pointers if not matched whole vector
    jcc(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0

    bind(CHECK_NEXT);
    subl(cnt2, 8);
    jccb(Assembler::lessEqual, RET_FOUND_LONG); // Found full substring
    addptr(str1, 16);
    addptr(str2, 16);
    subl(cnt1, 8);
    cmpl(cnt2, 8); // Do not read beyond substring
    jccb(Assembler::greaterEqual, CONT_SCAN_SUBSTR);
    // Back-up strings to avoid reading beyond substring.
    lea(str2, Address(str2, cnt2, Address::times_2, -16));
    lea(str1, Address(str1, cnt2, Address::times_2, -16));
    subl(cnt1, cnt2);
    movl(cnt2, 8);
    addl(cnt1, 8);
    bind(CONT_SCAN_SUBSTR);
    movdqu(vec, Address(str2, 0));
    jmpb(SCAN_SUBSTR);

    bind(RET_FOUND_LONG);
    // Reload the string start address saved on the stack (sp+1*wordSize).
    movptr(str1, Address(rsp, wordSize));
  } // non constant

  bind(RET_FOUND);
  // Compute substr offset: byte distance from the string start, halved to
  // get an element index.
  subptr(result, str1);
  shrl(result, 1); // index

  bind(CLEANUP);
  pop(rsp); // restore SP

} // string_indexof

// Compare strings.
// Emits code that compares the char sequences at str1 (cnt1 elements) and
// str2 (cnt2 elements). On exit from the generated code, result holds the
// difference of the first pair of differing chars (str1 char minus str2
// char), or, when the common prefix is equal, the length difference
// cnt1 - cnt2.
//
// The generated code overwrites str1, str2, cnt1, cnt2 and result, and uses
// one stack slot for the length difference. With UseSSE42Intrinsics the
// registers are bound by pcmpestri: result must be rax, cnt2 must be rdx and
// cnt1 must be rcx (asserted below), and vec1 is overwritten.
void MacroAssembler::string_compare(Register str1, Register str2,
                                    Register cnt1, Register cnt2, Register result,
                                    XMMRegister vec1) {
  ShortBranchVerifier sbv(this);
  Label LENGTH_DIFF_LABEL, POP_LABEL, DONE_LABEL, WHILE_HEAD_LABEL;

  // Compute the minimum of the string lengths and the
  // difference of the string lengths (stack).
  // Do the conditional move stuff
  movl(result, cnt1);
  subl(cnt1, cnt2);
  push(cnt1);
  cmov32(Assembler::lessEqual, cnt2, result);

  // Is the minimum length zero?
  testl(cnt2, cnt2);
  jcc(Assembler::zero, LENGTH_DIFF_LABEL);

  // Load first characters
  load_unsigned_short(result, Address(str1, 0));
  load_unsigned_short(cnt1, Address(str2, 0));

  // Compare first characters
  subl(result, cnt1);
  jcc(Assembler::notZero, POP_LABEL);
  decrementl(cnt2);
  jcc(Assembler::zero, LENGTH_DIFF_LABEL);

  {
    // Check after comparing first character to see if strings are equivalent
    Label LSkip2;
    // Check if the strings start at same location
    cmpptr(str1, str2);
    jccb(Assembler::notEqual, LSkip2);

    // Check if the length difference is zero (from stack)
    cmpl(Address(rsp, 0), 0x0);
    jcc(Assembler::equal,  LENGTH_DIFF_LABEL);

    // Strings might not be equivalent
    bind(LSkip2);
  }

  Address::ScaleFactor scale = Address::times_2;
  int stride = 8;   // chars per 16-byte vector

  // Advance to next element (16/stride == 2 bytes, i.e. one char)
  addptr(str1, 16/stride);
  addptr(str2, 16/stride);

  if (UseSSE42Intrinsics) {
    Label COMPARE_WIDE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_TAIL;
    int pcmpmask = 0x19;
    // Setup to compare 16-byte vectors
    movl(result, cnt2);
    andl(cnt2, ~(stride - 1));   // cnt2 holds the vector count
    jccb(Assembler::zero, COMPARE_TAIL);

    // Point both strings at their ends and index backwards with a
    // negative element offset in result.
    lea(str1, Address(str1, result, scale));
    lea(str2, Address(str2, result, scale));
    negptr(result);

    // pcmpestri
    //   inputs:
    //     vec1- substring
    //     rax - negative string length (elements count)
    //     mem - scanned string
    //     rdx - string length (elements count)
    //     pcmpmask - cmp mode: 11000 (string compare with negated result)
    //               + 00 (unsigned bytes) or  + 01 (unsigned shorts)
    //   outputs:
    //     rcx - first mismatched element index
    assert(result == rax && cnt2 == rdx && cnt1 == rcx, "pcmpestri");

    bind(COMPARE_WIDE_VECTORS);
    movdqu(vec1, Address(str1, result, scale));
    pcmpestri(vec1, Address(str2, result, scale), pcmpmask);
    // After pcmpestri cnt1(rcx) contains mismatched element index

    jccb(Assembler::below, VECTOR_NOT_EQUAL);  // CF==1
    addptr(result, stride);
    subptr(cnt2, stride);
    jccb(Assembler::notZero, COMPARE_WIDE_VECTORS);

    // compare wide vectors tail
    testl(result, result);
    jccb(Assembler::zero, LENGTH_DIFF_LABEL);

    // Re-compare the last full vector (the final 'stride' elements), which
    // overlaps elements already compared.
    movl(cnt2, stride);
    movl(result, stride);
    negptr(result);
    movdqu(vec1, Address(str1, result, scale));
    pcmpestri(vec1, Address(str2, result, scale), pcmpmask);
    jccb(Assembler::aboveEqual, LENGTH_DIFF_LABEL);

    // Mismatched characters in the vectors
    bind(VECTOR_NOT_EQUAL);
    addptr(result, cnt1);
    movptr(cnt2, result);
    load_unsigned_short(result, Address(str1, cnt2, scale));
    load_unsigned_short(cnt1, Address(str2, cnt2, scale));
    subl(result, cnt1);
    jmpb(POP_LABEL);

    bind(COMPARE_TAIL); // vector count is zero
    movl(cnt2, result);
    // Fallthru to tail compare
  }

  // Shift str2 and str1 to the end of the arrays, negate min
  lea(str1, Address(str1, cnt2, scale, 0));
  lea(str2, Address(str2, cnt2, scale, 0));
  negptr(cnt2);

  // Compare the rest of the elements
  bind(WHILE_HEAD_LABEL);
  load_unsigned_short(result, Address(str1, cnt2, scale, 0));
  load_unsigned_short(cnt1, Address(str2, cnt2, scale, 0));
  subl(result, cnt1);
  jccb(Assembler::notZero, POP_LABEL);
  increment(cnt2);
  jccb(Assembler::notZero, WHILE_HEAD_LABEL);

  // Strings are equal up to min length.  Return the length difference.
  bind(LENGTH_DIFF_LABEL);
  pop(result);
  jmpb(DONE_LABEL);

  // Discard the stored length difference
  bind(POP_LABEL);
  pop(cnt1);

  // That's it
  bind(DONE_LABEL);
}

// Compare char[] arrays aligned to 4 bytes or substrings.
void MacroAssembler::char_arrays_equals(bool is_array_equ, Register ary1, Register ary2,
                                        Register limit, Register result, Register chr,
                                        XMMRegister vec1, XMMRegister vec2) {
  ShortBranchVerifier sbv(this);
  Label TRUE_LABEL, FALSE_LABEL, DONE, COMPARE_VECTORS, COMPARE_CHAR;

  int length_offset  = arrayOopDesc::length_offset_in_bytes();
  int base_offset    = arrayOopDesc::base_offset_in_bytes(T_CHAR);

  // Check the input args
  cmpptr(ary1, ary2);
  jcc(Assembler::equal, TRUE_LABEL);

  if (is_array_equ) {
    // Need additional checks for arrays_equals.
10306 testptr(ary1, ary1); 10307 jcc(Assembler::zero, FALSE_LABEL); 10308 testptr(ary2, ary2); 10309 jcc(Assembler::zero, FALSE_LABEL); 10310 10311 // Check the lengths 10312 movl(limit, Address(ary1, length_offset)); 10313 cmpl(limit, Address(ary2, length_offset)); 10314 jcc(Assembler::notEqual, FALSE_LABEL); 10315 } 10316 10317 // count == 0 10318 testl(limit, limit); 10319 jcc(Assembler::zero, TRUE_LABEL); 10320 10321 if (is_array_equ) { 10322 // Load array address 10323 lea(ary1, Address(ary1, base_offset)); 10324 lea(ary2, Address(ary2, base_offset)); 10325 } 10326 10327 shll(limit, 1); // byte count != 0 10328 movl(result, limit); // copy 10329 10330 if (UseSSE42Intrinsics) { 10331 // With SSE4.2, use double quad vector compare 10332 Label COMPARE_WIDE_VECTORS, COMPARE_TAIL; 10333 10334 // Compare 16-byte vectors 10335 andl(result, 0x0000000e); // tail count (in bytes) 10336 andl(limit, 0xfffffff0); // vector count (in bytes) 10337 jccb(Assembler::zero, COMPARE_TAIL); 10338 10339 lea(ary1, Address(ary1, limit, Address::times_1)); 10340 lea(ary2, Address(ary2, limit, Address::times_1)); 10341 negptr(limit); 10342 10343 bind(COMPARE_WIDE_VECTORS); 10344 movdqu(vec1, Address(ary1, limit, Address::times_1)); 10345 movdqu(vec2, Address(ary2, limit, Address::times_1)); 10346 pxor(vec1, vec2); 10347 10348 ptest(vec1, vec1); 10349 jccb(Assembler::notZero, FALSE_LABEL); 10350 addptr(limit, 16); 10351 jcc(Assembler::notZero, COMPARE_WIDE_VECTORS); 10352 10353 testl(result, result); 10354 jccb(Assembler::zero, TRUE_LABEL); 10355 10356 movdqu(vec1, Address(ary1, result, Address::times_1, -16)); 10357 movdqu(vec2, Address(ary2, result, Address::times_1, -16)); 10358 pxor(vec1, vec2); 10359 10360 ptest(vec1, vec1); 10361 jccb(Assembler::notZero, FALSE_LABEL); 10362 jmpb(TRUE_LABEL); 10363 10364 bind(COMPARE_TAIL); // limit is zero 10365 movl(limit, result); 10366 // Fallthru to tail compare 10367 } 10368 10369 // Compare 4-byte vectors 10370 andl(limit, 0xfffffffc); // 
vector count (in bytes) 10371 jccb(Assembler::zero, COMPARE_CHAR); 10372 10373 lea(ary1, Address(ary1, limit, Address::times_1)); 10374 lea(ary2, Address(ary2, limit, Address::times_1)); 10375 negptr(limit); 10376 10377 bind(COMPARE_VECTORS); 10378 movl(chr, Address(ary1, limit, Address::times_1)); 10379 cmpl(chr, Address(ary2, limit, Address::times_1)); 10380 jccb(Assembler::notEqual, FALSE_LABEL); 10381 addptr(limit, 4); 10382 jcc(Assembler::notZero, COMPARE_VECTORS); 10383 10384 // Compare trailing char (final 2 bytes), if any 10385 bind(COMPARE_CHAR); 10386 testl(result, 0x2); // tail char 10387 jccb(Assembler::zero, TRUE_LABEL); 10388 load_unsigned_short(chr, Address(ary1, 0)); 10389 load_unsigned_short(limit, Address(ary2, 0)); 10390 cmpl(chr, limit); 10391 jccb(Assembler::notEqual, FALSE_LABEL); 10392 10393 bind(TRUE_LABEL); 10394 movl(result, 1); // return true 10395 jmpb(DONE); 10396 10397 bind(FALSE_LABEL); 10398 xorl(result, result); // return false 10399 10400 // That's it 10401 bind(DONE); 10402 } 10403 10404 #ifdef PRODUCT 10405 #define BLOCK_COMMENT(str) /* nothing */ 10406 #else 10407 #define BLOCK_COMMENT(str) block_comment(str) 10408 #endif 10409 10410 #define BIND(label) bind(label); BLOCK_COMMENT(#label ":") 10411 void MacroAssembler::generate_fill(BasicType t, bool aligned, 10412 Register to, Register value, Register count, 10413 Register rtmp, XMMRegister xtmp) { 10414 ShortBranchVerifier sbv(this); 10415 assert_different_registers(to, value, count, rtmp); 10416 Label L_exit, L_skip_align1, L_skip_align2, L_fill_byte; 10417 Label L_fill_2_bytes, L_fill_4_bytes; 10418 10419 int shift = -1; 10420 switch (t) { 10421 case T_BYTE: 10422 shift = 2; 10423 break; 10424 case T_SHORT: 10425 shift = 1; 10426 break; 10427 case T_INT: 10428 shift = 0; 10429 break; 10430 default: ShouldNotReachHere(); 10431 } 10432 10433 if (t == T_BYTE) { 10434 andl(value, 0xff); 10435 movl(rtmp, value); 10436 shll(rtmp, 8); 10437 orl(value, rtmp); 10438 } 10439 if (t == 
T_SHORT) { 10440 andl(value, 0xffff); 10441 } 10442 if (t == T_BYTE || t == T_SHORT) { 10443 movl(rtmp, value); 10444 shll(rtmp, 16); 10445 orl(value, rtmp); 10446 } 10447 10448 cmpl(count, 2<<shift); // Short arrays (< 8 bytes) fill by element 10449 jcc(Assembler::below, L_fill_4_bytes); // use unsigned cmp 10450 if (!UseUnalignedLoadStores && !aligned && (t == T_BYTE || t == T_SHORT)) { 10451 // align source address at 4 bytes address boundary 10452 if (t == T_BYTE) { 10453 // One byte misalignment happens only for byte arrays 10454 testptr(to, 1); 10455 jccb(Assembler::zero, L_skip_align1); 10456 movb(Address(to, 0), value); 10457 increment(to); 10458 decrement(count); 10459 BIND(L_skip_align1); 10460 } 10461 // Two bytes misalignment happens only for byte and short (char) arrays 10462 testptr(to, 2); 10463 jccb(Assembler::zero, L_skip_align2); 10464 movw(Address(to, 0), value); 10465 addptr(to, 2); 10466 subl(count, 1<<(shift-1)); 10467 BIND(L_skip_align2); 10468 } 10469 if (UseSSE < 2) { 10470 Label L_fill_32_bytes_loop, L_check_fill_8_bytes, L_fill_8_bytes_loop, L_fill_8_bytes; 10471 // Fill 32-byte chunks 10472 subl(count, 8 << shift); 10473 jcc(Assembler::less, L_check_fill_8_bytes); 10474 align(16); 10475 10476 BIND(L_fill_32_bytes_loop); 10477 10478 for (int i = 0; i < 32; i += 4) { 10479 movl(Address(to, i), value); 10480 } 10481 10482 addptr(to, 32); 10483 subl(count, 8 << shift); 10484 jcc(Assembler::greaterEqual, L_fill_32_bytes_loop); 10485 BIND(L_check_fill_8_bytes); 10486 addl(count, 8 << shift); 10487 jccb(Assembler::zero, L_exit); 10488 jmpb(L_fill_8_bytes); 10489 10490 // 10491 // length is too short, just fill qwords 10492 // 10493 BIND(L_fill_8_bytes_loop); 10494 movl(Address(to, 0), value); 10495 movl(Address(to, 4), value); 10496 addptr(to, 8); 10497 BIND(L_fill_8_bytes); 10498 subl(count, 1 << (shift + 1)); 10499 jcc(Assembler::greaterEqual, L_fill_8_bytes_loop); 10500 // fall through to fill 4 bytes 10501 } else { 10502 Label 
L_fill_32_bytes; 10503 if (!UseUnalignedLoadStores) { 10504 // align to 8 bytes, we know we are 4 byte aligned to start 10505 testptr(to, 4); 10506 jccb(Assembler::zero, L_fill_32_bytes); 10507 movl(Address(to, 0), value); 10508 addptr(to, 4); 10509 subl(count, 1<<shift); 10510 } 10511 BIND(L_fill_32_bytes); 10512 { 10513 assert( UseSSE >= 2, "supported cpu only" ); 10514 Label L_fill_32_bytes_loop, L_check_fill_8_bytes, L_fill_8_bytes_loop, L_fill_8_bytes; 10515 // Fill 32-byte chunks 10516 movdl(xtmp, value); 10517 pshufd(xtmp, xtmp, 0); 10518 10519 subl(count, 8 << shift); 10520 jcc(Assembler::less, L_check_fill_8_bytes); 10521 align(16); 10522 10523 BIND(L_fill_32_bytes_loop); 10524 10525 if (UseUnalignedLoadStores) { 10526 movdqu(Address(to, 0), xtmp); 10527 movdqu(Address(to, 16), xtmp); 10528 } else { 10529 movq(Address(to, 0), xtmp); 10530 movq(Address(to, 8), xtmp); 10531 movq(Address(to, 16), xtmp); 10532 movq(Address(to, 24), xtmp); 10533 } 10534 10535 addptr(to, 32); 10536 subl(count, 8 << shift); 10537 jcc(Assembler::greaterEqual, L_fill_32_bytes_loop); 10538 BIND(L_check_fill_8_bytes); 10539 addl(count, 8 << shift); 10540 jccb(Assembler::zero, L_exit); 10541 jmpb(L_fill_8_bytes); 10542 10543 // 10544 // length is too short, just fill qwords 10545 // 10546 BIND(L_fill_8_bytes_loop); 10547 movq(Address(to, 0), xtmp); 10548 addptr(to, 8); 10549 BIND(L_fill_8_bytes); 10550 subl(count, 1 << (shift + 1)); 10551 jcc(Assembler::greaterEqual, L_fill_8_bytes_loop); 10552 } 10553 } 10554 // fill trailing 4 bytes 10555 BIND(L_fill_4_bytes); 10556 testl(count, 1<<shift); 10557 jccb(Assembler::zero, L_fill_2_bytes); 10558 movl(Address(to, 0), value); 10559 if (t == T_BYTE || t == T_SHORT) { 10560 addptr(to, 4); 10561 BIND(L_fill_2_bytes); 10562 // fill trailing 2 bytes 10563 testl(count, 1<<(shift-1)); 10564 jccb(Assembler::zero, L_fill_byte); 10565 movw(Address(to, 0), value); 10566 if (t == T_BYTE) { 10567 addptr(to, 2); 10568 BIND(L_fill_byte); 10569 // fill 
trailing byte 10570 testl(count, 1); 10571 jccb(Assembler::zero, L_exit); 10572 movb(Address(to, 0), value); 10573 } else { 10574 BIND(L_fill_byte); 10575 } 10576 } else { 10577 BIND(L_fill_2_bytes); 10578 } 10579 BIND(L_exit); 10580 } 10581 #undef BIND 10582 #undef BLOCK_COMMENT 10583 10584 10585 Assembler::Condition MacroAssembler::negate_condition(Assembler::Condition cond) { 10586 switch (cond) { 10587 // Note some conditions are synonyms for others 10588 case Assembler::zero: return Assembler::notZero; 10589 case Assembler::notZero: return Assembler::zero; 10590 case Assembler::less: return Assembler::greaterEqual; 10591 case Assembler::lessEqual: return Assembler::greater; 10592 case Assembler::greater: return Assembler::lessEqual; 10593 case Assembler::greaterEqual: return Assembler::less; 10594 case Assembler::below: return Assembler::aboveEqual; 10595 case Assembler::belowEqual: return Assembler::above; 10596 case Assembler::above: return Assembler::belowEqual; 10597 case Assembler::aboveEqual: return Assembler::below; 10598 case Assembler::overflow: return Assembler::noOverflow; 10599 case Assembler::noOverflow: return Assembler::overflow; 10600 case Assembler::negative: return Assembler::positive; 10601 case Assembler::positive: return Assembler::negative; 10602 case Assembler::parity: return Assembler::noParity; 10603 case Assembler::noParity: return Assembler::parity; 10604 } 10605 ShouldNotReachHere(); return Assembler::overflow; 10606 } 10607 10608 SkipIfEqual::SkipIfEqual( 10609 MacroAssembler* masm, const bool* flag_addr, bool value) { 10610 _masm = masm; 10611 _masm->cmp8(ExternalAddress((address)flag_addr), value); 10612 _masm->jcc(Assembler::equal, _label); 10613 } 10614 10615 SkipIfEqual::~SkipIfEqual() { 10616 _masm->bind(_label); 10617 }