1 /* 2 * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 * 23 */ 24 25 #include "precompiled.hpp" 26 #include "assembler_x86.inline.hpp" 27 #include "gc_interface/collectedHeap.inline.hpp" 28 #include "interpreter/interpreter.hpp" 29 #include "memory/cardTableModRefBS.hpp" 30 #include "memory/resourceArea.hpp" 31 #include "prims/methodHandles.hpp" 32 #include "runtime/biasedLocking.hpp" 33 #include "runtime/interfaceSupport.hpp" 34 #include "runtime/objectMonitor.hpp" 35 #include "runtime/os.hpp" 36 #include "runtime/sharedRuntime.hpp" 37 #include "runtime/stubRoutines.hpp" 38 #ifndef SERIALGC 39 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp" 40 #include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp" 41 #include "gc_implementation/g1/heapRegion.hpp" 42 #endif 43 44 // Implementation of AddressLiteral 45 46 AddressLiteral::AddressLiteral(address target, relocInfo::relocType rtype) { 47 _is_lval = false; 48 _target = target; 49 switch (rtype) { 50 case relocInfo::oop_type: 51 // Oops are a special case. Normally they would be their own section 52 // but in cases like icBuffer they are literals in the code stream that 53 // we don't have a section for. We use none so that we get a literal address 54 // which is always patchable. 55 break; 56 case relocInfo::external_word_type: 57 _rspec = external_word_Relocation::spec(target); 58 break; 59 case relocInfo::internal_word_type: 60 _rspec = internal_word_Relocation::spec(target); 61 break; 62 case relocInfo::opt_virtual_call_type: 63 _rspec = opt_virtual_call_Relocation::spec(); 64 break; 65 case relocInfo::static_call_type: 66 _rspec = static_call_Relocation::spec(); 67 break; 68 case relocInfo::runtime_call_type: 69 _rspec = runtime_call_Relocation::spec(); 70 break; 71 case relocInfo::poll_type: 72 case relocInfo::poll_return_type: 73 _rspec = Relocation::spec_simple(rtype); 74 break; 75 case relocInfo::none: 76 break; 77 default: 78 ShouldNotReachHere(); 79 break; 80 } 81 } 82 83 // Implementation of Address 84 85 #ifdef _LP64 86 87 Address Address::make_array(ArrayAddress adr) { 88 // Not implementable on 64bit machines 89 // Should have been handled higher up the call chain. 
90 ShouldNotReachHere(); 91 return Address(); 92 } 93 94 // exceedingly dangerous constructor 95 Address::Address(int disp, address loc, relocInfo::relocType rtype) { 96 _base = noreg; 97 _index = noreg; 98 _scale = no_scale; 99 _disp = disp; 100 switch (rtype) { 101 case relocInfo::external_word_type: 102 _rspec = external_word_Relocation::spec(loc); 103 break; 104 case relocInfo::internal_word_type: 105 _rspec = internal_word_Relocation::spec(loc); 106 break; 107 case relocInfo::runtime_call_type: 108 // HMM 109 _rspec = runtime_call_Relocation::spec(); 110 break; 111 case relocInfo::poll_type: 112 case relocInfo::poll_return_type: 113 _rspec = Relocation::spec_simple(rtype); 114 break; 115 case relocInfo::none: 116 break; 117 default: 118 ShouldNotReachHere(); 119 } 120 } 121 #else // LP64 122 123 Address Address::make_array(ArrayAddress adr) { 124 AddressLiteral base = adr.base(); 125 Address index = adr.index(); 126 assert(index._disp == 0, "must not have disp"); // maybe it can? 127 Address array(index._base, index._index, index._scale, (intptr_t) base.target()); 128 array._rspec = base._rspec; 129 return array; 130 } 131 132 // exceedingly dangerous constructor 133 Address::Address(address loc, RelocationHolder spec) { 134 _base = noreg; 135 _index = noreg; 136 _scale = no_scale; 137 _disp = (intptr_t) loc; 138 _rspec = spec; 139 } 140 141 #endif // _LP64 142 143 144 145 // Convert the raw encoding form into the form expected by the constructor for 146 // Address. An index of 4 (rsp) corresponds to having no index, so convert 147 // that to noreg for the Address constructor. 148 Address Address::make_raw(int base, int index, int scale, int disp, bool disp_is_oop) { 149 RelocationHolder rspec; 150 if (disp_is_oop) { 151 rspec = Relocation::spec_simple(relocInfo::oop_type); 152 } 153 bool valid_index = index != rsp->encoding(); 154 if (valid_index) { 155 Address madr(as_Register(base), as_Register(index), (Address::ScaleFactor)scale, in_ByteSize(disp)); 156 madr._rspec = rspec; 157 return madr; 158 } else { 159 Address madr(as_Register(base), noreg, Address::no_scale, in_ByteSize(disp)); 160 madr._rspec = rspec; 161 return madr; 162 } 163 } 164 165 // Implementation of Assembler 166 167 int AbstractAssembler::code_fill_byte() { 168 return (u_char)'\xF4'; // hlt 169 } 170 171 // make this go away someday 172 void Assembler::emit_data(jint data, relocInfo::relocType rtype, int format) { 173 if (rtype == relocInfo::none) 174 emit_long(data); 175 else emit_data(data, Relocation::spec_simple(rtype), format); 176 } 177 178 void Assembler::emit_data(jint data, RelocationHolder const& rspec, int format) { 179 assert(imm_operand == 0, "default format must be immediate in this file"); 180 assert(inst_mark() != NULL, "must be inside InstructionMark"); 181 if (rspec.type() != relocInfo::none) { 182 #ifdef ASSERT 183 check_relocation(rspec, format); 184 #endif 185 // Do not use AbstractAssembler::relocate, which is not intended for 186 // embedded words. Instead, relocate to the enclosing instruction. 187 188 // hack. 
call32 is too wide for mask so use disp32 189 if (format == call32_operand) 190 code_section()->relocate(inst_mark(), rspec, disp32_operand); 191 else 192 code_section()->relocate(inst_mark(), rspec, format); 193 } 194 emit_long(data); 195 } 196 197 static int encode(Register r) { 198 int enc = r->encoding(); 199 if (enc >= 8) { 200 enc -= 8; 201 } 202 return enc; 203 } 204 205 static int encode(XMMRegister r) { 206 int enc = r->encoding(); 207 if (enc >= 8) { 208 enc -= 8; 209 } 210 return enc; 211 } 212 213 void Assembler::emit_arith_b(int op1, int op2, Register dst, int imm8) { 214 assert(dst->has_byte_register(), "must have byte register"); 215 assert(isByte(op1) && isByte(op2), "wrong opcode"); 216 assert(isByte(imm8), "not a byte"); 217 assert((op1 & 0x01) == 0, "should be 8bit operation"); 218 emit_byte(op1); 219 emit_byte(op2 | encode(dst)); 220 emit_byte(imm8); 221 } 222 223 224 void Assembler::emit_arith(int op1, int op2, Register dst, int32_t imm32) { 225 assert(isByte(op1) && isByte(op2), "wrong opcode"); 226 assert((op1 & 0x01) == 1, "should be 32bit operation"); 227 assert((op1 & 0x02) == 0, "sign-extension bit should not be set"); 228 if (is8bit(imm32)) { 229 emit_byte(op1 | 0x02); // set sign bit 230 emit_byte(op2 | encode(dst)); 231 emit_byte(imm32 & 0xFF); 232 } else { 233 emit_byte(op1); 234 emit_byte(op2 | encode(dst)); 235 emit_long(imm32); 236 } 237 } 238 239 // Force generation of a 4 byte immediate value even if it fits into 8bit 240 void Assembler::emit_arith_imm32(int op1, int op2, Register dst, int32_t imm32) { 241 assert(isByte(op1) && isByte(op2), "wrong opcode"); 242 assert((op1 & 0x01) == 1, "should be 32bit operation"); 243 assert((op1 & 0x02) == 0, "sign-extension bit should not be set"); 244 emit_byte(op1); 245 emit_byte(op2 | encode(dst)); 246 emit_long(imm32); 247 } 248 249 // immediate-to-memory forms 250 void Assembler::emit_arith_operand(int op1, Register rm, Address adr, int32_t imm32) { 251 assert((op1 & 0x01) == 1, "should be 32bit operation"); 252 assert((op1 & 0x02) == 0, "sign-extension bit should not be set"); 253 if (is8bit(imm32)) { 254 emit_byte(op1 | 0x02); // set sign bit 255 emit_operand(rm, adr, 1); 256 emit_byte(imm32 & 0xFF); 257 } else { 258 emit_byte(op1); 259 emit_operand(rm, adr, 4); 260 emit_long(imm32); 261 } 262 } 263 264 void Assembler::emit_arith(int op1, int op2, Register dst, jobject obj) { 265 LP64_ONLY(ShouldNotReachHere()); 266 assert(isByte(op1) && isByte(op2), "wrong opcode"); 267 assert((op1 & 0x01) == 1, "should be 32bit operation"); 268 assert((op1 & 0x02) == 0, "sign-extension bit should not be set"); 269 InstructionMark im(this); 270 emit_byte(op1); 271 emit_byte(op2 | encode(dst)); 272 emit_data((intptr_t)obj, relocInfo::oop_type, 0); 273 } 274 275 276 void Assembler::emit_arith(int op1, int op2, Register dst, Register src) { 277 assert(isByte(op1) && isByte(op2), "wrong opcode"); 278 emit_byte(op1); 279 emit_byte(op2 | encode(dst) << 3 | encode(src)); 280 } 281 282 283 void Assembler::emit_operand(Register reg, Register base, Register index, 284 Address::ScaleFactor scale, int disp, 285 RelocationHolder const& rspec, 286 int rip_relative_correction) { 287 relocInfo::relocType rtype = (relocInfo::relocType) rspec.type(); 288 289 // Encode the registers as needed in the fields they are used in 290 291 int regenc = encode(reg) << 3; 292 int indexenc = index->is_valid() ? encode(index) << 3 : 0; 293 int baseenc = base->is_valid() ? 
encode(base) : 0; 294 295 if (base->is_valid()) { 296 if (index->is_valid()) { 297 assert(scale != Address::no_scale, "inconsistent address"); 298 // [base + index*scale + disp] 299 if (disp == 0 && rtype == relocInfo::none && 300 base != rbp LP64_ONLY(&& base != r13)) { 301 // [base + index*scale] 302 // [00 reg 100][ss index base] 303 assert(index != rsp, "illegal addressing mode"); 304 emit_byte(0x04 | regenc); 305 emit_byte(scale << 6 | indexenc | baseenc); 306 } else if (is8bit(disp) && rtype == relocInfo::none) { 307 // [base + index*scale + imm8] 308 // [01 reg 100][ss index base] imm8 309 assert(index != rsp, "illegal addressing mode"); 310 emit_byte(0x44 | regenc); 311 emit_byte(scale << 6 | indexenc | baseenc); 312 emit_byte(disp & 0xFF); 313 } else { 314 // [base + index*scale + disp32] 315 // [10 reg 100][ss index base] disp32 316 assert(index != rsp, "illegal addressing mode"); 317 emit_byte(0x84 | regenc); 318 emit_byte(scale << 6 | indexenc | baseenc); 319 emit_data(disp, rspec, disp32_operand); 320 } 321 } else if (base == rsp LP64_ONLY(|| base == r12)) { 322 // [rsp + disp] 323 if (disp == 0 && rtype == relocInfo::none) { 324 // [rsp] 325 // [00 reg 100][00 100 100] 326 emit_byte(0x04 | regenc); 327 emit_byte(0x24); 328 } else if (is8bit(disp) && rtype == relocInfo::none) { 329 // [rsp + imm8] 330 // [01 reg 100][00 100 100] disp8 331 emit_byte(0x44 | regenc); 332 emit_byte(0x24); 333 emit_byte(disp & 0xFF); 334 } else { 335 // [rsp + imm32] 336 // [10 reg 100][00 100 100] disp32 337 emit_byte(0x84 | regenc); 338 emit_byte(0x24); 339 emit_data(disp, rspec, disp32_operand); 340 } 341 } else { 342 // [base + disp] 343 assert(base != rsp LP64_ONLY(&& base != r12), "illegal addressing mode"); 344 if (disp == 0 && rtype == relocInfo::none && 345 base != rbp LP64_ONLY(&& base != r13)) { 346 // [base] 347 // [00 reg base] 348 emit_byte(0x00 | regenc | baseenc); 349 } else if (is8bit(disp) && rtype == relocInfo::none) { 350 // [base + disp8] 351 // [01 reg base] disp8 352 emit_byte(0x40 | regenc | baseenc); 353 emit_byte(disp & 0xFF); 354 } else { 355 // [base + disp32] 356 // [10 reg base] disp32 357 emit_byte(0x80 | regenc | baseenc); 358 emit_data(disp, rspec, disp32_operand); 359 } 360 } 361 } else { 362 if (index->is_valid()) { 363 assert(scale != Address::no_scale, "inconsistent address"); 364 // [index*scale + disp] 365 // [00 reg 100][ss index 101] disp32 366 assert(index != rsp, "illegal addressing mode"); 367 emit_byte(0x04 | regenc); 368 emit_byte(scale << 6 | indexenc | 0x05); 369 emit_data(disp, rspec, disp32_operand); 370 } else if (rtype != relocInfo::none ) { 371 // [disp] (64bit) RIP-RELATIVE (32bit) abs 372 // [00 000 101] disp32 373 374 emit_byte(0x05 | regenc); 375 // Note that the RIP-rel. correction applies to the generated 376 // disp field, but _not_ to the target address in the rspec. 377 378 // disp was created by converting the target address minus the pc 379 // at the start of the instruction. That needs more correction here. 
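      // On LP64 the hardware treats the disp32 as relative to the end of the
      // instruction (next_ip), so the value stored must be target - next_ip.
      // Since disp arrived here as target - inst_mark(), subtracting
      // (next_ip - inst_mark()) below yields exactly that; the
      // rip_relative_correction term accounts for any immediate bytes that
      // follow the displacement and push next_ip further out.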
380 // intptr_t disp = target - next_ip; 381 assert(inst_mark() != NULL, "must be inside InstructionMark"); 382 address next_ip = pc() + sizeof(int32_t) + rip_relative_correction; 383 int64_t adjusted = disp; 384 // Do rip-rel adjustment for 64bit 385 LP64_ONLY(adjusted -= (next_ip - inst_mark())); 386 assert(is_simm32(adjusted), 387 "must be 32bit offset (RIP relative address)"); 388 emit_data((int32_t) adjusted, rspec, disp32_operand); 389 390 } else { 391 // 32bit never did this, did everything as the rip-rel/disp code above 392 // [disp] ABSOLUTE 393 // [00 reg 100][00 100 101] disp32 394 emit_byte(0x04 | regenc); 395 emit_byte(0x25); 396 emit_data(disp, rspec, disp32_operand); 397 } 398 } 399 } 400 401 void Assembler::emit_operand(XMMRegister reg, Register base, Register index, 402 Address::ScaleFactor scale, int disp, 403 RelocationHolder const& rspec) { 404 emit_operand((Register)reg, base, index, scale, disp, rspec); 405 } 406 407 // Secret local extension to Assembler::WhichOperand: 408 #define end_pc_operand (_WhichOperand_limit) 409 410 address Assembler::locate_operand(address inst, WhichOperand which) { 411 // Decode the given instruction, and return the address of 412 // an embedded 32-bit operand word. 413 414 // If "which" is disp32_operand, selects the displacement portion 415 // of an effective address specifier. 416 // If "which" is imm64_operand, selects the trailing immediate constant. 417 // If "which" is call32_operand, selects the displacement of a call or jump. 418 // Caller is responsible for ensuring that there is such an operand, 419 // and that it is 32/64 bits wide. 420 421 // If "which" is end_pc_operand, find the end of the instruction. 422 423 address ip = inst; 424 bool is_64bit = false; 425 426 debug_only(bool has_disp32 = false); 427 int tail_size = 0; // other random bytes (#32, #16, etc.) at end of insn 428 429 again_after_prefix: 430 switch (0xFF & *ip++) { 431 432 // These convenience macros generate groups of "case" labels for the switch. 433 #define REP4(x) (x)+0: case (x)+1: case (x)+2: case (x)+3 434 #define REP8(x) (x)+0: case (x)+1: case (x)+2: case (x)+3: \ 435 case (x)+4: case (x)+5: case (x)+6: case (x)+7 436 #define REP16(x) REP8((x)+0): \ 437 case REP8((x)+8) 438 439 case CS_segment: 440 case SS_segment: 441 case DS_segment: 442 case ES_segment: 443 case FS_segment: 444 case GS_segment: 445 // Seems dubious 446 LP64_ONLY(assert(false, "shouldn't have that prefix")); 447 assert(ip == inst+1, "only one prefix allowed"); 448 goto again_after_prefix; 449 450 case 0x67: 451 case REX: 452 case REX_B: 453 case REX_X: 454 case REX_XB: 455 case REX_R: 456 case REX_RB: 457 case REX_RX: 458 case REX_RXB: 459 NOT_LP64(assert(false, "64bit prefixes")); 460 goto again_after_prefix; 461 462 case REX_W: 463 case REX_WB: 464 case REX_WX: 465 case REX_WXB: 466 case REX_WR: 467 case REX_WRB: 468 case REX_WRX: 469 case REX_WRXB: 470 NOT_LP64(assert(false, "64bit prefixes")); 471 is_64bit = true; 472 goto again_after_prefix; 473 474 case 0xFF: // pushq a; decl a; incl a; call a; jmp a 475 case 0x88: // movb a, r 476 case 0x89: // movl a, r 477 case 0x8A: // movb r, a 478 case 0x8B: // movl r, a 479 case 0x8F: // popl a 480 debug_only(has_disp32 = true); 481 break; 482 483 case 0x68: // pushq #32 484 if (which == end_pc_operand) { 485 return ip + 4; 486 } 487 assert(which == imm_operand && !is_64bit, "pushl has no disp32 or 64bit immediate"); 488 return ip; // not produced by emit_operand 489 490 case 0x66: // movw ... 
(size prefix) 491 again_after_size_prefix2: 492 switch (0xFF & *ip++) { 493 case REX: 494 case REX_B: 495 case REX_X: 496 case REX_XB: 497 case REX_R: 498 case REX_RB: 499 case REX_RX: 500 case REX_RXB: 501 case REX_W: 502 case REX_WB: 503 case REX_WX: 504 case REX_WXB: 505 case REX_WR: 506 case REX_WRB: 507 case REX_WRX: 508 case REX_WRXB: 509 NOT_LP64(assert(false, "64bit prefix found")); 510 goto again_after_size_prefix2; 511 case 0x8B: // movw r, a 512 case 0x89: // movw a, r 513 debug_only(has_disp32 = true); 514 break; 515 case 0xC7: // movw a, #16 516 debug_only(has_disp32 = true); 517 tail_size = 2; // the imm16 518 break; 519 case 0x0F: // several SSE/SSE2 variants 520 ip--; // reparse the 0x0F 521 goto again_after_prefix; 522 default: 523 ShouldNotReachHere(); 524 } 525 break; 526 527 case REP8(0xB8): // movl/q r, #32/#64(oop?) 528 if (which == end_pc_operand) return ip + (is_64bit ? 8 : 4); 529 // these asserts are somewhat nonsensical 530 #ifndef _LP64 531 assert(which == imm_operand || which == disp32_operand, 532 err_msg("which %d is_64_bit %d ip " INTPTR_FORMAT, which, is_64bit, ip)); 533 #else 534 assert((which == call32_operand || which == imm_operand) && is_64bit || 535 which == narrow_oop_operand && !is_64bit, 536 err_msg("which %d is_64_bit %d ip " INTPTR_FORMAT, which, is_64bit, ip)); 537 #endif // _LP64 538 return ip; 539 540 case 0x69: // imul r, a, #32 541 case 0xC7: // movl a, #32(oop?) 542 tail_size = 4; 543 debug_only(has_disp32 = true); // has both kinds of operands! 544 break; 545 546 case 0x0F: // movx..., etc. 547 switch (0xFF & *ip++) { 548 case 0x3A: // pcmpestri 549 tail_size = 1; 550 case 0x38: // ptest, pmovzxbw 551 ip++; // skip opcode 552 debug_only(has_disp32 = true); // has both kinds of operands! 553 break; 554 555 case 0x70: // pshufd r, r/a, #8 556 debug_only(has_disp32 = true); // has both kinds of operands! 557 case 0x73: // psrldq r, #8 558 tail_size = 1; 559 break; 560 561 case 0x12: // movlps 562 case 0x28: // movaps 563 case 0x2E: // ucomiss 564 case 0x2F: // comiss 565 case 0x54: // andps 566 case 0x55: // andnps 567 case 0x56: // orps 568 case 0x57: // xorps 569 case 0x6E: // movd 570 case 0x7E: // movd 571 case 0xAE: // ldmxcsr, stmxcsr, fxrstor, fxsave, clflush 572 debug_only(has_disp32 = true); 573 break; 574 575 case 0xAD: // shrd r, a, %cl 576 case 0xAF: // imul r, a 577 case 0xBE: // movsbl r, a (movsxb) 578 case 0xBF: // movswl r, a (movsxw) 579 case 0xB6: // movzbl r, a (movzxb) 580 case 0xB7: // movzwl r, a (movzxw) 581 case REP16(0x40): // cmovl cc, r, a 582 case 0xB0: // cmpxchgb 583 case 0xB1: // cmpxchg 584 case 0xC1: // xaddl 585 case 0xC7: // cmpxchg8 586 case REP16(0x90): // setcc a 587 debug_only(has_disp32 = true); 588 // fall out of the switch to decode the address 589 break; 590 591 case 0xC4: // pinsrw r, a, #8 592 debug_only(has_disp32 = true); 593 case 0xC5: // pextrw r, r, #8 594 tail_size = 1; // the imm8 595 break; 596 597 case 0xAC: // shrd r, a, #8 598 debug_only(has_disp32 = true); 599 tail_size = 1; // the imm8 600 break; 601 602 case REP16(0x80): // jcc rdisp32 603 if (which == end_pc_operand) return ip + 4; 604 assert(which == call32_operand, "jcc has no disp32 or imm"); 605 return ip; 606 default: 607 ShouldNotReachHere(); 608 } 609 break; 610 611 case 0x81: // addl a, #32; addl r, #32 612 // also: orl, adcl, sbbl, andl, subl, xorl, cmpl 613 // on 32bit in the case of cmpl, the imm might be an oop 614 tail_size = 4; 615 debug_only(has_disp32 = true); // has both kinds of operands! 
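    // (0x81 is the imm32 arithmetic group: the ModRM reg field selects which
    // of add/or/adc/sbb/and/sub/xor/cmp is meant, so they all share this case.
    // For illustration, addl(Address(rbx, 8), 256) encodes as
    // 81 43 08 00 01 00 00, assuming no REX prefix is required.)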
    break;

  case 0x83: // addl a, #8; addl r, #8
    // also: orl, adcl, sbbl, andl, subl, xorl, cmpl
    debug_only(has_disp32 = true); // has both kinds of operands!
    tail_size = 1;
    break;

  case 0x9B:
    switch (0xFF & *ip++) {
      case 0xD9: // fnstcw a
        debug_only(has_disp32 = true);
        break;
      default:
        ShouldNotReachHere();
    }
    break;

  case REP4(0x00): // addb a, r; addl a, r; addb r, a; addl r, a
  case REP4(0x10): // adc...
  case REP4(0x20): // and...
  case REP4(0x30): // xor...
  case REP4(0x08): // or...
  case REP4(0x18): // sbb...
  case REP4(0x28): // sub...
  case 0xF7: // mull a
  case 0x8D: // lea r, a
  case 0x87: // xchg r, a
  case REP4(0x38): // cmp...
  case 0x85: // test r, a
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0xC1: // sal a, #8; sar a, #8; shl a, #8; shr a, #8
  case 0xC6: // movb a, #8
  case 0x80: // cmpb a, #8
  case 0x6B: // imul r, a, #8
    debug_only(has_disp32 = true); // has both kinds of operands!
    tail_size = 1; // the imm8
    break;

  case 0xC4: // VEX_3bytes
  case 0xC5: // VEX_2bytes
    assert((UseAVX > 0), "shouldn't have VEX prefix");
    assert(ip == inst+1, "no prefixes allowed");
    // C4 and C5 are also used as opcodes for the PINSRW and PEXTRW instructions,
    // but those have the 0x0F prefix and are handled when 0x0F is processed above.
    //
    // In 32-bit mode the VEX first bytes C4 and C5 alias onto the LDS and LES
    // instructions (these instructions are not supported in 64-bit mode).
    // To distinguish them, bits [7:6] are set in the VEX second byte, since a
    // ModRM byte cannot be of the form 11xxxxxx in 32-bit mode. To set those
    // VEX bits, the REX and vvvv bits are stored inverted.
    //
    // Fortunately C2 doesn't generate these instructions, so we don't need
    // to check for them in the product version.

    // Check second byte
    NOT_LP64(assert((0xC0 & *ip) == 0xC0, "shouldn't have LDS and LES instructions"));

    // First byte
    if ((0xFF & *inst) == VEX_3bytes) {
      ip++; // third byte
      is_64bit = ((VEX_W & *ip) == VEX_W);
    }
    ip++; // opcode
    // To find the end of the instruction (which == end_pc_operand).
    switch (0xFF & *ip) {
      case 0x61: // pcmpestri r, r/a, #8
      case 0x70: // pshufd r, r/a, #8
      case 0x73: // psrldq r, #8
        tail_size = 1; // the imm8
        break;
      default:
        break;
    }
    ip++; // skip opcode
    debug_only(has_disp32 = true); // has both kinds of operands!
694 break; 695 696 case 0xD1: // sal a, 1; sar a, 1; shl a, 1; shr a, 1 697 case 0xD3: // sal a, %cl; sar a, %cl; shl a, %cl; shr a, %cl 698 case 0xD9: // fld_s a; fst_s a; fstp_s a; fldcw a 699 case 0xDD: // fld_d a; fst_d a; fstp_d a 700 case 0xDB: // fild_s a; fistp_s a; fld_x a; fstp_x a 701 case 0xDF: // fild_d a; fistp_d a 702 case 0xD8: // fadd_s a; fsubr_s a; fmul_s a; fdivr_s a; fcomp_s a 703 case 0xDC: // fadd_d a; fsubr_d a; fmul_d a; fdivr_d a; fcomp_d a 704 case 0xDE: // faddp_d a; fsubrp_d a; fmulp_d a; fdivrp_d a; fcompp_d a 705 debug_only(has_disp32 = true); 706 break; 707 708 case 0xE8: // call rdisp32 709 case 0xE9: // jmp rdisp32 710 if (which == end_pc_operand) return ip + 4; 711 assert(which == call32_operand, "call has no disp32 or imm"); 712 return ip; 713 714 case 0xF0: // Lock 715 assert(os::is_MP(), "only on MP"); 716 goto again_after_prefix; 717 718 case 0xF3: // For SSE 719 case 0xF2: // For SSE2 720 switch (0xFF & *ip++) { 721 case REX: 722 case REX_B: 723 case REX_X: 724 case REX_XB: 725 case REX_R: 726 case REX_RB: 727 case REX_RX: 728 case REX_RXB: 729 case REX_W: 730 case REX_WB: 731 case REX_WX: 732 case REX_WXB: 733 case REX_WR: 734 case REX_WRB: 735 case REX_WRX: 736 case REX_WRXB: 737 NOT_LP64(assert(false, "found 64bit prefix")); 738 ip++; 739 default: 740 ip++; 741 } 742 debug_only(has_disp32 = true); // has both kinds of operands! 743 break; 744 745 default: 746 ShouldNotReachHere(); 747 748 #undef REP8 749 #undef REP16 750 } 751 752 assert(which != call32_operand, "instruction is not a call, jmp, or jcc"); 753 #ifdef _LP64 754 assert(which != imm_operand, "instruction is not a movq reg, imm64"); 755 #else 756 // assert(which != imm_operand || has_imm32, "instruction has no imm32 field"); 757 assert(which != imm_operand || has_disp32, "instruction has no imm32 field"); 758 #endif // LP64 759 assert(which != disp32_operand || has_disp32, "instruction has no disp32 field"); 760 761 // parse the output of emit_operand 762 int op2 = 0xFF & *ip++; 763 int base = op2 & 0x07; 764 int op3 = -1; 765 const int b100 = 4; 766 const int b101 = 5; 767 if (base == b100 && (op2 >> 6) != 3) { 768 op3 = 0xFF & *ip++; 769 base = op3 & 0x07; // refetch the base 770 } 771 // now ip points at the disp (if any) 772 773 switch (op2 >> 6) { 774 case 0: 775 // [00 reg 100][ss index base] 776 // [00 reg 100][00 100 esp] 777 // [00 reg base] 778 // [00 reg 100][ss index 101][disp32] 779 // [00 reg 101] [disp32] 780 781 if (base == b101) { 782 if (which == disp32_operand) 783 return ip; // caller wants the disp32 784 ip += 4; // skip the disp32 785 } 786 break; 787 788 case 1: 789 // [01 reg 100][ss index base][disp8] 790 // [01 reg 100][00 100 esp][disp8] 791 // [01 reg base] [disp8] 792 ip += 1; // skip the disp8 793 break; 794 795 case 2: 796 // [10 reg 100][ss index base][disp32] 797 // [10 reg 100][00 100 esp][disp32] 798 // [10 reg base] [disp32] 799 if (which == disp32_operand) 800 return ip; // caller wants the disp32 801 ip += 4; // skip the disp32 802 break; 803 804 case 3: 805 // [11 reg base] (not a memory addressing mode) 806 break; 807 } 808 809 if (which == end_pc_operand) { 810 return ip + tail_size; 811 } 812 813 #ifdef _LP64 814 assert(which == narrow_oop_operand && !is_64bit, "instruction is not a movl adr, imm32"); 815 #else 816 assert(which == imm_operand, "instruction has only an imm field"); 817 #endif // LP64 818 return ip; 819 } 820 821 address Assembler::locate_next_instruction(address inst) { 822 // Secretly share code with locate_operand: 823 return 
locate_operand(inst, end_pc_operand); 824 } 825 826 827 #ifdef ASSERT 828 void Assembler::check_relocation(RelocationHolder const& rspec, int format) { 829 address inst = inst_mark(); 830 assert(inst != NULL && inst < pc(), "must point to beginning of instruction"); 831 address opnd; 832 833 Relocation* r = rspec.reloc(); 834 if (r->type() == relocInfo::none) { 835 return; 836 } else if (r->is_call() || format == call32_operand) { 837 // assert(format == imm32_operand, "cannot specify a nonzero format"); 838 opnd = locate_operand(inst, call32_operand); 839 } else if (r->is_data()) { 840 assert(format == imm_operand || format == disp32_operand 841 LP64_ONLY(|| format == narrow_oop_operand), "format ok"); 842 opnd = locate_operand(inst, (WhichOperand)format); 843 } else { 844 assert(format == imm_operand, "cannot specify a format"); 845 return; 846 } 847 assert(opnd == pc(), "must put operand where relocs can find it"); 848 } 849 #endif // ASSERT 850 851 void Assembler::emit_operand32(Register reg, Address adr) { 852 assert(reg->encoding() < 8, "no extended registers"); 853 assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers"); 854 emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp, 855 adr._rspec); 856 } 857 858 void Assembler::emit_operand(Register reg, Address adr, 859 int rip_relative_correction) { 860 emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp, 861 adr._rspec, 862 rip_relative_correction); 863 } 864 865 void Assembler::emit_operand(XMMRegister reg, Address adr) { 866 emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp, 867 adr._rspec); 868 } 869 870 // MMX operations 871 void Assembler::emit_operand(MMXRegister reg, Address adr) { 872 assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers"); 873 emit_operand((Register)reg, adr._base, adr._index, adr._scale, adr._disp, adr._rspec); 874 } 875 876 // work around gcc (3.2.1-7a) bug 877 void Assembler::emit_operand(Address adr, MMXRegister reg) { 878 assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers"); 879 emit_operand((Register)reg, adr._base, adr._index, adr._scale, adr._disp, adr._rspec); 880 } 881 882 883 void Assembler::emit_farith(int b1, int b2, int i) { 884 assert(isByte(b1) && isByte(b2), "wrong opcode"); 885 assert(0 <= i && i < 8, "illegal stack offset"); 886 emit_byte(b1); 887 emit_byte(b2 + i); 888 } 889 890 891 // Now the Assembler instructions (identical for 32/64 bits) 892 893 void Assembler::adcl(Address dst, int32_t imm32) { 894 InstructionMark im(this); 895 prefix(dst); 896 emit_arith_operand(0x81, rdx, dst, imm32); 897 } 898 899 void Assembler::adcl(Address dst, Register src) { 900 InstructionMark im(this); 901 prefix(dst, src); 902 emit_byte(0x11); 903 emit_operand(src, dst); 904 } 905 906 void Assembler::adcl(Register dst, int32_t imm32) { 907 prefix(dst); 908 emit_arith(0x81, 0xD0, dst, imm32); 909 } 910 911 void Assembler::adcl(Register dst, Address src) { 912 InstructionMark im(this); 913 prefix(src, dst); 914 emit_byte(0x13); 915 emit_operand(dst, src); 916 } 917 918 void Assembler::adcl(Register dst, Register src) { 919 (void) prefix_and_encode(dst->encoding(), src->encoding()); 920 emit_arith(0x13, 0xC0, dst, src); 921 } 922 923 void Assembler::addl(Address dst, int32_t imm32) { 924 InstructionMark im(this); 925 prefix(dst); 926 emit_arith_operand(0x81, rax, dst, imm32); 927 } 928 929 void Assembler::addl(Address dst, Register src) { 930 InstructionMark im(this); 931 prefix(dst, src); 932 
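  // 0x01 /r is ADD r/m32, r32 (the memory-destination form).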
emit_byte(0x01); 933 emit_operand(src, dst); 934 } 935 936 void Assembler::addl(Register dst, int32_t imm32) { 937 prefix(dst); 938 emit_arith(0x81, 0xC0, dst, imm32); 939 } 940 941 void Assembler::addl(Register dst, Address src) { 942 InstructionMark im(this); 943 prefix(src, dst); 944 emit_byte(0x03); 945 emit_operand(dst, src); 946 } 947 948 void Assembler::addl(Register dst, Register src) { 949 (void) prefix_and_encode(dst->encoding(), src->encoding()); 950 emit_arith(0x03, 0xC0, dst, src); 951 } 952 953 void Assembler::addr_nop_4() { 954 assert(UseAddressNop, "no CPU support"); 955 // 4 bytes: NOP DWORD PTR [EAX+0] 956 emit_byte(0x0F); 957 emit_byte(0x1F); 958 emit_byte(0x40); // emit_rm(cbuf, 0x1, EAX_enc, EAX_enc); 959 emit_byte(0); // 8-bits offset (1 byte) 960 } 961 962 void Assembler::addr_nop_5() { 963 assert(UseAddressNop, "no CPU support"); 964 // 5 bytes: NOP DWORD PTR [EAX+EAX*0+0] 8-bits offset 965 emit_byte(0x0F); 966 emit_byte(0x1F); 967 emit_byte(0x44); // emit_rm(cbuf, 0x1, EAX_enc, 0x4); 968 emit_byte(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc); 969 emit_byte(0); // 8-bits offset (1 byte) 970 } 971 972 void Assembler::addr_nop_7() { 973 assert(UseAddressNop, "no CPU support"); 974 // 7 bytes: NOP DWORD PTR [EAX+0] 32-bits offset 975 emit_byte(0x0F); 976 emit_byte(0x1F); 977 emit_byte(0x80); // emit_rm(cbuf, 0x2, EAX_enc, EAX_enc); 978 emit_long(0); // 32-bits offset (4 bytes) 979 } 980 981 void Assembler::addr_nop_8() { 982 assert(UseAddressNop, "no CPU support"); 983 // 8 bytes: NOP DWORD PTR [EAX+EAX*0+0] 32-bits offset 984 emit_byte(0x0F); 985 emit_byte(0x1F); 986 emit_byte(0x84); // emit_rm(cbuf, 0x2, EAX_enc, 0x4); 987 emit_byte(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc); 988 emit_long(0); // 32-bits offset (4 bytes) 989 } 990 991 void Assembler::addsd(XMMRegister dst, XMMRegister src) { 992 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 993 emit_simd_arith(0x58, dst, src, VEX_SIMD_F2); 994 } 995 996 void Assembler::addsd(XMMRegister dst, Address src) { 997 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 998 emit_simd_arith(0x58, dst, src, VEX_SIMD_F2); 999 } 1000 1001 void Assembler::addss(XMMRegister dst, XMMRegister src) { 1002 NOT_LP64(assert(VM_Version::supports_sse(), "")); 1003 emit_simd_arith(0x58, dst, src, VEX_SIMD_F3); 1004 } 1005 1006 void Assembler::addss(XMMRegister dst, Address src) { 1007 NOT_LP64(assert(VM_Version::supports_sse(), "")); 1008 emit_simd_arith(0x58, dst, src, VEX_SIMD_F3); 1009 } 1010 1011 void Assembler::andl(Address dst, int32_t imm32) { 1012 InstructionMark im(this); 1013 prefix(dst); 1014 emit_byte(0x81); 1015 emit_operand(rsp, dst, 4); 1016 emit_long(imm32); 1017 } 1018 1019 void Assembler::andl(Register dst, int32_t imm32) { 1020 prefix(dst); 1021 emit_arith(0x81, 0xE0, dst, imm32); 1022 } 1023 1024 void Assembler::andl(Register dst, Address src) { 1025 InstructionMark im(this); 1026 prefix(src, dst); 1027 emit_byte(0x23); 1028 emit_operand(dst, src); 1029 } 1030 1031 void Assembler::andl(Register dst, Register src) { 1032 (void) prefix_and_encode(dst->encoding(), src->encoding()); 1033 emit_arith(0x23, 0xC0, dst, src); 1034 } 1035 1036 void Assembler::bsfl(Register dst, Register src) { 1037 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 1038 emit_byte(0x0F); 1039 emit_byte(0xBC); 1040 emit_byte(0xC0 | encode); 1041 } 1042 1043 void Assembler::bsrl(Register dst, Register src) { 1044 assert(!VM_Version::supports_lzcnt(), "encoding is treated as LZCNT"); 1045 int encode = 
prefix_and_encode(dst->encoding(), src->encoding()); 1046 emit_byte(0x0F); 1047 emit_byte(0xBD); 1048 emit_byte(0xC0 | encode); 1049 } 1050 1051 void Assembler::bswapl(Register reg) { // bswap 1052 int encode = prefix_and_encode(reg->encoding()); 1053 emit_byte(0x0F); 1054 emit_byte(0xC8 | encode); 1055 } 1056 1057 void Assembler::call(Label& L, relocInfo::relocType rtype) { 1058 // suspect disp32 is always good 1059 int operand = LP64_ONLY(disp32_operand) NOT_LP64(imm_operand); 1060 1061 if (L.is_bound()) { 1062 const int long_size = 5; 1063 int offs = (int)( target(L) - pc() ); 1064 assert(offs <= 0, "assembler error"); 1065 InstructionMark im(this); 1066 // 1110 1000 #32-bit disp 1067 emit_byte(0xE8); 1068 emit_data(offs - long_size, rtype, operand); 1069 } else { 1070 InstructionMark im(this); 1071 // 1110 1000 #32-bit disp 1072 L.add_patch_at(code(), locator()); 1073 1074 emit_byte(0xE8); 1075 emit_data(int(0), rtype, operand); 1076 } 1077 } 1078 1079 void Assembler::call(Register dst) { 1080 int encode = prefix_and_encode(dst->encoding()); 1081 emit_byte(0xFF); 1082 emit_byte(0xD0 | encode); 1083 } 1084 1085 1086 void Assembler::call(Address adr) { 1087 InstructionMark im(this); 1088 prefix(adr); 1089 emit_byte(0xFF); 1090 emit_operand(rdx, adr); 1091 } 1092 1093 void Assembler::call_literal(address entry, RelocationHolder const& rspec) { 1094 assert(entry != NULL, "call most probably wrong"); 1095 InstructionMark im(this); 1096 emit_byte(0xE8); 1097 intptr_t disp = entry - (_code_pos + sizeof(int32_t)); 1098 assert(is_simm32(disp), "must be 32bit offset (call2)"); 1099 // Technically, should use call32_operand, but this format is 1100 // implied by the fact that we're emitting a call instruction. 1101 1102 int operand = LP64_ONLY(disp32_operand) NOT_LP64(call32_operand); 1103 emit_data((int) disp, rspec, operand); 1104 } 1105 1106 void Assembler::cdql() { 1107 emit_byte(0x99); 1108 } 1109 1110 void Assembler::cmovl(Condition cc, Register dst, Register src) { 1111 NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction")); 1112 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 1113 emit_byte(0x0F); 1114 emit_byte(0x40 | cc); 1115 emit_byte(0xC0 | encode); 1116 } 1117 1118 1119 void Assembler::cmovl(Condition cc, Register dst, Address src) { 1120 NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction")); 1121 prefix(src, dst); 1122 emit_byte(0x0F); 1123 emit_byte(0x40 | cc); 1124 emit_operand(dst, src); 1125 } 1126 1127 void Assembler::cmpb(Address dst, int imm8) { 1128 InstructionMark im(this); 1129 prefix(dst); 1130 emit_byte(0x80); 1131 emit_operand(rdi, dst, 1); 1132 emit_byte(imm8); 1133 } 1134 1135 void Assembler::cmpl(Address dst, int32_t imm32) { 1136 InstructionMark im(this); 1137 prefix(dst); 1138 emit_byte(0x81); 1139 emit_operand(rdi, dst, 4); 1140 emit_long(imm32); 1141 } 1142 1143 void Assembler::cmpl(Register dst, int32_t imm32) { 1144 prefix(dst); 1145 emit_arith(0x81, 0xF8, dst, imm32); 1146 } 1147 1148 void Assembler::cmpl(Register dst, Register src) { 1149 (void) prefix_and_encode(dst->encoding(), src->encoding()); 1150 emit_arith(0x3B, 0xC0, dst, src); 1151 } 1152 1153 1154 void Assembler::cmpl(Register dst, Address src) { 1155 InstructionMark im(this); 1156 prefix(src, dst); 1157 emit_byte(0x3B); 1158 emit_operand(dst, src); 1159 } 1160 1161 void Assembler::cmpw(Address dst, int imm16) { 1162 InstructionMark im(this); 1163 assert(!dst.base_needs_rex() && !dst.index_needs_rex(), "no extended registers"); 1164 
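  // The 0x66 operand-size prefix makes the 0x81 /7 compare below operate on
  // 16 bits and take an imm16.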
  emit_byte(0x66);
  emit_byte(0x81);
  emit_operand(rdi, dst, 2);
  emit_word(imm16);
}

// The 32-bit cmpxchg compares the value at adr with the contents of rax,
// and stores reg into adr if so; otherwise, the value at adr is loaded into rax,.
// The ZF is set if the compared values were equal, and cleared otherwise.
void Assembler::cmpxchgl(Register reg, Address adr) { // cmpxchg
  if (Atomics & 2) {
    // caveat: no InstructionMark, so this isn't relocatable.
    // Emit a synthetic, non-atomic, CAS equivalent.
    // Beware. The synthetic form sets all ICCs, not just ZF.
    // cmpxchg r,[m] is equivalent to rax, = CAS (m, rax, r)
    cmpl(rax, adr);
    movl(rax, adr);
    if (reg != rax) {
      Label L;
      jcc(Assembler::notEqual, L);
      movl(adr, reg);
      bind(L);
    }
  } else {
    InstructionMark im(this);
    prefix(adr, reg);
    emit_byte(0x0F);
    emit_byte(0xB1);
    emit_operand(reg, adr);
  }
}

void Assembler::comisd(XMMRegister dst, Address src) {
  // NOTE: dbx seems to decode this as comiss even though the
  // 0x66 is there. Strangely, ucomisd comes out correctly.
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_66);
}

void Assembler::comisd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_66);
}

void Assembler::comiss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_NONE);
}

void Assembler::comiss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_NONE);
}

void Assembler::cvtdq2pd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith_nonds(0xE6, dst, src, VEX_SIMD_F3);
}

void Assembler::cvtdq2ps(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith_nonds(0x5B, dst, src, VEX_SIMD_NONE);
}

void Assembler::cvtsd2ss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x5A, dst, src, VEX_SIMD_F2);
}

void Assembler::cvtsd2ss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x5A, dst, src, VEX_SIMD_F2);
}

void Assembler::cvtsi2sdl(XMMRegister dst, Register src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2);
  emit_byte(0x2A);
  emit_byte(0xC0 | encode);
}

void Assembler::cvtsi2sdl(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x2A, dst, src, VEX_SIMD_F2);
}

void Assembler::cvtsi2ssl(XMMRegister dst, Register src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3);
  emit_byte(0x2A);
  emit_byte(0xC0 | encode);
}

void Assembler::cvtsi2ssl(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_simd_arith(0x2A, dst, src, VEX_SIMD_F3);
}

void Assembler::cvtss2sd(XMMRegister dst,
XMMRegister src) { 1263 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1264 emit_simd_arith(0x5A, dst, src, VEX_SIMD_F3); 1265 } 1266 1267 void Assembler::cvtss2sd(XMMRegister dst, Address src) { 1268 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1269 emit_simd_arith(0x5A, dst, src, VEX_SIMD_F3); 1270 } 1271 1272 1273 void Assembler::cvttsd2sil(Register dst, XMMRegister src) { 1274 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1275 int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F2); 1276 emit_byte(0x2C); 1277 emit_byte(0xC0 | encode); 1278 } 1279 1280 void Assembler::cvttss2sil(Register dst, XMMRegister src) { 1281 NOT_LP64(assert(VM_Version::supports_sse(), "")); 1282 int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F3); 1283 emit_byte(0x2C); 1284 emit_byte(0xC0 | encode); 1285 } 1286 1287 void Assembler::decl(Address dst) { 1288 // Don't use it directly. Use MacroAssembler::decrement() instead. 1289 InstructionMark im(this); 1290 prefix(dst); 1291 emit_byte(0xFF); 1292 emit_operand(rcx, dst); 1293 } 1294 1295 void Assembler::divsd(XMMRegister dst, Address src) { 1296 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1297 emit_simd_arith(0x5E, dst, src, VEX_SIMD_F2); 1298 } 1299 1300 void Assembler::divsd(XMMRegister dst, XMMRegister src) { 1301 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1302 emit_simd_arith(0x5E, dst, src, VEX_SIMD_F2); 1303 } 1304 1305 void Assembler::divss(XMMRegister dst, Address src) { 1306 NOT_LP64(assert(VM_Version::supports_sse(), "")); 1307 emit_simd_arith(0x5E, dst, src, VEX_SIMD_F3); 1308 } 1309 1310 void Assembler::divss(XMMRegister dst, XMMRegister src) { 1311 NOT_LP64(assert(VM_Version::supports_sse(), "")); 1312 emit_simd_arith(0x5E, dst, src, VEX_SIMD_F3); 1313 } 1314 1315 void Assembler::emms() { 1316 NOT_LP64(assert(VM_Version::supports_mmx(), "")); 1317 emit_byte(0x0F); 1318 emit_byte(0x77); 1319 } 1320 1321 void Assembler::hlt() { 1322 emit_byte(0xF4); 1323 } 1324 1325 void Assembler::idivl(Register src) { 1326 int encode = prefix_and_encode(src->encoding()); 1327 emit_byte(0xF7); 1328 emit_byte(0xF8 | encode); 1329 } 1330 1331 void Assembler::divl(Register src) { // Unsigned 1332 int encode = prefix_and_encode(src->encoding()); 1333 emit_byte(0xF7); 1334 emit_byte(0xF0 | encode); 1335 } 1336 1337 void Assembler::imull(Register dst, Register src) { 1338 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 1339 emit_byte(0x0F); 1340 emit_byte(0xAF); 1341 emit_byte(0xC0 | encode); 1342 } 1343 1344 1345 void Assembler::imull(Register dst, Register src, int value) { 1346 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 1347 if (is8bit(value)) { 1348 emit_byte(0x6B); 1349 emit_byte(0xC0 | encode); 1350 emit_byte(value & 0xFF); 1351 } else { 1352 emit_byte(0x69); 1353 emit_byte(0xC0 | encode); 1354 emit_long(value); 1355 } 1356 } 1357 1358 void Assembler::incl(Address dst) { 1359 // Don't use it directly. Use MacroAssembler::increment() instead. 
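  // 0xFF /0 is INC r/m32; the rax passed to emit_operand() below only
  // supplies the /0 opcode-extension field, it is not a real operand.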
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0xFF);
  emit_operand(rax, dst);
}

void Assembler::jcc(Condition cc, Label& L, bool maybe_short) {
  InstructionMark im(this);
  assert((0 <= cc) && (cc < 16), "illegal cc");
  if (L.is_bound()) {
    address dst = target(L);
    assert(dst != NULL, "jcc most probably wrong");

    const int short_size = 2;
    const int long_size = 6;
    intptr_t offs = (intptr_t)dst - (intptr_t)_code_pos;
    if (maybe_short && is8bit(offs - short_size)) {
      // 0111 tttn #8-bit disp
      emit_byte(0x70 | cc);
      emit_byte((offs - short_size) & 0xFF);
    } else {
      // 0000 1111 1000 tttn #32-bit disp
      assert(is_simm32(offs - long_size),
             "must be 32bit offset (call4)");
      emit_byte(0x0F);
      emit_byte(0x80 | cc);
      emit_long(offs - long_size);
    }
  } else {
    // Note: we could eliminate conditional jumps to this jump if the
    // condition is the same; however, that seems to be a rather unlikely case.
    // Note: use jccb() if the label to be bound is very close, to get
    // an 8-bit displacement.
    L.add_patch_at(code(), locator());
    emit_byte(0x0F);
    emit_byte(0x80 | cc);
    emit_long(0);
  }
}

void Assembler::jccb(Condition cc, Label& L) {
  if (L.is_bound()) {
    const int short_size = 2;
    address entry = target(L);
#ifdef ASSERT
    intptr_t dist = (intptr_t)entry - ((intptr_t)_code_pos + short_size);
    intptr_t delta = short_branch_delta();
    if (delta != 0) {
      dist += (dist < 0 ? (-delta) : delta);
    }
    assert(is8bit(dist), "Displacement too large for a short jmp");
#endif
    intptr_t offs = (intptr_t)entry - (intptr_t)_code_pos;
    // 0111 tttn #8-bit disp
    emit_byte(0x70 | cc);
    emit_byte((offs - short_size) & 0xFF);
  } else {
    InstructionMark im(this);
    L.add_patch_at(code(), locator());
    emit_byte(0x70 | cc);
    emit_byte(0);
  }
}

void Assembler::jmp(Address adr) {
  InstructionMark im(this);
  prefix(adr);
  emit_byte(0xFF);
  emit_operand(rsp, adr);
}

void Assembler::jmp(Label& L, bool maybe_short) {
  if (L.is_bound()) {
    address entry = target(L);
    assert(entry != NULL, "jmp most probably wrong");
    InstructionMark im(this);
    const int short_size = 2;
    const int long_size = 5;
    intptr_t offs = entry - _code_pos;
    if (maybe_short && is8bit(offs - short_size)) {
      emit_byte(0xEB);
      emit_byte((offs - short_size) & 0xFF);
    } else {
      emit_byte(0xE9);
      emit_long(offs - long_size);
    }
  } else {
    // By default, forward jumps are always 32-bit displacements, since
    // we can't yet know where the label will be bound. If you're sure that
    // the forward jump will not run beyond 256 bytes, use jmpb to
    // force an 8-bit displacement.
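    // The 0xE9 emitted below with a zero displacement is only a placeholder;
    // when the label is eventually bound, the 32-bit displacement is
    // back-patched through the patch entry recorded by add_patch_at().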
    InstructionMark im(this);
    L.add_patch_at(code(), locator());
    emit_byte(0xE9);
    emit_long(0);
  }
}

void Assembler::jmp(Register entry) {
  int encode = prefix_and_encode(entry->encoding());
  emit_byte(0xFF);
  emit_byte(0xE0 | encode);
}

void Assembler::jmp_literal(address dest, RelocationHolder const& rspec) {
  InstructionMark im(this);
  emit_byte(0xE9);
  assert(dest != NULL, "must have a target");
  intptr_t disp = dest - (_code_pos + sizeof(int32_t));
  assert(is_simm32(disp), "must be 32bit offset (jmp)");
  emit_data(disp, rspec.reloc(), call32_operand);
}

void Assembler::jmpb(Label& L) {
  if (L.is_bound()) {
    const int short_size = 2;
    address entry = target(L);
    assert(entry != NULL, "jmp most probably wrong");
#ifdef ASSERT
    intptr_t dist = (intptr_t)entry - ((intptr_t)_code_pos + short_size);
    intptr_t delta = short_branch_delta();
    if (delta != 0) {
      dist += (dist < 0 ? (-delta) : delta);
    }
    assert(is8bit(dist), "Displacement too large for a short jmp");
#endif
    intptr_t offs = entry - _code_pos;
    emit_byte(0xEB);
    emit_byte((offs - short_size) & 0xFF);
  } else {
    InstructionMark im(this);
    L.add_patch_at(code(), locator());
    emit_byte(0xEB);
    emit_byte(0);
  }
}

void Assembler::ldmxcsr(Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  prefix(src);
  emit_byte(0x0F);
  emit_byte(0xAE);
  emit_operand(as_Register(2), src);
}

void Assembler::leal(Register dst, Address src) {
  InstructionMark im(this);
#ifdef _LP64
  emit_byte(0x67); // addr32
  prefix(src, dst);
#endif // LP64
  emit_byte(0x8D);
  emit_operand(dst, src);
}

void Assembler::lock() {
  if (Atomics & 1) {
    // Emit a NOP instead of a lock prefix.
    emit_byte(0x90);
  } else {
    emit_byte(0xF0);
  }
}

void Assembler::lzcntl(Register dst, Register src) {
  assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR");
  emit_byte(0xF3);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xBD);
  emit_byte(0xC0 | encode);
}

// Emit mfence instruction
void Assembler::mfence() {
  NOT_LP64(assert(VM_Version::supports_sse2(), "unsupported");)
  emit_byte(0x0F);
  emit_byte(0xAE);
  emit_byte(0xF0);
}

void Assembler::mov(Register dst, Register src) {
  LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
}

void Assembler::movapd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith_nonds(0x28, dst, src, VEX_SIMD_66);
}

void Assembler::movaps(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_simd_arith_nonds(0x28, dst, src, VEX_SIMD_NONE);
}

void Assembler::movlhps(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  int encode = simd_prefix_and_encode(dst, src, src, VEX_SIMD_NONE);
  emit_byte(0x16);
  emit_byte(0xC0 | encode);
}

void Assembler::movb(Register dst, Address src) {
  NOT_LP64(assert(dst->has_byte_register(), "must have byte register"));
  InstructionMark im(this);
  prefix(src, dst, true);
  emit_byte(0x8A);
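  // 0x8A /r: MOV r8, r/m8.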
1568 emit_operand(dst, src); 1569 } 1570 1571 1572 void Assembler::movb(Address dst, int imm8) { 1573 InstructionMark im(this); 1574 prefix(dst); 1575 emit_byte(0xC6); 1576 emit_operand(rax, dst, 1); 1577 emit_byte(imm8); 1578 } 1579 1580 1581 void Assembler::movb(Address dst, Register src) { 1582 assert(src->has_byte_register(), "must have byte register"); 1583 InstructionMark im(this); 1584 prefix(dst, src, true); 1585 emit_byte(0x88); 1586 emit_operand(src, dst); 1587 } 1588 1589 void Assembler::movdl(XMMRegister dst, Register src) { 1590 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1591 int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66); 1592 emit_byte(0x6E); 1593 emit_byte(0xC0 | encode); 1594 } 1595 1596 void Assembler::movdl(Register dst, XMMRegister src) { 1597 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1598 // swap src/dst to get correct prefix 1599 int encode = simd_prefix_and_encode(src, dst, VEX_SIMD_66); 1600 emit_byte(0x7E); 1601 emit_byte(0xC0 | encode); 1602 } 1603 1604 void Assembler::movdl(XMMRegister dst, Address src) { 1605 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1606 InstructionMark im(this); 1607 simd_prefix(dst, src, VEX_SIMD_66); 1608 emit_byte(0x6E); 1609 emit_operand(dst, src); 1610 } 1611 1612 void Assembler::movdl(Address dst, XMMRegister src) { 1613 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1614 InstructionMark im(this); 1615 simd_prefix(dst, src, VEX_SIMD_66); 1616 emit_byte(0x7E); 1617 emit_operand(src, dst); 1618 } 1619 1620 void Assembler::movdqa(XMMRegister dst, XMMRegister src) { 1621 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1622 emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_66); 1623 } 1624 1625 void Assembler::movdqu(XMMRegister dst, Address src) { 1626 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1627 emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_F3); 1628 } 1629 1630 void Assembler::movdqu(XMMRegister dst, XMMRegister src) { 1631 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1632 emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_F3); 1633 } 1634 1635 void Assembler::movdqu(Address dst, XMMRegister src) { 1636 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1637 InstructionMark im(this); 1638 simd_prefix(dst, src, VEX_SIMD_F3); 1639 emit_byte(0x7F); 1640 emit_operand(src, dst); 1641 } 1642 1643 // Move Unaligned 256bit Vector 1644 void Assembler::vmovdqu(XMMRegister dst, XMMRegister src) { 1645 assert(UseAVX, ""); 1646 bool vector256 = true; 1647 int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F3, vector256); 1648 emit_byte(0x6F); 1649 emit_byte(0xC0 | encode); 1650 } 1651 1652 void Assembler::vmovdqu(XMMRegister dst, Address src) { 1653 assert(UseAVX, ""); 1654 InstructionMark im(this); 1655 bool vector256 = true; 1656 vex_prefix(dst, xnoreg, src, VEX_SIMD_F3, vector256); 1657 emit_byte(0x6F); 1658 emit_operand(dst, src); 1659 } 1660 1661 void Assembler::vmovdqu(Address dst, XMMRegister src) { 1662 assert(UseAVX, ""); 1663 InstructionMark im(this); 1664 bool vector256 = true; 1665 // swap src<->dst for encoding 1666 assert(src != xnoreg, "sanity"); 1667 vex_prefix(src, xnoreg, dst, VEX_SIMD_F3, vector256); 1668 emit_byte(0x7F); 1669 emit_operand(src, dst); 1670 } 1671 1672 // Uses zero extension on 64bit 1673 1674 void Assembler::movl(Register dst, int32_t imm32) { 1675 int encode = prefix_and_encode(dst->encoding()); 1676 emit_byte(0xB8 | encode); 1677 emit_long(imm32); 1678 } 1679 1680 void Assembler::movl(Register dst, Register src) { 1681 int encode = 
  prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x8B);
  emit_byte(0xC0 | encode);
}

void Assembler::movl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x8B);
  emit_operand(dst, src);
}

void Assembler::movl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0xC7);
  emit_operand(rax, dst, 4);
  emit_long(imm32);
}

void Assembler::movl(Address dst, Register src) {
  InstructionMark im(this);
  prefix(dst, src);
  emit_byte(0x89);
  emit_operand(src, dst);
}

// Newer CPUs require the use of movsd and movss to avoid a partial register
// stall when loading from memory. But for old Opterons, use movlpd instead
// of movsd. The selection is done in MacroAssembler::movdbl() and movflt().
void Assembler::movlpd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x12, dst, src, VEX_SIMD_66);
}

void Assembler::movq( MMXRegister dst, Address src ) {
  assert( VM_Version::supports_mmx(), "" );
  emit_byte(0x0F);
  emit_byte(0x6F);
  emit_operand(dst, src);
}

void Assembler::movq( Address dst, MMXRegister src ) {
  assert( VM_Version::supports_mmx(), "" );
  emit_byte(0x0F);
  emit_byte(0x7F);
  // Work around a gcc (3.2.1-7a) bug.
  // In that version of gcc, with only an emit_operand(MMX, Address) available,
  // gcc will tail-jump and try to reverse the parameters, completely
  // obliterating dst in the process. By having a version available that
  // doesn't need to swap the args at the tail jump, the bug is avoided.
  emit_operand(dst, src);
}

void Assembler::movq(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  simd_prefix(dst, src, VEX_SIMD_F3);
  emit_byte(0x7E);
  emit_operand(dst, src);
}

void Assembler::movq(Address dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  simd_prefix(dst, src, VEX_SIMD_66);
  emit_byte(0xD6);
  emit_operand(src, dst);
}

void Assembler::movsbl(Register dst, Address src) { // movsxb
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0xBE);
  emit_operand(dst, src);
}

void Assembler::movsbl(Register dst, Register src) { // movsxb
  NOT_LP64(assert(src->has_byte_register(), "must have byte register"));
  int encode = prefix_and_encode(dst->encoding(), src->encoding(), true);
  emit_byte(0x0F);
  emit_byte(0xBE);
  emit_byte(0xC0 | encode);
}

void Assembler::movsd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x10, dst, src, VEX_SIMD_F2);
}

void Assembler::movsd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith_nonds(0x10, dst, src, VEX_SIMD_F2);
}

void Assembler::movsd(Address dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  simd_prefix(dst, src, VEX_SIMD_F2);
  emit_byte(0x11);
  emit_operand(src, dst);
}

void Assembler::movss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
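  // F3 0F 10 /r is MOVSS xmm1, xmm2/m32; the F3 comes from VEX_SIMD_F3 below.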
emit_simd_arith(0x10, dst, src, VEX_SIMD_F3); 1789 } 1790 1791 void Assembler::movss(XMMRegister dst, Address src) { 1792 NOT_LP64(assert(VM_Version::supports_sse(), "")); 1793 emit_simd_arith_nonds(0x10, dst, src, VEX_SIMD_F3); 1794 } 1795 1796 void Assembler::movss(Address dst, XMMRegister src) { 1797 NOT_LP64(assert(VM_Version::supports_sse(), "")); 1798 InstructionMark im(this); 1799 simd_prefix(dst, src, VEX_SIMD_F3); 1800 emit_byte(0x11); 1801 emit_operand(src, dst); 1802 } 1803 1804 void Assembler::movswl(Register dst, Address src) { // movsxw 1805 InstructionMark im(this); 1806 prefix(src, dst); 1807 emit_byte(0x0F); 1808 emit_byte(0xBF); 1809 emit_operand(dst, src); 1810 } 1811 1812 void Assembler::movswl(Register dst, Register src) { // movsxw 1813 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 1814 emit_byte(0x0F); 1815 emit_byte(0xBF); 1816 emit_byte(0xC0 | encode); 1817 } 1818 1819 void Assembler::movw(Address dst, int imm16) { 1820 InstructionMark im(this); 1821 1822 emit_byte(0x66); // switch to 16-bit mode 1823 prefix(dst); 1824 emit_byte(0xC7); 1825 emit_operand(rax, dst, 2); 1826 emit_word(imm16); 1827 } 1828 1829 void Assembler::movw(Register dst, Address src) { 1830 InstructionMark im(this); 1831 emit_byte(0x66); 1832 prefix(src, dst); 1833 emit_byte(0x8B); 1834 emit_operand(dst, src); 1835 } 1836 1837 void Assembler::movw(Address dst, Register src) { 1838 InstructionMark im(this); 1839 emit_byte(0x66); 1840 prefix(dst, src); 1841 emit_byte(0x89); 1842 emit_operand(src, dst); 1843 } 1844 1845 void Assembler::movzbl(Register dst, Address src) { // movzxb 1846 InstructionMark im(this); 1847 prefix(src, dst); 1848 emit_byte(0x0F); 1849 emit_byte(0xB6); 1850 emit_operand(dst, src); 1851 } 1852 1853 void Assembler::movzbl(Register dst, Register src) { // movzxb 1854 NOT_LP64(assert(src->has_byte_register(), "must have byte register")); 1855 int encode = prefix_and_encode(dst->encoding(), src->encoding(), true); 1856 emit_byte(0x0F); 1857 emit_byte(0xB6); 1858 emit_byte(0xC0 | encode); 1859 } 1860 1861 void Assembler::movzwl(Register dst, Address src) { // movzxw 1862 InstructionMark im(this); 1863 prefix(src, dst); 1864 emit_byte(0x0F); 1865 emit_byte(0xB7); 1866 emit_operand(dst, src); 1867 } 1868 1869 void Assembler::movzwl(Register dst, Register src) { // movzxw 1870 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 1871 emit_byte(0x0F); 1872 emit_byte(0xB7); 1873 emit_byte(0xC0 | encode); 1874 } 1875 1876 void Assembler::mull(Address src) { 1877 InstructionMark im(this); 1878 prefix(src); 1879 emit_byte(0xF7); 1880 emit_operand(rsp, src); 1881 } 1882 1883 void Assembler::mull(Register src) { 1884 int encode = prefix_and_encode(src->encoding()); 1885 emit_byte(0xF7); 1886 emit_byte(0xE0 | encode); 1887 } 1888 1889 void Assembler::mulsd(XMMRegister dst, Address src) { 1890 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1891 emit_simd_arith(0x59, dst, src, VEX_SIMD_F2); 1892 } 1893 1894 void Assembler::mulsd(XMMRegister dst, XMMRegister src) { 1895 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1896 emit_simd_arith(0x59, dst, src, VEX_SIMD_F2); 1897 } 1898 1899 void Assembler::mulss(XMMRegister dst, Address src) { 1900 NOT_LP64(assert(VM_Version::supports_sse(), "")); 1901 emit_simd_arith(0x59, dst, src, VEX_SIMD_F3); 1902 } 1903 1904 void Assembler::mulss(XMMRegister dst, XMMRegister src) { 1905 NOT_LP64(assert(VM_Version::supports_sse(), "")); 1906 emit_simd_arith(0x59, dst, src, VEX_SIMD_F3); 1907 } 1908 1909 void 
Assembler::negl(Register dst) {
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xF7);
  emit_byte(0xD8 | encode);
}

void Assembler::nop(int i) {
#ifdef ASSERT
  assert(i > 0, " ");
  // The fancy nops aren't currently recognized by debuggers, making it a
  // pain to disassemble code while debugging. If asserts are on, speed is
  // clearly not an issue, so simply use the single-byte traditional nop
  // to do alignment.

  for (; i > 0 ; i--) emit_byte(0x90);
  return;

#endif // ASSERT

  if (UseAddressNop && VM_Version::is_intel()) {
    //
    // Using multi-byte nops "0x0F 0x1F [address]" for Intel
    //  1: 0x90
    //  2: 0x66 0x90
    //  3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
    //  4: 0x0F 0x1F 0x40 0x00
    //  5: 0x0F 0x1F 0x44 0x00 0x00
    //  6: 0x66 0x0F 0x1F 0x44 0x00 0x00
    //  7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
    //  8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    //  9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00

    // The rest of the encoding is Intel specific - don't use consecutive address nops

    // 12: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
    // 13: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
    // 14: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
    // 15: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90

    while(i >= 15) {
      // For Intel don't generate consecutive address nops (mix with regular nops)
      i -= 15;
      emit_byte(0x66); // size prefix
      emit_byte(0x66); // size prefix
      emit_byte(0x66); // size prefix
      addr_nop_8();
      emit_byte(0x66); // size prefix
      emit_byte(0x66); // size prefix
      emit_byte(0x66); // size prefix
      emit_byte(0x90); // nop
    }
    switch (i) {
      case 14:
        emit_byte(0x66); // size prefix
      case 13:
        emit_byte(0x66); // size prefix
      case 12:
        addr_nop_8();
        emit_byte(0x66); // size prefix
        emit_byte(0x66); // size prefix
        emit_byte(0x66); // size prefix
        emit_byte(0x90); // nop
        break;
      case 11:
        emit_byte(0x66); // size prefix
      case 10:
        emit_byte(0x66); // size prefix
      case 9:
        emit_byte(0x66); // size prefix
      case 8:
        addr_nop_8();
        break;
      case 7:
        addr_nop_7();
        break;
      case 6:
        emit_byte(0x66); // size prefix
      case 5:
        addr_nop_5();
        break;
      case 4:
        addr_nop_4();
        break;
      case 3:
        // Don't use "0x0F 0x1F 0x00" - need patching safe padding
        emit_byte(0x66); // size prefix
      case 2:
        emit_byte(0x66); // size prefix
      case 1:
        emit_byte(0x90); // nop
        break;
      default:
        assert(i == 0, " ");
    }
    return;
  }
  if (UseAddressNop && VM_Version::is_amd()) {
    //
    // Using multi-byte nops "0x0F 0x1F [address]" for AMD.
    //  1: 0x90
    //  2: 0x66 0x90
    //  3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
    //  4: 0x0F 0x1F 0x40 0x00
    //  5: 0x0F 0x1F 0x44 0x00 0x00
    //  6: 0x66 0x0F 0x1F 0x44 0x00 0x00
    //  7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
    //  8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    //  9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00

    // The rest of the encoding is AMD specific - use consecutive address nops

    // 12: 0x66 0x0F 0x1F 0x44 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
    // 13: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
    // 14: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
    // 15: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
    // 16: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
    // Size prefixes (0x66) are added for larger sizes

    while(i >= 22) {
      i -= 11;
      emit_byte(0x66); // size prefix
      emit_byte(0x66); // size prefix
      emit_byte(0x66); // size prefix
      addr_nop_8();
    }
    // Generate the first nop for a size between 21-12
    switch (i) {
      case 21:
        i -= 1;
        emit_byte(0x66); // size prefix
      case 20:
      case 19:
        i -= 1;
        emit_byte(0x66); // size prefix
      case 18:
      case 17:
        i -= 1;
        emit_byte(0x66); // size prefix
      case 16:
      case 15:
        i -= 8;
        addr_nop_8();
        break;
      case 14:
      case 13:
        i -= 7;
        addr_nop_7();
        break;
      case 12:
        i -= 6;
        emit_byte(0x66); // size prefix
        addr_nop_5();
        break;
      default:
        assert(i < 12, " ");
    }

    // Generate the second nop for a size between 11-1
    switch (i) {
      case 11:
        emit_byte(0x66); // size prefix
      case 10:
        emit_byte(0x66); // size prefix
      case 9:
        emit_byte(0x66); // size prefix
      case 8:
        addr_nop_8();
        break;
      case 7:
        addr_nop_7();
        break;
      case 6:
        emit_byte(0x66); // size prefix
      case 5:
        addr_nop_5();
        break;
      case 4:
        addr_nop_4();
        break;
      case 3:
        // Don't use "0x0F 0x1F 0x00" - need patching safe padding
        emit_byte(0x66); // size prefix
      case 2:
        emit_byte(0x66); // size prefix
      case 1:
        emit_byte(0x90); // nop
        break;
      default:
        assert(i == 0, " ");
    }
    return;
  }

  // Using nops with size prefixes "0x66 0x90".
2107 // From AMD Optimization Guide: 2108 // 1: 0x90 2109 // 2: 0x66 0x90 2110 // 3: 0x66 0x66 0x90 2111 // 4: 0x66 0x66 0x66 0x90 2112 // 5: 0x66 0x66 0x90 0x66 0x90 2113 // 6: 0x66 0x66 0x90 0x66 0x66 0x90 2114 // 7: 0x66 0x66 0x66 0x90 0x66 0x66 0x90 2115 // 8: 0x66 0x66 0x66 0x90 0x66 0x66 0x66 0x90 2116 // 9: 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90 2117 // 10: 0x66 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90 2118 // 2119 while(i > 12) { 2120 i -= 4; 2121 emit_byte(0x66); // size prefix 2122 emit_byte(0x66); 2123 emit_byte(0x66); 2124 emit_byte(0x90); // nop 2125 } 2126 // 1 - 12 nops 2127 if(i > 8) { 2128 if(i > 9) { 2129 i -= 1; 2130 emit_byte(0x66); 2131 } 2132 i -= 3; 2133 emit_byte(0x66); 2134 emit_byte(0x66); 2135 emit_byte(0x90); 2136 } 2137 // 1 - 8 nops 2138 if(i > 4) { 2139 if(i > 6) { 2140 i -= 1; 2141 emit_byte(0x66); 2142 } 2143 i -= 3; 2144 emit_byte(0x66); 2145 emit_byte(0x66); 2146 emit_byte(0x90); 2147 } 2148 switch (i) { 2149 case 4: 2150 emit_byte(0x66); 2151 case 3: 2152 emit_byte(0x66); 2153 case 2: 2154 emit_byte(0x66); 2155 case 1: 2156 emit_byte(0x90); 2157 break; 2158 default: 2159 assert(i == 0, " "); 2160 } 2161 } 2162 2163 void Assembler::notl(Register dst) { 2164 int encode = prefix_and_encode(dst->encoding()); 2165 emit_byte(0xF7); 2166 emit_byte(0xD0 | encode ); 2167 } 2168 2169 void Assembler::orl(Address dst, int32_t imm32) { 2170 InstructionMark im(this); 2171 prefix(dst); 2172 emit_arith_operand(0x81, rcx, dst, imm32); 2173 } 2174 2175 void Assembler::orl(Register dst, int32_t imm32) { 2176 prefix(dst); 2177 emit_arith(0x81, 0xC8, dst, imm32); 2178 } 2179 2180 void Assembler::orl(Register dst, Address src) { 2181 InstructionMark im(this); 2182 prefix(src, dst); 2183 emit_byte(0x0B); 2184 emit_operand(dst, src); 2185 } 2186 2187 void Assembler::orl(Register dst, Register src) { 2188 (void) prefix_and_encode(dst->encoding(), src->encoding()); 2189 emit_arith(0x0B, 0xC0, dst, src); 2190 } 2191 2192 void Assembler::packuswb(XMMRegister dst, Address src) { 2193 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2194 assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); 2195 emit_simd_arith(0x67, dst, src, VEX_SIMD_66); 2196 } 2197 2198 void Assembler::packuswb(XMMRegister dst, XMMRegister src) { 2199 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2200 emit_simd_arith(0x67, dst, src, VEX_SIMD_66); 2201 } 2202 2203 void Assembler::pcmpestri(XMMRegister dst, Address src, int imm8) { 2204 assert(VM_Version::supports_sse4_2(), ""); 2205 InstructionMark im(this); 2206 simd_prefix(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A); 2207 emit_byte(0x61); 2208 emit_operand(dst, src); 2209 emit_byte(imm8); 2210 } 2211 2212 void Assembler::pcmpestri(XMMRegister dst, XMMRegister src, int imm8) { 2213 assert(VM_Version::supports_sse4_2(), ""); 2214 int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A); 2215 emit_byte(0x61); 2216 emit_byte(0xC0 | encode); 2217 emit_byte(imm8); 2218 } 2219 2220 void Assembler::pmovzxbw(XMMRegister dst, Address src) { 2221 assert(VM_Version::supports_sse4_1(), ""); 2222 InstructionMark im(this); 2223 simd_prefix(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38); 2224 emit_byte(0x30); 2225 emit_operand(dst, src); 2226 } 2227 2228 void Assembler::pmovzxbw(XMMRegister dst, XMMRegister src) { 2229 assert(VM_Version::supports_sse4_1(), ""); 2230 int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38); 2231 emit_byte(0x30); 2232 emit_byte(0xC0 | encode); 2233 } 2234 2235 // 
generic 2236 void Assembler::pop(Register dst) { 2237 int encode = prefix_and_encode(dst->encoding()); 2238 emit_byte(0x58 | encode); 2239 } 2240 2241 void Assembler::popcntl(Register dst, Address src) { 2242 assert(VM_Version::supports_popcnt(), "must support"); 2243 InstructionMark im(this); 2244 emit_byte(0xF3); 2245 prefix(src, dst); 2246 emit_byte(0x0F); 2247 emit_byte(0xB8); 2248 emit_operand(dst, src); 2249 } 2250 2251 void Assembler::popcntl(Register dst, Register src) { 2252 assert(VM_Version::supports_popcnt(), "must support"); 2253 emit_byte(0xF3); 2254 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 2255 emit_byte(0x0F); 2256 emit_byte(0xB8); 2257 emit_byte(0xC0 | encode); 2258 } 2259 2260 void Assembler::popf() { 2261 emit_byte(0x9D); 2262 } 2263 2264 #ifndef _LP64 // no 32bit push/pop on amd64 2265 void Assembler::popl(Address dst) { 2266 // NOTE: this will adjust stack by 8byte on 64bits 2267 InstructionMark im(this); 2268 prefix(dst); 2269 emit_byte(0x8F); 2270 emit_operand(rax, dst); 2271 } 2272 #endif 2273 2274 void Assembler::prefetch_prefix(Address src) { 2275 prefix(src); 2276 emit_byte(0x0F); 2277 } 2278 2279 void Assembler::prefetchnta(Address src) { 2280 NOT_LP64(assert(VM_Version::supports_sse(), "must support")); 2281 InstructionMark im(this); 2282 prefetch_prefix(src); 2283 emit_byte(0x18); 2284 emit_operand(rax, src); // 0, src 2285 } 2286 2287 void Assembler::prefetchr(Address src) { 2288 assert(VM_Version::supports_3dnow_prefetch(), "must support"); 2289 InstructionMark im(this); 2290 prefetch_prefix(src); 2291 emit_byte(0x0D); 2292 emit_operand(rax, src); // 0, src 2293 } 2294 2295 void Assembler::prefetcht0(Address src) { 2296 NOT_LP64(assert(VM_Version::supports_sse(), "must support")); 2297 InstructionMark im(this); 2298 prefetch_prefix(src); 2299 emit_byte(0x18); 2300 emit_operand(rcx, src); // 1, src 2301 } 2302 2303 void Assembler::prefetcht1(Address src) { 2304 NOT_LP64(assert(VM_Version::supports_sse(), "must support")); 2305 InstructionMark im(this); 2306 prefetch_prefix(src); 2307 emit_byte(0x18); 2308 emit_operand(rdx, src); // 2, src 2309 } 2310 2311 void Assembler::prefetcht2(Address src) { 2312 NOT_LP64(assert(VM_Version::supports_sse(), "must support")); 2313 InstructionMark im(this); 2314 prefetch_prefix(src); 2315 emit_byte(0x18); 2316 emit_operand(rbx, src); // 3, src 2317 } 2318 2319 void Assembler::prefetchw(Address src) { 2320 assert(VM_Version::supports_3dnow_prefetch(), "must support"); 2321 InstructionMark im(this); 2322 prefetch_prefix(src); 2323 emit_byte(0x0D); 2324 emit_operand(rcx, src); // 1, src 2325 } 2326 2327 void Assembler::prefix(Prefix p) { 2328 a_byte(p); 2329 } 2330 2331 void Assembler::pshufd(XMMRegister dst, XMMRegister src, int mode) { 2332 assert(isByte(mode), "invalid value"); 2333 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2334 emit_simd_arith_nonds(0x70, dst, src, VEX_SIMD_66); 2335 emit_byte(mode & 0xFF); 2336 2337 } 2338 2339 void Assembler::pshufd(XMMRegister dst, Address src, int mode) { 2340 assert(isByte(mode), "invalid value"); 2341 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2342 assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); 2343 InstructionMark im(this); 2344 simd_prefix(dst, src, VEX_SIMD_66); 2345 emit_byte(0x70); 2346 emit_operand(dst, src); 2347 emit_byte(mode & 0xFF); 2348 } 2349 2350 void Assembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) { 2351 assert(isByte(mode), "invalid value"); 2352 
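  // The mode immediate packs four 2-bit source selectors (bits 1:0 pick the
  // source element for destination element 0, and so on). For example,
  // mode == 0x1B (0b00011011) reverses the four elements and mode == 0x00
  // replicates element 0.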
NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2353 emit_simd_arith_nonds(0x70, dst, src, VEX_SIMD_F2); 2354 emit_byte(mode & 0xFF); 2355 } 2356 2357 void Assembler::pshuflw(XMMRegister dst, Address src, int mode) { 2358 assert(isByte(mode), "invalid value"); 2359 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2360 assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); 2361 InstructionMark im(this); 2362 simd_prefix(dst, src, VEX_SIMD_F2); 2363 emit_byte(0x70); 2364 emit_operand(dst, src); 2365 emit_byte(mode & 0xFF); 2366 } 2367 2368 void Assembler::psrldq(XMMRegister dst, int shift) { 2369 // Shift 128 bit value in xmm register by number of bytes. 2370 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2371 int encode = simd_prefix_and_encode(xmm3, dst, dst, VEX_SIMD_66); 2372 emit_byte(0x73); 2373 emit_byte(0xC0 | encode); 2374 emit_byte(shift); 2375 } 2376 2377 void Assembler::ptest(XMMRegister dst, Address src) { 2378 assert(VM_Version::supports_sse4_1(), ""); 2379 assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); 2380 InstructionMark im(this); 2381 simd_prefix(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38); 2382 emit_byte(0x17); 2383 emit_operand(dst, src); 2384 } 2385 2386 void Assembler::ptest(XMMRegister dst, XMMRegister src) { 2387 assert(VM_Version::supports_sse4_1(), ""); 2388 int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38); 2389 emit_byte(0x17); 2390 emit_byte(0xC0 | encode); 2391 } 2392 2393 void Assembler::punpcklbw(XMMRegister dst, Address src) { 2394 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2395 assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); 2396 emit_simd_arith(0x60, dst, src, VEX_SIMD_66); 2397 } 2398 2399 void Assembler::punpcklbw(XMMRegister dst, XMMRegister src) { 2400 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2401 emit_simd_arith(0x60, dst, src, VEX_SIMD_66); 2402 } 2403 2404 void Assembler::punpckldq(XMMRegister dst, Address src) { 2405 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2406 assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); 2407 emit_simd_arith(0x62, dst, src, VEX_SIMD_66); 2408 } 2409 2410 void Assembler::punpckldq(XMMRegister dst, XMMRegister src) { 2411 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2412 emit_simd_arith(0x62, dst, src, VEX_SIMD_66); 2413 } 2414 2415 void Assembler::punpcklqdq(XMMRegister dst, XMMRegister src) { 2416 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2417 emit_simd_arith(0x6C, dst, src, VEX_SIMD_66); 2418 } 2419 2420 void Assembler::push(int32_t imm32) { 2421 // in 64bits we push 64bits onto the stack but only 2422 // take a 32bit immediate 2423 emit_byte(0x68); 2424 emit_long(imm32); 2425 } 2426 2427 void Assembler::push(Register src) { 2428 int encode = prefix_and_encode(src->encoding()); 2429 2430 emit_byte(0x50 | encode); 2431 } 2432 2433 void Assembler::pushf() { 2434 emit_byte(0x9C); 2435 } 2436 2437 #ifndef _LP64 // no 32bit push/pop on amd64 2438 void Assembler::pushl(Address src) { 2439 // Note this will push 64bit on 64bit 2440 InstructionMark im(this); 2441 prefix(src); 2442 emit_byte(0xFF); 2443 emit_operand(rsi, src); 2444 } 2445 #endif 2446 2447 void Assembler::rcll(Register dst, int imm8) { 2448 assert(isShiftCount(imm8), "illegal shift count"); 2449 int encode = prefix_and_encode(dst->encoding()); 2450 if (imm8 == 1) { 2451 emit_byte(0xD1); 2452 emit_byte(0xD0 | encode); 2453 } else { 2454 emit_byte(0xC1); 2455 emit_byte(0xD0 | encode); 2456 emit_byte(imm8); 
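    // 0xC1 /2 ib is the RCL r/m32, imm8 form; the imm8 == 1 case above uses
    // the shorter 0xD1 /2 encoding instead.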
  }
}

// copies data from [esi] to [edi] using rcx pointer-sized words
// generic
void Assembler::rep_mov() {
  emit_byte(0xF3);
  // MOVSQ
  LP64_ONLY(prefix(REX_W));
  emit_byte(0xA5);
}

// sets rcx pointer-sized words at [edi] to the value in rax
// generic
void Assembler::rep_set() { // rep_set
  emit_byte(0xF3);
  // STOSQ
  LP64_ONLY(prefix(REX_W));
  emit_byte(0xAB);
}

// scans rcx pointer-sized words at [edi] for an occurrence of rax
// generic
void Assembler::repne_scan() { // repne_scan
  emit_byte(0xF2);
  // SCASQ
  LP64_ONLY(prefix(REX_W));
  emit_byte(0xAF);
}

#ifdef _LP64
// scans rcx 4-byte words at [edi] for an occurrence of rax
// generic
void Assembler::repne_scanl() { // repne_scan
  emit_byte(0xF2);
  // SCASL
  emit_byte(0xAF);
}
#endif

void Assembler::ret(int imm16) {
  if (imm16 == 0) {
    emit_byte(0xC3);
  } else {
    emit_byte(0xC2);
    emit_word(imm16);
  }
}

void Assembler::sahf() {
#ifdef _LP64
  // Not supported in 64bit mode
  ShouldNotReachHere();
#endif
  emit_byte(0x9E);
}

void Assembler::sarl(Register dst, int imm8) {
  int encode = prefix_and_encode(dst->encoding());
  assert(isShiftCount(imm8), "illegal shift count");
  if (imm8 == 1) {
    emit_byte(0xD1);
    emit_byte(0xF8 | encode);
  } else {
    emit_byte(0xC1);
    emit_byte(0xF8 | encode);
    emit_byte(imm8);
  }
}

void Assembler::sarl(Register dst) {
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xD3);
  emit_byte(0xF8 | encode);
}

void Assembler::sbbl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_arith_operand(0x81, rbx, dst, imm32);
}

void Assembler::sbbl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xD8, dst, imm32);
}


void Assembler::sbbl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x1B);
  emit_operand(dst, src);
}

void Assembler::sbbl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x1B, 0xC0, dst, src);
}

void Assembler::setb(Condition cc, Register dst) {
  assert(0 <= cc && cc < 16, "illegal cc");
  int encode = prefix_and_encode(dst->encoding(), true);
  emit_byte(0x0F);
  emit_byte(0x90 | cc);
  emit_byte(0xC0 | encode);
}

void Assembler::shll(Register dst, int imm8) {
  assert(isShiftCount(imm8), "illegal shift count");
  int encode = prefix_and_encode(dst->encoding());
  if (imm8 == 1) {
    emit_byte(0xD1);
    emit_byte(0xE0 | encode);
  } else {
    emit_byte(0xC1);
    emit_byte(0xE0 | encode);
    emit_byte(imm8);
  }
}

void Assembler::shll(Register dst) {
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xD3);
  emit_byte(0xE0 | encode);
}

void Assembler::shrl(Register dst, int imm8) {
  assert(isShiftCount(imm8), "illegal shift count");
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xC1);
  emit_byte(0xE8 | encode);
  emit_byte(imm8);
}

void Assembler::shrl(Register dst) {
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xD3);
  emit_byte(0xE8 | encode);
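  // 0xD3 /5 is SHR r/m32, CL - the shift count is taken from the CL register.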
2596 } 2597 2598 // copies a single word from [esi] to [edi] 2599 void Assembler::smovl() { 2600 emit_byte(0xA5); 2601 } 2602 2603 void Assembler::sqrtsd(XMMRegister dst, XMMRegister src) { 2604 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2605 emit_simd_arith(0x51, dst, src, VEX_SIMD_F2); 2606 } 2607 2608 void Assembler::sqrtsd(XMMRegister dst, Address src) { 2609 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2610 emit_simd_arith(0x51, dst, src, VEX_SIMD_F2); 2611 } 2612 2613 void Assembler::sqrtss(XMMRegister dst, XMMRegister src) { 2614 NOT_LP64(assert(VM_Version::supports_sse(), "")); 2615 emit_simd_arith(0x51, dst, src, VEX_SIMD_F3); 2616 } 2617 2618 void Assembler::sqrtss(XMMRegister dst, Address src) { 2619 NOT_LP64(assert(VM_Version::supports_sse(), "")); 2620 emit_simd_arith(0x51, dst, src, VEX_SIMD_F3); 2621 } 2622 2623 void Assembler::stmxcsr( Address dst) { 2624 NOT_LP64(assert(VM_Version::supports_sse(), "")); 2625 InstructionMark im(this); 2626 prefix(dst); 2627 emit_byte(0x0F); 2628 emit_byte(0xAE); 2629 emit_operand(as_Register(3), dst); 2630 } 2631 2632 void Assembler::subl(Address dst, int32_t imm32) { 2633 InstructionMark im(this); 2634 prefix(dst); 2635 emit_arith_operand(0x81, rbp, dst, imm32); 2636 } 2637 2638 void Assembler::subl(Address dst, Register src) { 2639 InstructionMark im(this); 2640 prefix(dst, src); 2641 emit_byte(0x29); 2642 emit_operand(src, dst); 2643 } 2644 2645 void Assembler::subl(Register dst, int32_t imm32) { 2646 prefix(dst); 2647 emit_arith(0x81, 0xE8, dst, imm32); 2648 } 2649 2650 // Force generation of a 4 byte immediate value even if it fits into 8bit 2651 void Assembler::subl_imm32(Register dst, int32_t imm32) { 2652 prefix(dst); 2653 emit_arith_imm32(0x81, 0xE8, dst, imm32); 2654 } 2655 2656 void Assembler::subl(Register dst, Address src) { 2657 InstructionMark im(this); 2658 prefix(src, dst); 2659 emit_byte(0x2B); 2660 emit_operand(dst, src); 2661 } 2662 2663 void Assembler::subl(Register dst, Register src) { 2664 (void) prefix_and_encode(dst->encoding(), src->encoding()); 2665 emit_arith(0x2B, 0xC0, dst, src); 2666 } 2667 2668 void Assembler::subsd(XMMRegister dst, XMMRegister src) { 2669 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2670 emit_simd_arith(0x5C, dst, src, VEX_SIMD_F2); 2671 } 2672 2673 void Assembler::subsd(XMMRegister dst, Address src) { 2674 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2675 emit_simd_arith(0x5C, dst, src, VEX_SIMD_F2); 2676 } 2677 2678 void Assembler::subss(XMMRegister dst, XMMRegister src) { 2679 NOT_LP64(assert(VM_Version::supports_sse(), "")); 2680 emit_simd_arith(0x5C, dst, src, VEX_SIMD_F3); 2681 } 2682 2683 void Assembler::subss(XMMRegister dst, Address src) { 2684 NOT_LP64(assert(VM_Version::supports_sse(), "")); 2685 emit_simd_arith(0x5C, dst, src, VEX_SIMD_F3); 2686 } 2687 2688 void Assembler::testb(Register dst, int imm8) { 2689 NOT_LP64(assert(dst->has_byte_register(), "must have byte register")); 2690 (void) prefix_and_encode(dst->encoding(), true); 2691 emit_arith_b(0xF6, 0xC0, dst, imm8); 2692 } 2693 2694 void Assembler::testl(Register dst, int32_t imm32) { 2695 // not using emit_arith because test 2696 // doesn't support sign-extension of 2697 // 8bit operands 2698 int encode = dst->encoding(); 2699 if (encode == 0) { 2700 emit_byte(0xA9); 2701 } else { 2702 encode = prefix_and_encode(encode); 2703 emit_byte(0xF7); 2704 emit_byte(0xC0 | encode); 2705 } 2706 emit_long(imm32); 2707 } 2708 2709 void Assembler::testl(Register dst, Register src) { 2710 (void) 
prefix_and_encode(dst->encoding(), src->encoding()); 2711 emit_arith(0x85, 0xC0, dst, src); 2712 } 2713 2714 void Assembler::testl(Register dst, Address src) { 2715 InstructionMark im(this); 2716 prefix(src, dst); 2717 emit_byte(0x85); 2718 emit_operand(dst, src); 2719 } 2720 2721 void Assembler::ucomisd(XMMRegister dst, Address src) { 2722 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2723 emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_66); 2724 } 2725 2726 void Assembler::ucomisd(XMMRegister dst, XMMRegister src) { 2727 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2728 emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_66); 2729 } 2730 2731 void Assembler::ucomiss(XMMRegister dst, Address src) { 2732 NOT_LP64(assert(VM_Version::supports_sse(), "")); 2733 emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_NONE); 2734 } 2735 2736 void Assembler::ucomiss(XMMRegister dst, XMMRegister src) { 2737 NOT_LP64(assert(VM_Version::supports_sse(), "")); 2738 emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_NONE); 2739 } 2740 2741 2742 void Assembler::xaddl(Address dst, Register src) { 2743 InstructionMark im(this); 2744 prefix(dst, src); 2745 emit_byte(0x0F); 2746 emit_byte(0xC1); 2747 emit_operand(src, dst); 2748 } 2749 2750 void Assembler::xchgl(Register dst, Address src) { // xchg 2751 InstructionMark im(this); 2752 prefix(src, dst); 2753 emit_byte(0x87); 2754 emit_operand(dst, src); 2755 } 2756 2757 void Assembler::xchgl(Register dst, Register src) { 2758 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 2759 emit_byte(0x87); 2760 emit_byte(0xc0 | encode); 2761 } 2762 2763 void Assembler::xorl(Register dst, int32_t imm32) { 2764 prefix(dst); 2765 emit_arith(0x81, 0xF0, dst, imm32); 2766 } 2767 2768 void Assembler::xorl(Register dst, Address src) { 2769 InstructionMark im(this); 2770 prefix(src, dst); 2771 emit_byte(0x33); 2772 emit_operand(dst, src); 2773 } 2774 2775 void Assembler::xorl(Register dst, Register src) { 2776 (void) prefix_and_encode(dst->encoding(), src->encoding()); 2777 emit_arith(0x33, 0xC0, dst, src); 2778 } 2779 2780 2781 // AVX 3-operands scalar float-point arithmetic instructions 2782 2783 void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, Address src) { 2784 assert(VM_Version::supports_avx(), ""); 2785 emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false); 2786 } 2787 2788 void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { 2789 assert(VM_Version::supports_avx(), ""); 2790 emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false); 2791 } 2792 2793 void Assembler::vaddss(XMMRegister dst, XMMRegister nds, Address src) { 2794 assert(VM_Version::supports_avx(), ""); 2795 emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false); 2796 } 2797 2798 void Assembler::vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src) { 2799 assert(VM_Version::supports_avx(), ""); 2800 emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false); 2801 } 2802 2803 void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, Address src) { 2804 assert(VM_Version::supports_avx(), ""); 2805 emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false); 2806 } 2807 2808 void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { 2809 assert(VM_Version::supports_avx(), ""); 2810 emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false); 2811 } 2812 2813 void Assembler::vdivss(XMMRegister dst, XMMRegister nds, Address src) { 2814 assert(VM_Version::supports_avx(), 
""); 2815 emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false); 2816 } 2817 2818 void Assembler::vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src) { 2819 assert(VM_Version::supports_avx(), ""); 2820 emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false); 2821 } 2822 2823 void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, Address src) { 2824 assert(VM_Version::supports_avx(), ""); 2825 emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false); 2826 } 2827 2828 void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { 2829 assert(VM_Version::supports_avx(), ""); 2830 emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false); 2831 } 2832 2833 void Assembler::vmulss(XMMRegister dst, XMMRegister nds, Address src) { 2834 assert(VM_Version::supports_avx(), ""); 2835 emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false); 2836 } 2837 2838 void Assembler::vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src) { 2839 assert(VM_Version::supports_avx(), ""); 2840 emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false); 2841 } 2842 2843 void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, Address src) { 2844 assert(VM_Version::supports_avx(), ""); 2845 emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false); 2846 } 2847 2848 void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { 2849 assert(VM_Version::supports_avx(), ""); 2850 emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false); 2851 } 2852 2853 void Assembler::vsubss(XMMRegister dst, XMMRegister nds, Address src) { 2854 assert(VM_Version::supports_avx(), ""); 2855 emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false); 2856 } 2857 2858 void Assembler::vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src) { 2859 assert(VM_Version::supports_avx(), ""); 2860 emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false); 2861 } 2862 2863 //====================VECTOR ARITHMETIC===================================== 2864 2865 // Float-point vector arithmetic 2866 2867 void Assembler::addpd(XMMRegister dst, XMMRegister src) { 2868 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2869 emit_simd_arith(0x58, dst, src, VEX_SIMD_66); 2870 } 2871 2872 void Assembler::addps(XMMRegister dst, XMMRegister src) { 2873 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2874 emit_simd_arith(0x58, dst, src, VEX_SIMD_NONE); 2875 } 2876 2877 void Assembler::vaddpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { 2878 assert(VM_Version::supports_avx(), ""); 2879 emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_66, vector256); 2880 } 2881 2882 void Assembler::vaddps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { 2883 assert(VM_Version::supports_avx(), ""); 2884 emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_NONE, vector256); 2885 } 2886 2887 void Assembler::vaddpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { 2888 assert(VM_Version::supports_avx(), ""); 2889 emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_66, vector256); 2890 } 2891 2892 void Assembler::vaddps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { 2893 assert(VM_Version::supports_avx(), ""); 2894 emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_NONE, vector256); 2895 } 2896 2897 void Assembler::subpd(XMMRegister dst, XMMRegister src) { 2898 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2899 emit_simd_arith(0x5C, dst, src, 
VEX_SIMD_66); 2900 } 2901 2902 void Assembler::subps(XMMRegister dst, XMMRegister src) { 2903 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2904 emit_simd_arith(0x5C, dst, src, VEX_SIMD_NONE); 2905 } 2906 2907 void Assembler::vsubpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { 2908 assert(VM_Version::supports_avx(), ""); 2909 emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_66, vector256); 2910 } 2911 2912 void Assembler::vsubps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { 2913 assert(VM_Version::supports_avx(), ""); 2914 emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_NONE, vector256); 2915 } 2916 2917 void Assembler::vsubpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { 2918 assert(VM_Version::supports_avx(), ""); 2919 emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_66, vector256); 2920 } 2921 2922 void Assembler::vsubps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { 2923 assert(VM_Version::supports_avx(), ""); 2924 emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_NONE, vector256); 2925 } 2926 2927 void Assembler::mulpd(XMMRegister dst, XMMRegister src) { 2928 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2929 emit_simd_arith(0x59, dst, src, VEX_SIMD_66); 2930 } 2931 2932 void Assembler::mulps(XMMRegister dst, XMMRegister src) { 2933 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2934 emit_simd_arith(0x59, dst, src, VEX_SIMD_NONE); 2935 } 2936 2937 void Assembler::vmulpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { 2938 assert(VM_Version::supports_avx(), ""); 2939 emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_66, vector256); 2940 } 2941 2942 void Assembler::vmulps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { 2943 assert(VM_Version::supports_avx(), ""); 2944 emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_NONE, vector256); 2945 } 2946 2947 void Assembler::vmulpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { 2948 assert(VM_Version::supports_avx(), ""); 2949 emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_66, vector256); 2950 } 2951 2952 void Assembler::vmulps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { 2953 assert(VM_Version::supports_avx(), ""); 2954 emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_NONE, vector256); 2955 } 2956 2957 void Assembler::divpd(XMMRegister dst, XMMRegister src) { 2958 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2959 emit_simd_arith(0x5E, dst, src, VEX_SIMD_66); 2960 } 2961 2962 void Assembler::divps(XMMRegister dst, XMMRegister src) { 2963 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2964 emit_simd_arith(0x5E, dst, src, VEX_SIMD_NONE); 2965 } 2966 2967 void Assembler::vdivpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { 2968 assert(VM_Version::supports_avx(), ""); 2969 emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_66, vector256); 2970 } 2971 2972 void Assembler::vdivps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { 2973 assert(VM_Version::supports_avx(), ""); 2974 emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_NONE, vector256); 2975 } 2976 2977 void Assembler::vdivpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { 2978 assert(VM_Version::supports_avx(), ""); 2979 emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_66, vector256); 2980 } 2981 2982 void Assembler::vdivps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { 2983 assert(VM_Version::supports_avx(), ""); 2984 emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_NONE, 
vector256); 2985 } 2986 2987 void Assembler::andpd(XMMRegister dst, XMMRegister src) { 2988 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2989 emit_simd_arith(0x54, dst, src, VEX_SIMD_66); 2990 } 2991 2992 void Assembler::andps(XMMRegister dst, XMMRegister src) { 2993 NOT_LP64(assert(VM_Version::supports_sse(), "")); 2994 emit_simd_arith(0x54, dst, src, VEX_SIMD_NONE); 2995 } 2996 2997 void Assembler::andps(XMMRegister dst, Address src) { 2998 NOT_LP64(assert(VM_Version::supports_sse(), "")); 2999 emit_simd_arith(0x54, dst, src, VEX_SIMD_NONE); 3000 } 3001 3002 void Assembler::andpd(XMMRegister dst, Address src) { 3003 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3004 emit_simd_arith(0x54, dst, src, VEX_SIMD_66); 3005 } 3006 3007 void Assembler::vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { 3008 assert(VM_Version::supports_avx(), ""); 3009 emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_66, vector256); 3010 } 3011 3012 void Assembler::vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { 3013 assert(VM_Version::supports_avx(), ""); 3014 emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_NONE, vector256); 3015 } 3016 3017 void Assembler::vandpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { 3018 assert(VM_Version::supports_avx(), ""); 3019 emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_66, vector256); 3020 } 3021 3022 void Assembler::vandps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { 3023 assert(VM_Version::supports_avx(), ""); 3024 emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_NONE, vector256); 3025 } 3026 3027 void Assembler::xorpd(XMMRegister dst, XMMRegister src) { 3028 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3029 emit_simd_arith(0x57, dst, src, VEX_SIMD_66); 3030 } 3031 3032 void Assembler::xorps(XMMRegister dst, XMMRegister src) { 3033 NOT_LP64(assert(VM_Version::supports_sse(), "")); 3034 emit_simd_arith(0x57, dst, src, VEX_SIMD_NONE); 3035 } 3036 3037 void Assembler::xorpd(XMMRegister dst, Address src) { 3038 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3039 emit_simd_arith(0x57, dst, src, VEX_SIMD_66); 3040 } 3041 3042 void Assembler::xorps(XMMRegister dst, Address src) { 3043 NOT_LP64(assert(VM_Version::supports_sse(), "")); 3044 emit_simd_arith(0x57, dst, src, VEX_SIMD_NONE); 3045 } 3046 3047 void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { 3048 assert(VM_Version::supports_avx(), ""); 3049 emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_66, vector256); 3050 } 3051 3052 void Assembler::vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { 3053 assert(VM_Version::supports_avx(), ""); 3054 emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_NONE, vector256); 3055 } 3056 3057 void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { 3058 assert(VM_Version::supports_avx(), ""); 3059 emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_66, vector256); 3060 } 3061 3062 void Assembler::vxorps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { 3063 assert(VM_Version::supports_avx(), ""); 3064 emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_NONE, vector256); 3065 } 3066 3067 3068 // Integer vector arithmetic 3069 void Assembler::paddb(XMMRegister dst, XMMRegister src) { 3070 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3071 emit_simd_arith(0xFC, dst, src, VEX_SIMD_66); 3072 } 3073 3074 void Assembler::paddw(XMMRegister dst, XMMRegister src) { 3075 NOT_LP64(assert(VM_Version::supports_sse2(), 
"")); 3076 emit_simd_arith(0xFD, dst, src, VEX_SIMD_66); 3077 } 3078 3079 void Assembler::paddd(XMMRegister dst, XMMRegister src) { 3080 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3081 emit_simd_arith(0xFE, dst, src, VEX_SIMD_66); 3082 } 3083 3084 void Assembler::paddq(XMMRegister dst, XMMRegister src) { 3085 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3086 emit_simd_arith(0xD4, dst, src, VEX_SIMD_66); 3087 } 3088 3089 void Assembler::vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { 3090 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3091 emit_vex_arith(0xFC, dst, nds, src, VEX_SIMD_66, vector256); 3092 } 3093 3094 void Assembler::vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { 3095 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3096 emit_vex_arith(0xFD, dst, nds, src, VEX_SIMD_66, vector256); 3097 } 3098 3099 void Assembler::vpaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { 3100 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3101 emit_vex_arith(0xFE, dst, nds, src, VEX_SIMD_66, vector256); 3102 } 3103 3104 void Assembler::vpaddq(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { 3105 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3106 emit_vex_arith(0xD4, dst, nds, src, VEX_SIMD_66, vector256); 3107 } 3108 3109 void Assembler::vpaddb(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { 3110 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3111 emit_vex_arith(0xFC, dst, nds, src, VEX_SIMD_66, vector256); 3112 } 3113 3114 void Assembler::vpaddw(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { 3115 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3116 emit_vex_arith(0xFD, dst, nds, src, VEX_SIMD_66, vector256); 3117 } 3118 3119 void Assembler::vpaddd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { 3120 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3121 emit_vex_arith(0xFE, dst, nds, src, VEX_SIMD_66, vector256); 3122 } 3123 3124 void Assembler::vpaddq(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { 3125 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3126 emit_vex_arith(0xD4, dst, nds, src, VEX_SIMD_66, vector256); 3127 } 3128 3129 void Assembler::psubb(XMMRegister dst, XMMRegister src) { 3130 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3131 emit_simd_arith(0xF8, dst, src, VEX_SIMD_66); 3132 } 3133 3134 void Assembler::psubw(XMMRegister dst, XMMRegister src) { 3135 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3136 emit_simd_arith(0xF9, dst, src, VEX_SIMD_66); 3137 } 3138 3139 void Assembler::psubd(XMMRegister dst, XMMRegister src) { 3140 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3141 emit_simd_arith(0xFA, dst, src, VEX_SIMD_66); 3142 } 3143 3144 void Assembler::psubq(XMMRegister dst, XMMRegister src) { 3145 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3146 emit_simd_arith(0xFB, dst, src, VEX_SIMD_66); 3147 
} 3148 3149 void Assembler::vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { 3150 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3151 emit_vex_arith(0xF8, dst, nds, src, VEX_SIMD_66, vector256); 3152 } 3153 3154 void Assembler::vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { 3155 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3156 emit_vex_arith(0xF9, dst, nds, src, VEX_SIMD_66, vector256); 3157 } 3158 3159 void Assembler::vpsubd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { 3160 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3161 emit_vex_arith(0xFA, dst, nds, src, VEX_SIMD_66, vector256); 3162 } 3163 3164 void Assembler::vpsubq(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { 3165 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3166 emit_vex_arith(0xFB, dst, nds, src, VEX_SIMD_66, vector256); 3167 } 3168 3169 void Assembler::vpsubb(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { 3170 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3171 emit_vex_arith(0xF8, dst, nds, src, VEX_SIMD_66, vector256); 3172 } 3173 3174 void Assembler::vpsubw(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { 3175 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3176 emit_vex_arith(0xF9, dst, nds, src, VEX_SIMD_66, vector256); 3177 } 3178 3179 void Assembler::vpsubd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { 3180 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3181 emit_vex_arith(0xFA, dst, nds, src, VEX_SIMD_66, vector256); 3182 } 3183 3184 void Assembler::vpsubq(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { 3185 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3186 emit_vex_arith(0xFB, dst, nds, src, VEX_SIMD_66, vector256); 3187 } 3188 3189 void Assembler::pmullw(XMMRegister dst, XMMRegister src) { 3190 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3191 emit_simd_arith(0xD5, dst, src, VEX_SIMD_66); 3192 } 3193 3194 void Assembler::pmulld(XMMRegister dst, XMMRegister src) { 3195 assert(VM_Version::supports_sse4_1(), ""); 3196 int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38); 3197 emit_byte(0x40); 3198 emit_byte(0xC0 | encode); 3199 } 3200 3201 void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { 3202 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3203 emit_vex_arith(0xD5, dst, nds, src, VEX_SIMD_66, vector256); 3204 } 3205 3206 void Assembler::vpmulld(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { 3207 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3208 int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_38); 3209 emit_byte(0x40); 3210 emit_byte(0xC0 | encode); 3211 } 3212 3213 
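// Illustrative use of the 3-operand AVX forms above (assuming the usual
// '__' == _masm-> shorthand used by HotSpot stubs, and a 256-bit operand size):
//   __ vpmulld(xmm0, xmm1, xmm2, /*vector256*/ true);  // ymm0 = ymm1 * ymm2, packed 32-bit multiply
// Unlike the 2-operand SSE pmulld(dst, src), the first source (nds) is not clobbered.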
void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { 3214 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3215 emit_vex_arith(0xD5, dst, nds, src, VEX_SIMD_66, vector256); 3216 } 3217 3218 void Assembler::vpmulld(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { 3219 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3220 InstructionMark im(this); 3221 int dst_enc = dst->encoding(); 3222 int nds_enc = nds->is_valid() ? nds->encoding() : 0; 3223 vex_prefix(src, nds_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, false, vector256); 3224 emit_byte(0x40); 3225 emit_operand(dst, src); 3226 } 3227 3228 // Shift packed integers left by specified number of bits. 3229 void Assembler::psllw(XMMRegister dst, int shift) { 3230 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3231 // XMM6 is for /6 encoding: 66 0F 71 /6 ib 3232 int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66); 3233 emit_byte(0x71); 3234 emit_byte(0xC0 | encode); 3235 emit_byte(shift); 3236 } 3237 3238 void Assembler::pslld(XMMRegister dst, int shift) { 3239 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3240 // XMM6 is for /6 encoding: 66 0F 72 /6 ib 3241 int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66); 3242 emit_byte(0x72); 3243 emit_byte(0xC0 | encode); 3244 emit_byte(shift); 3245 } 3246 3247 void Assembler::psllq(XMMRegister dst, int shift) { 3248 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3249 // XMM6 is for /6 encoding: 66 0F 73 /6 ib 3250 int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66); 3251 emit_byte(0x73); 3252 emit_byte(0xC0 | encode); 3253 emit_byte(shift); 3254 } 3255 3256 void Assembler::psllw(XMMRegister dst, XMMRegister shift) { 3257 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3258 emit_simd_arith(0xF1, dst, shift, VEX_SIMD_66); 3259 } 3260 3261 void Assembler::pslld(XMMRegister dst, XMMRegister shift) { 3262 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3263 emit_simd_arith(0xF2, dst, shift, VEX_SIMD_66); 3264 } 3265 3266 void Assembler::psllq(XMMRegister dst, XMMRegister shift) { 3267 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3268 emit_simd_arith(0xF3, dst, shift, VEX_SIMD_66); 3269 } 3270 3271 void Assembler::vpsllw(XMMRegister dst, XMMRegister src, int shift, bool vector256) { 3272 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3273 // XMM6 is for /6 encoding: 66 0F 71 /6 ib 3274 emit_vex_arith(0x71, xmm6, dst, src, VEX_SIMD_66, vector256); 3275 emit_byte(shift); 3276 } 3277 3278 void Assembler::vpslld(XMMRegister dst, XMMRegister src, int shift, bool vector256) { 3279 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3280 // XMM6 is for /6 encoding: 66 0F 72 /6 ib 3281 emit_vex_arith(0x72, xmm6, dst, src, VEX_SIMD_66, vector256); 3282 emit_byte(shift); 3283 } 3284 3285 void Assembler::vpsllq(XMMRegister dst, XMMRegister src, int shift, bool vector256) { 3286 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3287 // XMM6 is for /6 encoding: 66 0F 73 /6 ib 3288 emit_vex_arith(0x73, xmm6, dst, src, VEX_SIMD_66, vector256); 3289 emit_byte(shift); 3290 } 3291 3292 void Assembler::vpsllw(XMMRegister dst, XMMRegister src, XMMRegister 
shift, bool vector256) { 3293 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3294 emit_vex_arith(0xF1, dst, src, shift, VEX_SIMD_66, vector256); 3295 } 3296 3297 void Assembler::vpslld(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) { 3298 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3299 emit_vex_arith(0xF2, dst, src, shift, VEX_SIMD_66, vector256); 3300 } 3301 3302 void Assembler::vpsllq(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) { 3303 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3304 emit_vex_arith(0xF3, dst, src, shift, VEX_SIMD_66, vector256); 3305 } 3306 3307 // Shift packed integers logically right by specified number of bits. 3308 void Assembler::psrlw(XMMRegister dst, int shift) { 3309 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3310 // XMM2 is for /2 encoding: 66 0F 71 /2 ib 3311 int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66); 3312 emit_byte(0x71); 3313 emit_byte(0xC0 | encode); 3314 emit_byte(shift); 3315 } 3316 3317 void Assembler::psrld(XMMRegister dst, int shift) { 3318 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3319 // XMM2 is for /2 encoding: 66 0F 72 /2 ib 3320 int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66); 3321 emit_byte(0x72); 3322 emit_byte(0xC0 | encode); 3323 emit_byte(shift); 3324 } 3325 3326 void Assembler::psrlq(XMMRegister dst, int shift) { 3327 // Do not confuse it with psrldq SSE2 instruction which 3328 // shifts 128 bit value in xmm register by number of bytes. 3329 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3330 // XMM2 is for /2 encoding: 66 0F 73 /2 ib 3331 int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66); 3332 emit_byte(0x73); 3333 emit_byte(0xC0 | encode); 3334 emit_byte(shift); 3335 } 3336 3337 void Assembler::psrlw(XMMRegister dst, XMMRegister shift) { 3338 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3339 emit_simd_arith(0xD1, dst, shift, VEX_SIMD_66); 3340 } 3341 3342 void Assembler::psrld(XMMRegister dst, XMMRegister shift) { 3343 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3344 emit_simd_arith(0xD2, dst, shift, VEX_SIMD_66); 3345 } 3346 3347 void Assembler::psrlq(XMMRegister dst, XMMRegister shift) { 3348 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3349 emit_simd_arith(0xD3, dst, shift, VEX_SIMD_66); 3350 } 3351 3352 void Assembler::vpsrlw(XMMRegister dst, XMMRegister src, int shift, bool vector256) { 3353 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3354 // XMM2 is for /2 encoding: 66 0F 73 /2 ib 3355 emit_vex_arith(0x71, xmm2, dst, src, VEX_SIMD_66, vector256); 3356 emit_byte(shift); 3357 } 3358 3359 void Assembler::vpsrld(XMMRegister dst, XMMRegister src, int shift, bool vector256) { 3360 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3361 // XMM2 is for /2 encoding: 66 0F 73 /2 ib 3362 emit_vex_arith(0x72, xmm2, dst, src, VEX_SIMD_66, vector256); 3363 emit_byte(shift); 3364 } 3365 3366 void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, int shift, bool vector256) { 3367 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3368 // XMM2 is for /2 encoding: 66 
0F 73 /2 ib 3369 emit_vex_arith(0x73, xmm2, dst, src, VEX_SIMD_66, vector256); 3370 emit_byte(shift); 3371 } 3372 3373 void Assembler::vpsrlw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) { 3374 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3375 emit_vex_arith(0xD1, dst, src, shift, VEX_SIMD_66, vector256); 3376 } 3377 3378 void Assembler::vpsrld(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) { 3379 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3380 emit_vex_arith(0xD2, dst, src, shift, VEX_SIMD_66, vector256); 3381 } 3382 3383 void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) { 3384 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3385 emit_vex_arith(0xD3, dst, src, shift, VEX_SIMD_66, vector256); 3386 } 3387 3388 // Shift packed integers arithmetically right by specified number of bits. 3389 void Assembler::psraw(XMMRegister dst, int shift) { 3390 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3391 // XMM4 is for /4 encoding: 66 0F 71 /4 ib 3392 int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66); 3393 emit_byte(0x71); 3394 emit_byte(0xC0 | encode); 3395 emit_byte(shift); 3396 } 3397 3398 void Assembler::psrad(XMMRegister dst, int shift) { 3399 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3400 // XMM4 is for /4 encoding: 66 0F 72 /4 ib 3401 int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66); 3402 emit_byte(0x72); 3403 emit_byte(0xC0 | encode); 3404 emit_byte(shift); 3405 } 3406 3407 void Assembler::psraw(XMMRegister dst, XMMRegister shift) { 3408 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3409 emit_simd_arith(0xE1, dst, shift, VEX_SIMD_66); 3410 } 3411 3412 void Assembler::psrad(XMMRegister dst, XMMRegister shift) { 3413 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3414 emit_simd_arith(0xE2, dst, shift, VEX_SIMD_66); 3415 } 3416 3417 void Assembler::vpsraw(XMMRegister dst, XMMRegister src, int shift, bool vector256) { 3418 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3419 // XMM4 is for /4 encoding: 66 0F 71 /4 ib 3420 emit_vex_arith(0x71, xmm4, dst, src, VEX_SIMD_66, vector256); 3421 emit_byte(shift); 3422 } 3423 3424 void Assembler::vpsrad(XMMRegister dst, XMMRegister src, int shift, bool vector256) { 3425 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3426 // XMM4 is for /4 encoding: 66 0F 71 /4 ib 3427 emit_vex_arith(0x72, xmm4, dst, src, VEX_SIMD_66, vector256); 3428 emit_byte(shift); 3429 } 3430 3431 void Assembler::vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) { 3432 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3433 emit_vex_arith(0xE1, dst, src, shift, VEX_SIMD_66, vector256); 3434 } 3435 3436 void Assembler::vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) { 3437 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3438 emit_vex_arith(0xE2, dst, src, shift, VEX_SIMD_66, vector256); 3439 } 3440 3441 3442 // AND packed integers 3443 void Assembler::pand(XMMRegister dst, 
XMMRegister src) { 3444 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3445 emit_simd_arith(0xDB, dst, src, VEX_SIMD_66); 3446 } 3447 3448 void Assembler::vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { 3449 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3450 emit_vex_arith(0xDB, dst, nds, src, VEX_SIMD_66, vector256); 3451 } 3452 3453 void Assembler::vpand(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { 3454 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3455 emit_vex_arith(0xDB, dst, nds, src, VEX_SIMD_66, vector256); 3456 } 3457 3458 void Assembler::por(XMMRegister dst, XMMRegister src) { 3459 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3460 emit_simd_arith(0xEB, dst, src, VEX_SIMD_66); 3461 } 3462 3463 void Assembler::vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { 3464 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3465 emit_vex_arith(0xEB, dst, nds, src, VEX_SIMD_66, vector256); 3466 } 3467 3468 void Assembler::vpor(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { 3469 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3470 emit_vex_arith(0xEB, dst, nds, src, VEX_SIMD_66, vector256); 3471 } 3472 3473 void Assembler::pxor(XMMRegister dst, XMMRegister src) { 3474 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3475 emit_simd_arith(0xEF, dst, src, VEX_SIMD_66); 3476 } 3477 3478 void Assembler::vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { 3479 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3480 emit_vex_arith(0xEF, dst, nds, src, VEX_SIMD_66, vector256); 3481 } 3482 3483 void Assembler::vpxor(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { 3484 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3485 emit_vex_arith(0xEF, dst, nds, src, VEX_SIMD_66, vector256); 3486 } 3487 3488 3489 void Assembler::vinsertf128h(XMMRegister dst, XMMRegister nds, XMMRegister src) { 3490 assert(VM_Version::supports_avx(), ""); 3491 bool vector256 = true; 3492 int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_3A); 3493 emit_byte(0x18); 3494 emit_byte(0xC0 | encode); 3495 // 0x00 - insert into lower 128 bits 3496 // 0x01 - insert into upper 128 bits 3497 emit_byte(0x01); 3498 } 3499 3500 void Assembler::vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src) { 3501 assert(VM_Version::supports_avx2(), ""); 3502 bool vector256 = true; 3503 int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_3A); 3504 emit_byte(0x38); 3505 emit_byte(0xC0 | encode); 3506 // 0x00 - insert into lower 128 bits 3507 // 0x01 - insert into upper 128 bits 3508 emit_byte(0x01); 3509 } 3510 3511 void Assembler::vzeroupper() { 3512 assert(VM_Version::supports_avx(), ""); 3513 (void)vex_prefix_and_encode(xmm0, xmm0, xmm0, VEX_SIMD_NONE); 3514 emit_byte(0x77); 3515 } 3516 3517 3518 #ifndef _LP64 3519 // 32bit only pieces of the assembler 3520 3521 void Assembler::cmp_literal32(Register src1, int32_t imm32, RelocationHolder const& rspec) { 3522 // NO PREFIX AS NEVER 64BIT 3523 
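  // 0x81 /7 id is CMP r/m32, imm32; 0xF8 | reg below selects the register-direct
  // ModRM form, and emit_data() attaches the relocation for the literal.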
InstructionMark im(this); 3524 emit_byte(0x81); 3525 emit_byte(0xF8 | src1->encoding()); 3526 emit_data(imm32, rspec, 0); 3527 } 3528 3529 void Assembler::cmp_literal32(Address src1, int32_t imm32, RelocationHolder const& rspec) { 3530 // NO PREFIX AS NEVER 64BIT (not even 32bit versions of 64bit regs 3531 InstructionMark im(this); 3532 emit_byte(0x81); 3533 emit_operand(rdi, src1); 3534 emit_data(imm32, rspec, 0); 3535 } 3536 3537 // The 64-bit (32bit platform) cmpxchg compares the value at adr with the contents of rdx:rax, 3538 // and stores rcx:rbx into adr if so; otherwise, the value at adr is loaded 3539 // into rdx:rax. The ZF is set if the compared values were equal, and cleared otherwise. 3540 void Assembler::cmpxchg8(Address adr) { 3541 InstructionMark im(this); 3542 emit_byte(0x0F); 3543 emit_byte(0xc7); 3544 emit_operand(rcx, adr); 3545 } 3546 3547 void Assembler::decl(Register dst) { 3548 // Don't use it directly. Use MacroAssembler::decrementl() instead. 3549 emit_byte(0x48 | dst->encoding()); 3550 } 3551 3552 #endif // _LP64 3553 3554 // 64bit typically doesn't use the x87 but needs to for the trig funcs 3555 3556 void Assembler::fabs() { 3557 emit_byte(0xD9); 3558 emit_byte(0xE1); 3559 } 3560 3561 void Assembler::fadd(int i) { 3562 emit_farith(0xD8, 0xC0, i); 3563 } 3564 3565 void Assembler::fadd_d(Address src) { 3566 InstructionMark im(this); 3567 emit_byte(0xDC); 3568 emit_operand32(rax, src); 3569 } 3570 3571 void Assembler::fadd_s(Address src) { 3572 InstructionMark im(this); 3573 emit_byte(0xD8); 3574 emit_operand32(rax, src); 3575 } 3576 3577 void Assembler::fadda(int i) { 3578 emit_farith(0xDC, 0xC0, i); 3579 } 3580 3581 void Assembler::faddp(int i) { 3582 emit_farith(0xDE, 0xC0, i); 3583 } 3584 3585 void Assembler::fchs() { 3586 emit_byte(0xD9); 3587 emit_byte(0xE0); 3588 } 3589 3590 void Assembler::fcom(int i) { 3591 emit_farith(0xD8, 0xD0, i); 3592 } 3593 3594 void Assembler::fcomp(int i) { 3595 emit_farith(0xD8, 0xD8, i); 3596 } 3597 3598 void Assembler::fcomp_d(Address src) { 3599 InstructionMark im(this); 3600 emit_byte(0xDC); 3601 emit_operand32(rbx, src); 3602 } 3603 3604 void Assembler::fcomp_s(Address src) { 3605 InstructionMark im(this); 3606 emit_byte(0xD8); 3607 emit_operand32(rbx, src); 3608 } 3609 3610 void Assembler::fcompp() { 3611 emit_byte(0xDE); 3612 emit_byte(0xD9); 3613 } 3614 3615 void Assembler::fcos() { 3616 emit_byte(0xD9); 3617 emit_byte(0xFF); 3618 } 3619 3620 void Assembler::fdecstp() { 3621 emit_byte(0xD9); 3622 emit_byte(0xF6); 3623 } 3624 3625 void Assembler::fdiv(int i) { 3626 emit_farith(0xD8, 0xF0, i); 3627 } 3628 3629 void Assembler::fdiv_d(Address src) { 3630 InstructionMark im(this); 3631 emit_byte(0xDC); 3632 emit_operand32(rsi, src); 3633 } 3634 3635 void Assembler::fdiv_s(Address src) { 3636 InstructionMark im(this); 3637 emit_byte(0xD8); 3638 emit_operand32(rsi, src); 3639 } 3640 3641 void Assembler::fdiva(int i) { 3642 emit_farith(0xDC, 0xF8, i); 3643 } 3644 3645 // Note: The Intel manual (Pentium Processor User's Manual, Vol.3, 1994) 3646 // is erroneous for some of the floating-point instructions below. 
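// Encoding note (a sketch, assuming emit_farith(b1, b2, i) simply emits b1
// followed by (b2 + i) for a stack index 0 <= i < 8): the ST(i) operand is
// folded into the low bits of the second opcode byte. For example, fdivp(1)
// would emit DE F9, the classic no-operand FDIVP encoding.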
3647 3648 void Assembler::fdivp(int i) { 3649 emit_farith(0xDE, 0xF8, i); // ST(0) <- ST(0) / ST(1) and pop (Intel manual wrong) 3650 } 3651 3652 void Assembler::fdivr(int i) { 3653 emit_farith(0xD8, 0xF8, i); 3654 } 3655 3656 void Assembler::fdivr_d(Address src) { 3657 InstructionMark im(this); 3658 emit_byte(0xDC); 3659 emit_operand32(rdi, src); 3660 } 3661 3662 void Assembler::fdivr_s(Address src) { 3663 InstructionMark im(this); 3664 emit_byte(0xD8); 3665 emit_operand32(rdi, src); 3666 } 3667 3668 void Assembler::fdivra(int i) { 3669 emit_farith(0xDC, 0xF0, i); 3670 } 3671 3672 void Assembler::fdivrp(int i) { 3673 emit_farith(0xDE, 0xF0, i); // ST(0) <- ST(1) / ST(0) and pop (Intel manual wrong) 3674 } 3675 3676 void Assembler::ffree(int i) { 3677 emit_farith(0xDD, 0xC0, i); 3678 } 3679 3680 void Assembler::fild_d(Address adr) { 3681 InstructionMark im(this); 3682 emit_byte(0xDF); 3683 emit_operand32(rbp, adr); 3684 } 3685 3686 void Assembler::fild_s(Address adr) { 3687 InstructionMark im(this); 3688 emit_byte(0xDB); 3689 emit_operand32(rax, adr); 3690 } 3691 3692 void Assembler::fincstp() { 3693 emit_byte(0xD9); 3694 emit_byte(0xF7); 3695 } 3696 3697 void Assembler::finit() { 3698 emit_byte(0x9B); 3699 emit_byte(0xDB); 3700 emit_byte(0xE3); 3701 } 3702 3703 void Assembler::fist_s(Address adr) { 3704 InstructionMark im(this); 3705 emit_byte(0xDB); 3706 emit_operand32(rdx, adr); 3707 } 3708 3709 void Assembler::fistp_d(Address adr) { 3710 InstructionMark im(this); 3711 emit_byte(0xDF); 3712 emit_operand32(rdi, adr); 3713 } 3714 3715 void Assembler::fistp_s(Address adr) { 3716 InstructionMark im(this); 3717 emit_byte(0xDB); 3718 emit_operand32(rbx, adr); 3719 } 3720 3721 void Assembler::fld1() { 3722 emit_byte(0xD9); 3723 emit_byte(0xE8); 3724 } 3725 3726 void Assembler::fld_d(Address adr) { 3727 InstructionMark im(this); 3728 emit_byte(0xDD); 3729 emit_operand32(rax, adr); 3730 } 3731 3732 void Assembler::fld_s(Address adr) { 3733 InstructionMark im(this); 3734 emit_byte(0xD9); 3735 emit_operand32(rax, adr); 3736 } 3737 3738 3739 void Assembler::fld_s(int index) { 3740 emit_farith(0xD9, 0xC0, index); 3741 } 3742 3743 void Assembler::fld_x(Address adr) { 3744 InstructionMark im(this); 3745 emit_byte(0xDB); 3746 emit_operand32(rbp, adr); 3747 } 3748 3749 void Assembler::fldcw(Address src) { 3750 InstructionMark im(this); 3751 emit_byte(0xd9); 3752 emit_operand32(rbp, src); 3753 } 3754 3755 void Assembler::fldenv(Address src) { 3756 InstructionMark im(this); 3757 emit_byte(0xD9); 3758 emit_operand32(rsp, src); 3759 } 3760 3761 void Assembler::fldlg2() { 3762 emit_byte(0xD9); 3763 emit_byte(0xEC); 3764 } 3765 3766 void Assembler::fldln2() { 3767 emit_byte(0xD9); 3768 emit_byte(0xED); 3769 } 3770 3771 void Assembler::fldz() { 3772 emit_byte(0xD9); 3773 emit_byte(0xEE); 3774 } 3775 3776 void Assembler::flog() { 3777 fldln2(); 3778 fxch(); 3779 fyl2x(); 3780 } 3781 3782 void Assembler::flog10() { 3783 fldlg2(); 3784 fxch(); 3785 fyl2x(); 3786 } 3787 3788 void Assembler::fmul(int i) { 3789 emit_farith(0xD8, 0xC8, i); 3790 } 3791 3792 void Assembler::fmul_d(Address src) { 3793 InstructionMark im(this); 3794 emit_byte(0xDC); 3795 emit_operand32(rcx, src); 3796 } 3797 3798 void Assembler::fmul_s(Address src) { 3799 InstructionMark im(this); 3800 emit_byte(0xD8); 3801 emit_operand32(rcx, src); 3802 } 3803 3804 void Assembler::fmula(int i) { 3805 emit_farith(0xDC, 0xC8, i); 3806 } 3807 3808 void Assembler::fmulp(int i) { 3809 emit_farith(0xDE, 0xC8, i); 3810 } 3811 3812 void 
Assembler::fnsave(Address dst) { 3813 InstructionMark im(this); 3814 emit_byte(0xDD); 3815 emit_operand32(rsi, dst); 3816 } 3817 3818 void Assembler::fnstcw(Address src) { 3819 InstructionMark im(this); 3820 emit_byte(0x9B); 3821 emit_byte(0xD9); 3822 emit_operand32(rdi, src); 3823 } 3824 3825 void Assembler::fnstsw_ax() { 3826 emit_byte(0xdF); 3827 emit_byte(0xE0); 3828 } 3829 3830 void Assembler::fprem() { 3831 emit_byte(0xD9); 3832 emit_byte(0xF8); 3833 } 3834 3835 void Assembler::fprem1() { 3836 emit_byte(0xD9); 3837 emit_byte(0xF5); 3838 } 3839 3840 void Assembler::frstor(Address src) { 3841 InstructionMark im(this); 3842 emit_byte(0xDD); 3843 emit_operand32(rsp, src); 3844 } 3845 3846 void Assembler::fsin() { 3847 emit_byte(0xD9); 3848 emit_byte(0xFE); 3849 } 3850 3851 void Assembler::fsqrt() { 3852 emit_byte(0xD9); 3853 emit_byte(0xFA); 3854 } 3855 3856 void Assembler::fst_d(Address adr) { 3857 InstructionMark im(this); 3858 emit_byte(0xDD); 3859 emit_operand32(rdx, adr); 3860 } 3861 3862 void Assembler::fst_s(Address adr) { 3863 InstructionMark im(this); 3864 emit_byte(0xD9); 3865 emit_operand32(rdx, adr); 3866 } 3867 3868 void Assembler::fstp_d(Address adr) { 3869 InstructionMark im(this); 3870 emit_byte(0xDD); 3871 emit_operand32(rbx, adr); 3872 } 3873 3874 void Assembler::fstp_d(int index) { 3875 emit_farith(0xDD, 0xD8, index); 3876 } 3877 3878 void Assembler::fstp_s(Address adr) { 3879 InstructionMark im(this); 3880 emit_byte(0xD9); 3881 emit_operand32(rbx, adr); 3882 } 3883 3884 void Assembler::fstp_x(Address adr) { 3885 InstructionMark im(this); 3886 emit_byte(0xDB); 3887 emit_operand32(rdi, adr); 3888 } 3889 3890 void Assembler::fsub(int i) { 3891 emit_farith(0xD8, 0xE0, i); 3892 } 3893 3894 void Assembler::fsub_d(Address src) { 3895 InstructionMark im(this); 3896 emit_byte(0xDC); 3897 emit_operand32(rsp, src); 3898 } 3899 3900 void Assembler::fsub_s(Address src) { 3901 InstructionMark im(this); 3902 emit_byte(0xD8); 3903 emit_operand32(rsp, src); 3904 } 3905 3906 void Assembler::fsuba(int i) { 3907 emit_farith(0xDC, 0xE8, i); 3908 } 3909 3910 void Assembler::fsubp(int i) { 3911 emit_farith(0xDE, 0xE8, i); // ST(0) <- ST(0) - ST(1) and pop (Intel manual wrong) 3912 } 3913 3914 void Assembler::fsubr(int i) { 3915 emit_farith(0xD8, 0xE8, i); 3916 } 3917 3918 void Assembler::fsubr_d(Address src) { 3919 InstructionMark im(this); 3920 emit_byte(0xDC); 3921 emit_operand32(rbp, src); 3922 } 3923 3924 void Assembler::fsubr_s(Address src) { 3925 InstructionMark im(this); 3926 emit_byte(0xD8); 3927 emit_operand32(rbp, src); 3928 } 3929 3930 void Assembler::fsubra(int i) { 3931 emit_farith(0xDC, 0xE0, i); 3932 } 3933 3934 void Assembler::fsubrp(int i) { 3935 emit_farith(0xDE, 0xE0, i); // ST(0) <- ST(1) - ST(0) and pop (Intel manual wrong) 3936 } 3937 3938 void Assembler::ftan() { 3939 emit_byte(0xD9); 3940 emit_byte(0xF2); 3941 emit_byte(0xDD); 3942 emit_byte(0xD8); 3943 } 3944 3945 void Assembler::ftst() { 3946 emit_byte(0xD9); 3947 emit_byte(0xE4); 3948 } 3949 3950 void Assembler::fucomi(int i) { 3951 // make sure the instruction is supported (introduced for P6, together with cmov) 3952 guarantee(VM_Version::supports_cmov(), "illegal instruction"); 3953 emit_farith(0xDB, 0xE8, i); 3954 } 3955 3956 void Assembler::fucomip(int i) { 3957 // make sure the instruction is supported (introduced for P6, together with cmov) 3958 guarantee(VM_Version::supports_cmov(), "illegal instruction"); 3959 emit_farith(0xDF, 0xE8, i); 3960 } 3961 3962 void Assembler::fwait() { 3963 emit_byte(0x9B); 3964 
} 3965 3966 void Assembler::fxch(int i) { 3967 emit_farith(0xD9, 0xC8, i); 3968 } 3969 3970 void Assembler::fyl2x() { 3971 emit_byte(0xD9); 3972 emit_byte(0xF1); 3973 } 3974 3975 void Assembler::frndint() { 3976 emit_byte(0xD9); 3977 emit_byte(0xFC); 3978 } 3979 3980 void Assembler::f2xm1() { 3981 emit_byte(0xD9); 3982 emit_byte(0xF0); 3983 } 3984 3985 void Assembler::fldl2e() { 3986 emit_byte(0xD9); 3987 emit_byte(0xEA); 3988 } 3989 3990 // SSE SIMD prefix byte values corresponding to VexSimdPrefix encoding. 3991 static int simd_pre[4] = { 0, 0x66, 0xF3, 0xF2 }; 3992 // SSE opcode second byte values (first is 0x0F) corresponding to VexOpcode encoding. 3993 static int simd_opc[4] = { 0, 0, 0x38, 0x3A }; 3994 3995 // Generate SSE legacy REX prefix and SIMD opcode based on VEX encoding. 3996 void Assembler::rex_prefix(Address adr, XMMRegister xreg, VexSimdPrefix pre, VexOpcode opc, bool rex_w) { 3997 if (pre > 0) { 3998 emit_byte(simd_pre[pre]); 3999 } 4000 if (rex_w) { 4001 prefixq(adr, xreg); 4002 } else { 4003 prefix(adr, xreg); 4004 } 4005 if (opc > 0) { 4006 emit_byte(0x0F); 4007 int opc2 = simd_opc[opc]; 4008 if (opc2 > 0) { 4009 emit_byte(opc2); 4010 } 4011 } 4012 } 4013 4014 int Assembler::rex_prefix_and_encode(int dst_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, bool rex_w) { 4015 if (pre > 0) { 4016 emit_byte(simd_pre[pre]); 4017 } 4018 int encode = (rex_w) ? prefixq_and_encode(dst_enc, src_enc) : 4019 prefix_and_encode(dst_enc, src_enc); 4020 if (opc > 0) { 4021 emit_byte(0x0F); 4022 int opc2 = simd_opc[opc]; 4023 if (opc2 > 0) { 4024 emit_byte(opc2); 4025 } 4026 } 4027 return encode; 4028 } 4029 4030 4031 void Assembler::vex_prefix(bool vex_r, bool vex_b, bool vex_x, bool vex_w, int nds_enc, VexSimdPrefix pre, VexOpcode opc, bool vector256) { 4032 if (vex_b || vex_x || vex_w || (opc == VEX_OPCODE_0F_38) || (opc == VEX_OPCODE_0F_3A)) { 4033 prefix(VEX_3bytes); 4034 4035 int byte1 = (vex_r ? VEX_R : 0) | (vex_x ? VEX_X : 0) | (vex_b ? VEX_B : 0); 4036 byte1 = (~byte1) & 0xE0; 4037 byte1 |= opc; 4038 a_byte(byte1); 4039 4040 int byte2 = ((~nds_enc) & 0xf) << 3; 4041 byte2 |= (vex_w ? VEX_W : 0) | (vector256 ? 4 : 0) | pre; 4042 emit_byte(byte2); 4043 } else { 4044 prefix(VEX_2bytes); 4045 4046 int byte1 = vex_r ? VEX_R : 0; 4047 byte1 = (~byte1) & 0x80; 4048 byte1 |= ((~nds_enc) & 0xf) << 3; 4049 byte1 |= (vector256 ? 4 : 0) | pre; 4050 emit_byte(byte1); 4051 } 4052 } 4053 4054 void Assembler::vex_prefix(Address adr, int nds_enc, int xreg_enc, VexSimdPrefix pre, VexOpcode opc, bool vex_w, bool vector256){ 4055 bool vex_r = (xreg_enc >= 8); 4056 bool vex_b = adr.base_needs_rex(); 4057 bool vex_x = adr.index_needs_rex(); 4058 vex_prefix(vex_r, vex_b, vex_x, vex_w, nds_enc, pre, opc, vector256); 4059 } 4060 4061 int Assembler::vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, bool vex_w, bool vector256) { 4062 bool vex_r = (dst_enc >= 8); 4063 bool vex_b = (src_enc >= 8); 4064 bool vex_x = false; 4065 vex_prefix(vex_r, vex_b, vex_x, vex_w, nds_enc, pre, opc, vector256); 4066 return (((dst_enc & 7) << 3) | (src_enc & 7)); 4067 } 4068 4069 4070 void Assembler::simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr, VexSimdPrefix pre, VexOpcode opc, bool rex_w, bool vector256) { 4071 if (UseAVX > 0) { 4072 int xreg_enc = xreg->encoding(); 4073 int nds_enc = nds->is_valid() ? 
nds->encoding() : 0; 4074 vex_prefix(adr, nds_enc, xreg_enc, pre, opc, rex_w, vector256); 4075 } else { 4076 assert((nds == xreg) || (nds == xnoreg), "wrong sse encoding"); 4077 rex_prefix(adr, xreg, pre, opc, rex_w); 4078 } 4079 } 4080 4081 int Assembler::simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src, VexSimdPrefix pre, VexOpcode opc, bool rex_w, bool vector256) { 4082 int dst_enc = dst->encoding(); 4083 int src_enc = src->encoding(); 4084 if (UseAVX > 0) { 4085 int nds_enc = nds->is_valid() ? nds->encoding() : 0; 4086 return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, rex_w, vector256); 4087 } else { 4088 assert((nds == dst) || (nds == src) || (nds == xnoreg), "wrong sse encoding"); 4089 return rex_prefix_and_encode(dst_enc, src_enc, pre, opc, rex_w); 4090 } 4091 } 4092 4093 void Assembler::emit_simd_arith(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre) { 4094 InstructionMark im(this); 4095 simd_prefix(dst, dst, src, pre); 4096 emit_byte(opcode); 4097 emit_operand(dst, src); 4098 } 4099 4100 void Assembler::emit_simd_arith(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre) { 4101 int encode = simd_prefix_and_encode(dst, dst, src, pre); 4102 emit_byte(opcode); 4103 emit_byte(0xC0 | encode); 4104 } 4105 4106 // Versions with no second source register (non-destructive source). 4107 void Assembler::emit_simd_arith_nonds(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre) { 4108 InstructionMark im(this); 4109 simd_prefix(dst, xnoreg, src, pre); 4110 emit_byte(opcode); 4111 emit_operand(dst, src); 4112 } 4113 4114 void Assembler::emit_simd_arith_nonds(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre) { 4115 int encode = simd_prefix_and_encode(dst, xnoreg, src, pre); 4116 emit_byte(opcode); 4117 emit_byte(0xC0 | encode); 4118 } 4119 4120 // 3-operands AVX instructions 4121 void Assembler::emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds, 4122 Address src, VexSimdPrefix pre, bool vector256) { 4123 InstructionMark im(this); 4124 vex_prefix(dst, nds, src, pre, vector256); 4125 emit_byte(opcode); 4126 emit_operand(dst, src); 4127 } 4128 4129 void Assembler::emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds, 4130 XMMRegister src, VexSimdPrefix pre, bool vector256) { 4131 int encode = vex_prefix_and_encode(dst, nds, src, pre, vector256); 4132 emit_byte(opcode); 4133 emit_byte(0xC0 | encode); 4134 } 4135 4136 #ifndef _LP64 4137 4138 void Assembler::incl(Register dst) { 4139 // Don't use it directly. Use MacroAssembler::incrementl() instead. 
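  // 0x40 | encoding is the one-byte "inc r32" form that exists only in
  // 32-bit mode; opcodes 40-4F are reinterpreted as REX prefixes on x86_64,
  // which is why the LP64 version of incl() later in this file uses the
  // two-byte FF /0 form instead.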
4140 emit_byte(0x40 | dst->encoding()); 4141 } 4142 4143 void Assembler::lea(Register dst, Address src) { 4144 leal(dst, src); 4145 } 4146 4147 void Assembler::mov_literal32(Address dst, int32_t imm32, RelocationHolder const& rspec) { 4148 InstructionMark im(this); 4149 emit_byte(0xC7); 4150 emit_operand(rax, dst); 4151 emit_data((int)imm32, rspec, 0); 4152 } 4153 4154 void Assembler::mov_literal32(Register dst, int32_t imm32, RelocationHolder const& rspec) { 4155 InstructionMark im(this); 4156 int encode = prefix_and_encode(dst->encoding()); 4157 emit_byte(0xB8 | encode); 4158 emit_data((int)imm32, rspec, 0); 4159 } 4160 4161 void Assembler::popa() { // 32bit 4162 emit_byte(0x61); 4163 } 4164 4165 void Assembler::push_literal32(int32_t imm32, RelocationHolder const& rspec) { 4166 InstructionMark im(this); 4167 emit_byte(0x68); 4168 emit_data(imm32, rspec, 0); 4169 } 4170 4171 void Assembler::pusha() { // 32bit 4172 emit_byte(0x60); 4173 } 4174 4175 void Assembler::set_byte_if_not_zero(Register dst) { 4176 emit_byte(0x0F); 4177 emit_byte(0x95); 4178 emit_byte(0xE0 | dst->encoding()); 4179 } 4180 4181 void Assembler::shldl(Register dst, Register src) { 4182 emit_byte(0x0F); 4183 emit_byte(0xA5); 4184 emit_byte(0xC0 | src->encoding() << 3 | dst->encoding()); 4185 } 4186 4187 void Assembler::shrdl(Register dst, Register src) { 4188 emit_byte(0x0F); 4189 emit_byte(0xAD); 4190 emit_byte(0xC0 | src->encoding() << 3 | dst->encoding()); 4191 } 4192 4193 #else // LP64 4194 4195 void Assembler::set_byte_if_not_zero(Register dst) { 4196 int enc = prefix_and_encode(dst->encoding(), true); 4197 emit_byte(0x0F); 4198 emit_byte(0x95); 4199 emit_byte(0xE0 | enc); 4200 } 4201 4202 // 64bit only pieces of the assembler 4203 // This should only be used by 64bit instructions that can use rip-relative 4204 // it cannot be used by instructions that want an immediate value. 4205 4206 bool Assembler::reachable(AddressLiteral adr) { 4207 int64_t disp; 4208 // None will force a 64bit literal to the code stream. Likely a placeholder 4209 // for something that will be patched later and we need to certain it will 4210 // always be reachable. 4211 if (adr.reloc() == relocInfo::none) { 4212 return false; 4213 } 4214 if (adr.reloc() == relocInfo::internal_word_type) { 4215 // This should be rip relative and easily reachable. 4216 return true; 4217 } 4218 if (adr.reloc() == relocInfo::virtual_call_type || 4219 adr.reloc() == relocInfo::opt_virtual_call_type || 4220 adr.reloc() == relocInfo::static_call_type || 4221 adr.reloc() == relocInfo::static_stub_type ) { 4222 // This should be rip relative within the code cache and easily 4223 // reachable until we get huge code caches. (At which point 4224 // ic code is going to have issues). 4225 return true; 4226 } 4227 if (adr.reloc() != relocInfo::external_word_type && 4228 adr.reloc() != relocInfo::poll_return_type && // these are really external_word but need special 4229 adr.reloc() != relocInfo::poll_type && // relocs to identify them 4230 adr.reloc() != relocInfo::runtime_call_type ) { 4231 return false; 4232 } 4233 4234 // Stress the correction code 4235 if (ForceUnreachable) { 4236 // Must be runtimecall reloc, see if it is in the codecache 4237 // Flipping stuff in the codecache to be unreachable causes issues 4238 // with things like inline caches where the additional instructions 4239 // are not handled. 
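  // In other words, under the stress flag only targets outside the code
  // cache are forced onto the 64-bit-literal (unreachable) path; targets
  // found inside the cache keep the normal rip-relative handling so that
  // inline-cache patching still sees the instruction sequence it expects.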
4240 if (CodeCache::find_blob(adr._target) == NULL) { 4241 return false; 4242 } 4243 } 4244 // For external_word_type/runtime_call_type if it is reachable from where we 4245 // are now (possibly a temp buffer) and where we might end up 4246 // anywhere in the codeCache then we are always reachable. 4247 // This would have to change if we ever save/restore shared code 4248 // to be more pessimistic. 4249 disp = (int64_t)adr._target - ((int64_t)CodeCache::low_bound() + sizeof(int)); 4250 if (!is_simm32(disp)) return false; 4251 disp = (int64_t)adr._target - ((int64_t)CodeCache::high_bound() + sizeof(int)); 4252 if (!is_simm32(disp)) return false; 4253 4254 disp = (int64_t)adr._target - ((int64_t)_code_pos + sizeof(int)); 4255 4256 // Because rip relative is a disp + address_of_next_instruction and we 4257 // don't know the value of address_of_next_instruction we apply a fudge factor 4258 // to make sure we will be ok no matter the size of the instruction we get placed into. 4259 // We don't have to fudge the checks above here because they are already worst case. 4260 4261 // 12 == override/rex byte, opcode byte, rm byte, sib byte, a 4-byte disp , 4-byte literal 4262 // + 4 because better safe than sorry. 4263 const int fudge = 12 + 4; 4264 if (disp < 0) { 4265 disp -= fudge; 4266 } else { 4267 disp += fudge; 4268 } 4269 return is_simm32(disp); 4270 } 4271 4272 // Check if the polling page is not reachable from the code cache using rip-relative 4273 // addressing. 4274 bool Assembler::is_polling_page_far() { 4275 intptr_t addr = (intptr_t)os::get_polling_page(); 4276 return ForceUnreachable || 4277 !is_simm32(addr - (intptr_t)CodeCache::low_bound()) || 4278 !is_simm32(addr - (intptr_t)CodeCache::high_bound()); 4279 } 4280 4281 void Assembler::emit_data64(jlong data, 4282 relocInfo::relocType rtype, 4283 int format) { 4284 if (rtype == relocInfo::none) { 4285 emit_long64(data); 4286 } else { 4287 emit_data64(data, Relocation::spec_simple(rtype), format); 4288 } 4289 } 4290 4291 void Assembler::emit_data64(jlong data, 4292 RelocationHolder const& rspec, 4293 int format) { 4294 assert(imm_operand == 0, "default format must be immediate in this file"); 4295 assert(imm_operand == format, "must be immediate"); 4296 assert(inst_mark() != NULL, "must be inside InstructionMark"); 4297 // Do not use AbstractAssembler::relocate, which is not intended for 4298 // embedded words. Instead, relocate to the enclosing instruction. 
4299 code_section()->relocate(inst_mark(), rspec, format); 4300 #ifdef ASSERT 4301 check_relocation(rspec, format); 4302 #endif 4303 emit_long64(data); 4304 } 4305 4306 int Assembler::prefix_and_encode(int reg_enc, bool byteinst) { 4307 if (reg_enc >= 8) { 4308 prefix(REX_B); 4309 reg_enc -= 8; 4310 } else if (byteinst && reg_enc >= 4) { 4311 prefix(REX); 4312 } 4313 return reg_enc; 4314 } 4315 4316 int Assembler::prefixq_and_encode(int reg_enc) { 4317 if (reg_enc < 8) { 4318 prefix(REX_W); 4319 } else { 4320 prefix(REX_WB); 4321 reg_enc -= 8; 4322 } 4323 return reg_enc; 4324 } 4325 4326 int Assembler::prefix_and_encode(int dst_enc, int src_enc, bool byteinst) { 4327 if (dst_enc < 8) { 4328 if (src_enc >= 8) { 4329 prefix(REX_B); 4330 src_enc -= 8; 4331 } else if (byteinst && src_enc >= 4) { 4332 prefix(REX); 4333 } 4334 } else { 4335 if (src_enc < 8) { 4336 prefix(REX_R); 4337 } else { 4338 prefix(REX_RB); 4339 src_enc -= 8; 4340 } 4341 dst_enc -= 8; 4342 } 4343 return dst_enc << 3 | src_enc; 4344 } 4345 4346 int Assembler::prefixq_and_encode(int dst_enc, int src_enc) { 4347 if (dst_enc < 8) { 4348 if (src_enc < 8) { 4349 prefix(REX_W); 4350 } else { 4351 prefix(REX_WB); 4352 src_enc -= 8; 4353 } 4354 } else { 4355 if (src_enc < 8) { 4356 prefix(REX_WR); 4357 } else { 4358 prefix(REX_WRB); 4359 src_enc -= 8; 4360 } 4361 dst_enc -= 8; 4362 } 4363 return dst_enc << 3 | src_enc; 4364 } 4365 4366 void Assembler::prefix(Register reg) { 4367 if (reg->encoding() >= 8) { 4368 prefix(REX_B); 4369 } 4370 } 4371 4372 void Assembler::prefix(Address adr) { 4373 if (adr.base_needs_rex()) { 4374 if (adr.index_needs_rex()) { 4375 prefix(REX_XB); 4376 } else { 4377 prefix(REX_B); 4378 } 4379 } else { 4380 if (adr.index_needs_rex()) { 4381 prefix(REX_X); 4382 } 4383 } 4384 } 4385 4386 void Assembler::prefixq(Address adr) { 4387 if (adr.base_needs_rex()) { 4388 if (adr.index_needs_rex()) { 4389 prefix(REX_WXB); 4390 } else { 4391 prefix(REX_WB); 4392 } 4393 } else { 4394 if (adr.index_needs_rex()) { 4395 prefix(REX_WX); 4396 } else { 4397 prefix(REX_W); 4398 } 4399 } 4400 } 4401 4402 4403 void Assembler::prefix(Address adr, Register reg, bool byteinst) { 4404 if (reg->encoding() < 8) { 4405 if (adr.base_needs_rex()) { 4406 if (adr.index_needs_rex()) { 4407 prefix(REX_XB); 4408 } else { 4409 prefix(REX_B); 4410 } 4411 } else { 4412 if (adr.index_needs_rex()) { 4413 prefix(REX_X); 4414 } else if (byteinst && reg->encoding() >= 4 ) { 4415 prefix(REX); 4416 } 4417 } 4418 } else { 4419 if (adr.base_needs_rex()) { 4420 if (adr.index_needs_rex()) { 4421 prefix(REX_RXB); 4422 } else { 4423 prefix(REX_RB); 4424 } 4425 } else { 4426 if (adr.index_needs_rex()) { 4427 prefix(REX_RX); 4428 } else { 4429 prefix(REX_R); 4430 } 4431 } 4432 } 4433 } 4434 4435 void Assembler::prefixq(Address adr, Register src) { 4436 if (src->encoding() < 8) { 4437 if (adr.base_needs_rex()) { 4438 if (adr.index_needs_rex()) { 4439 prefix(REX_WXB); 4440 } else { 4441 prefix(REX_WB); 4442 } 4443 } else { 4444 if (adr.index_needs_rex()) { 4445 prefix(REX_WX); 4446 } else { 4447 prefix(REX_W); 4448 } 4449 } 4450 } else { 4451 if (adr.base_needs_rex()) { 4452 if (adr.index_needs_rex()) { 4453 prefix(REX_WRXB); 4454 } else { 4455 prefix(REX_WRB); 4456 } 4457 } else { 4458 if (adr.index_needs_rex()) { 4459 prefix(REX_WRX); 4460 } else { 4461 prefix(REX_WR); 4462 } 4463 } 4464 } 4465 } 4466 4467 void Assembler::prefix(Address adr, XMMRegister reg) { 4468 if (reg->encoding() < 8) { 4469 if (adr.base_needs_rex()) { 4470 if (adr.index_needs_rex()) { 
4471 prefix(REX_XB); 4472 } else { 4473 prefix(REX_B); 4474 } 4475 } else { 4476 if (adr.index_needs_rex()) { 4477 prefix(REX_X); 4478 } 4479 } 4480 } else { 4481 if (adr.base_needs_rex()) { 4482 if (adr.index_needs_rex()) { 4483 prefix(REX_RXB); 4484 } else { 4485 prefix(REX_RB); 4486 } 4487 } else { 4488 if (adr.index_needs_rex()) { 4489 prefix(REX_RX); 4490 } else { 4491 prefix(REX_R); 4492 } 4493 } 4494 } 4495 } 4496 4497 void Assembler::prefixq(Address adr, XMMRegister src) { 4498 if (src->encoding() < 8) { 4499 if (adr.base_needs_rex()) { 4500 if (adr.index_needs_rex()) { 4501 prefix(REX_WXB); 4502 } else { 4503 prefix(REX_WB); 4504 } 4505 } else { 4506 if (adr.index_needs_rex()) { 4507 prefix(REX_WX); 4508 } else { 4509 prefix(REX_W); 4510 } 4511 } 4512 } else { 4513 if (adr.base_needs_rex()) { 4514 if (adr.index_needs_rex()) { 4515 prefix(REX_WRXB); 4516 } else { 4517 prefix(REX_WRB); 4518 } 4519 } else { 4520 if (adr.index_needs_rex()) { 4521 prefix(REX_WRX); 4522 } else { 4523 prefix(REX_WR); 4524 } 4525 } 4526 } 4527 } 4528 4529 void Assembler::adcq(Register dst, int32_t imm32) { 4530 (void) prefixq_and_encode(dst->encoding()); 4531 emit_arith(0x81, 0xD0, dst, imm32); 4532 } 4533 4534 void Assembler::adcq(Register dst, Address src) { 4535 InstructionMark im(this); 4536 prefixq(src, dst); 4537 emit_byte(0x13); 4538 emit_operand(dst, src); 4539 } 4540 4541 void Assembler::adcq(Register dst, Register src) { 4542 (int) prefixq_and_encode(dst->encoding(), src->encoding()); 4543 emit_arith(0x13, 0xC0, dst, src); 4544 } 4545 4546 void Assembler::addq(Address dst, int32_t imm32) { 4547 InstructionMark im(this); 4548 prefixq(dst); 4549 emit_arith_operand(0x81, rax, dst,imm32); 4550 } 4551 4552 void Assembler::addq(Address dst, Register src) { 4553 InstructionMark im(this); 4554 prefixq(dst, src); 4555 emit_byte(0x01); 4556 emit_operand(src, dst); 4557 } 4558 4559 void Assembler::addq(Register dst, int32_t imm32) { 4560 (void) prefixq_and_encode(dst->encoding()); 4561 emit_arith(0x81, 0xC0, dst, imm32); 4562 } 4563 4564 void Assembler::addq(Register dst, Address src) { 4565 InstructionMark im(this); 4566 prefixq(src, dst); 4567 emit_byte(0x03); 4568 emit_operand(dst, src); 4569 } 4570 4571 void Assembler::addq(Register dst, Register src) { 4572 (void) prefixq_and_encode(dst->encoding(), src->encoding()); 4573 emit_arith(0x03, 0xC0, dst, src); 4574 } 4575 4576 void Assembler::andq(Address dst, int32_t imm32) { 4577 InstructionMark im(this); 4578 prefixq(dst); 4579 emit_byte(0x81); 4580 emit_operand(rsp, dst, 4); 4581 emit_long(imm32); 4582 } 4583 4584 void Assembler::andq(Register dst, int32_t imm32) { 4585 (void) prefixq_and_encode(dst->encoding()); 4586 emit_arith(0x81, 0xE0, dst, imm32); 4587 } 4588 4589 void Assembler::andq(Register dst, Address src) { 4590 InstructionMark im(this); 4591 prefixq(src, dst); 4592 emit_byte(0x23); 4593 emit_operand(dst, src); 4594 } 4595 4596 void Assembler::andq(Register dst, Register src) { 4597 (int) prefixq_and_encode(dst->encoding(), src->encoding()); 4598 emit_arith(0x23, 0xC0, dst, src); 4599 } 4600 4601 void Assembler::bsfq(Register dst, Register src) { 4602 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 4603 emit_byte(0x0F); 4604 emit_byte(0xBC); 4605 emit_byte(0xC0 | encode); 4606 } 4607 4608 void Assembler::bsrq(Register dst, Register src) { 4609 assert(!VM_Version::supports_lzcnt(), "encoding is treated as LZCNT"); 4610 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 4611 emit_byte(0x0F); 4612 
emit_byte(0xBD); 4613 emit_byte(0xC0 | encode); 4614 } 4615 4616 void Assembler::bswapq(Register reg) { 4617 int encode = prefixq_and_encode(reg->encoding()); 4618 emit_byte(0x0F); 4619 emit_byte(0xC8 | encode); 4620 } 4621 4622 void Assembler::cdqq() { 4623 prefix(REX_W); 4624 emit_byte(0x99); 4625 } 4626 4627 void Assembler::clflush(Address adr) { 4628 prefix(adr); 4629 emit_byte(0x0F); 4630 emit_byte(0xAE); 4631 emit_operand(rdi, adr); 4632 } 4633 4634 void Assembler::cmovq(Condition cc, Register dst, Register src) { 4635 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 4636 emit_byte(0x0F); 4637 emit_byte(0x40 | cc); 4638 emit_byte(0xC0 | encode); 4639 } 4640 4641 void Assembler::cmovq(Condition cc, Register dst, Address src) { 4642 InstructionMark im(this); 4643 prefixq(src, dst); 4644 emit_byte(0x0F); 4645 emit_byte(0x40 | cc); 4646 emit_operand(dst, src); 4647 } 4648 4649 void Assembler::cmpq(Address dst, int32_t imm32) { 4650 InstructionMark im(this); 4651 prefixq(dst); 4652 emit_byte(0x81); 4653 emit_operand(rdi, dst, 4); 4654 emit_long(imm32); 4655 } 4656 4657 void Assembler::cmpq(Register dst, int32_t imm32) { 4658 (void) prefixq_and_encode(dst->encoding()); 4659 emit_arith(0x81, 0xF8, dst, imm32); 4660 } 4661 4662 void Assembler::cmpq(Address dst, Register src) { 4663 InstructionMark im(this); 4664 prefixq(dst, src); 4665 emit_byte(0x3B); 4666 emit_operand(src, dst); 4667 } 4668 4669 void Assembler::cmpq(Register dst, Register src) { 4670 (void) prefixq_and_encode(dst->encoding(), src->encoding()); 4671 emit_arith(0x3B, 0xC0, dst, src); 4672 } 4673 4674 void Assembler::cmpq(Register dst, Address src) { 4675 InstructionMark im(this); 4676 prefixq(src, dst); 4677 emit_byte(0x3B); 4678 emit_operand(dst, src); 4679 } 4680 4681 void Assembler::cmpxchgq(Register reg, Address adr) { 4682 InstructionMark im(this); 4683 prefixq(adr, reg); 4684 emit_byte(0x0F); 4685 emit_byte(0xB1); 4686 emit_operand(reg, adr); 4687 } 4688 4689 void Assembler::cvtsi2sdq(XMMRegister dst, Register src) { 4690 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 4691 int encode = simd_prefix_and_encode_q(dst, dst, src, VEX_SIMD_F2); 4692 emit_byte(0x2A); 4693 emit_byte(0xC0 | encode); 4694 } 4695 4696 void Assembler::cvtsi2sdq(XMMRegister dst, Address src) { 4697 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 4698 InstructionMark im(this); 4699 simd_prefix_q(dst, dst, src, VEX_SIMD_F2); 4700 emit_byte(0x2A); 4701 emit_operand(dst, src); 4702 } 4703 4704 void Assembler::cvtsi2ssq(XMMRegister dst, Register src) { 4705 NOT_LP64(assert(VM_Version::supports_sse(), "")); 4706 int encode = simd_prefix_and_encode_q(dst, dst, src, VEX_SIMD_F3); 4707 emit_byte(0x2A); 4708 emit_byte(0xC0 | encode); 4709 } 4710 4711 void Assembler::cvtsi2ssq(XMMRegister dst, Address src) { 4712 NOT_LP64(assert(VM_Version::supports_sse(), "")); 4713 InstructionMark im(this); 4714 simd_prefix_q(dst, dst, src, VEX_SIMD_F3); 4715 emit_byte(0x2A); 4716 emit_operand(dst, src); 4717 } 4718 4719 void Assembler::cvttsd2siq(Register dst, XMMRegister src) { 4720 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 4721 int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_F2); 4722 emit_byte(0x2C); 4723 emit_byte(0xC0 | encode); 4724 } 4725 4726 void Assembler::cvttss2siq(Register dst, XMMRegister src) { 4727 NOT_LP64(assert(VM_Version::supports_sse(), "")); 4728 int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_F3); 4729 emit_byte(0x2C); 4730 emit_byte(0xC0 | encode); 4731 } 4732 4733 void Assembler::decl(Register 
dst) { 4734 // Don't use it directly. Use MacroAssembler::decrementl() instead. 4735 // Use two-byte form (one-byte form is a REX prefix in 64-bit mode) 4736 int encode = prefix_and_encode(dst->encoding()); 4737 emit_byte(0xFF); 4738 emit_byte(0xC8 | encode); 4739 } 4740 4741 void Assembler::decq(Register dst) { 4742 // Don't use it directly. Use MacroAssembler::decrementq() instead. 4743 // Use two-byte form (one-byte from is a REX prefix in 64-bit mode) 4744 int encode = prefixq_and_encode(dst->encoding()); 4745 emit_byte(0xFF); 4746 emit_byte(0xC8 | encode); 4747 } 4748 4749 void Assembler::decq(Address dst) { 4750 // Don't use it directly. Use MacroAssembler::decrementq() instead. 4751 InstructionMark im(this); 4752 prefixq(dst); 4753 emit_byte(0xFF); 4754 emit_operand(rcx, dst); 4755 } 4756 4757 void Assembler::fxrstor(Address src) { 4758 prefixq(src); 4759 emit_byte(0x0F); 4760 emit_byte(0xAE); 4761 emit_operand(as_Register(1), src); 4762 } 4763 4764 void Assembler::fxsave(Address dst) { 4765 prefixq(dst); 4766 emit_byte(0x0F); 4767 emit_byte(0xAE); 4768 emit_operand(as_Register(0), dst); 4769 } 4770 4771 void Assembler::idivq(Register src) { 4772 int encode = prefixq_and_encode(src->encoding()); 4773 emit_byte(0xF7); 4774 emit_byte(0xF8 | encode); 4775 } 4776 4777 void Assembler::imulq(Register dst, Register src) { 4778 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 4779 emit_byte(0x0F); 4780 emit_byte(0xAF); 4781 emit_byte(0xC0 | encode); 4782 } 4783 4784 void Assembler::imulq(Register dst, Register src, int value) { 4785 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 4786 if (is8bit(value)) { 4787 emit_byte(0x6B); 4788 emit_byte(0xC0 | encode); 4789 emit_byte(value & 0xFF); 4790 } else { 4791 emit_byte(0x69); 4792 emit_byte(0xC0 | encode); 4793 emit_long(value); 4794 } 4795 } 4796 4797 void Assembler::incl(Register dst) { 4798 // Don't use it directly. Use MacroAssembler::incrementl() instead. 4799 // Use two-byte form (one-byte from is a REX prefix in 64-bit mode) 4800 int encode = prefix_and_encode(dst->encoding()); 4801 emit_byte(0xFF); 4802 emit_byte(0xC0 | encode); 4803 } 4804 4805 void Assembler::incq(Register dst) { 4806 // Don't use it directly. Use MacroAssembler::incrementq() instead. 4807 // Use two-byte form (one-byte from is a REX prefix in 64-bit mode) 4808 int encode = prefixq_and_encode(dst->encoding()); 4809 emit_byte(0xFF); 4810 emit_byte(0xC0 | encode); 4811 } 4812 4813 void Assembler::incq(Address dst) { 4814 // Don't use it directly. Use MacroAssembler::incrementq() instead. 
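  // The register passed to emit_operand() below only supplies the ModRM
  // reg field: rax (encoding 0) selects the /0 (INC) opcode extension of
  // the FF group, the same /digit convention used throughout this file
  // (e.g. rcx for the /1 DEC form in decq(Address) above).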
4815 InstructionMark im(this); 4816 prefixq(dst); 4817 emit_byte(0xFF); 4818 emit_operand(rax, dst); 4819 } 4820 4821 void Assembler::lea(Register dst, Address src) { 4822 leaq(dst, src); 4823 } 4824 4825 void Assembler::leaq(Register dst, Address src) { 4826 InstructionMark im(this); 4827 prefixq(src, dst); 4828 emit_byte(0x8D); 4829 emit_operand(dst, src); 4830 } 4831 4832 void Assembler::mov64(Register dst, int64_t imm64) { 4833 InstructionMark im(this); 4834 int encode = prefixq_and_encode(dst->encoding()); 4835 emit_byte(0xB8 | encode); 4836 emit_long64(imm64); 4837 } 4838 4839 void Assembler::mov_literal64(Register dst, intptr_t imm64, RelocationHolder const& rspec) { 4840 InstructionMark im(this); 4841 int encode = prefixq_and_encode(dst->encoding()); 4842 emit_byte(0xB8 | encode); 4843 emit_data64(imm64, rspec); 4844 } 4845 4846 void Assembler::mov_narrow_oop(Register dst, int32_t imm32, RelocationHolder const& rspec) { 4847 InstructionMark im(this); 4848 int encode = prefix_and_encode(dst->encoding()); 4849 emit_byte(0xB8 | encode); 4850 emit_data((int)imm32, rspec, narrow_oop_operand); 4851 } 4852 4853 void Assembler::mov_narrow_oop(Address dst, int32_t imm32, RelocationHolder const& rspec) { 4854 InstructionMark im(this); 4855 prefix(dst); 4856 emit_byte(0xC7); 4857 emit_operand(rax, dst, 4); 4858 emit_data((int)imm32, rspec, narrow_oop_operand); 4859 } 4860 4861 void Assembler::cmp_narrow_oop(Register src1, int32_t imm32, RelocationHolder const& rspec) { 4862 InstructionMark im(this); 4863 int encode = prefix_and_encode(src1->encoding()); 4864 emit_byte(0x81); 4865 emit_byte(0xF8 | encode); 4866 emit_data((int)imm32, rspec, narrow_oop_operand); 4867 } 4868 4869 void Assembler::cmp_narrow_oop(Address src1, int32_t imm32, RelocationHolder const& rspec) { 4870 InstructionMark im(this); 4871 prefix(src1); 4872 emit_byte(0x81); 4873 emit_operand(rax, src1, 4); 4874 emit_data((int)imm32, rspec, narrow_oop_operand); 4875 } 4876 4877 void Assembler::lzcntq(Register dst, Register src) { 4878 assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR"); 4879 emit_byte(0xF3); 4880 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 4881 emit_byte(0x0F); 4882 emit_byte(0xBD); 4883 emit_byte(0xC0 | encode); 4884 } 4885 4886 void Assembler::movdq(XMMRegister dst, Register src) { 4887 // table D-1 says MMX/SSE2 4888 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 4889 int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_66); 4890 emit_byte(0x6E); 4891 emit_byte(0xC0 | encode); 4892 } 4893 4894 void Assembler::movdq(Register dst, XMMRegister src) { 4895 // table D-1 says MMX/SSE2 4896 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 4897 // swap src/dst to get correct prefix 4898 int encode = simd_prefix_and_encode_q(src, dst, VEX_SIMD_66); 4899 emit_byte(0x7E); 4900 emit_byte(0xC0 | encode); 4901 } 4902 4903 void Assembler::movq(Register dst, Register src) { 4904 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 4905 emit_byte(0x8B); 4906 emit_byte(0xC0 | encode); 4907 } 4908 4909 void Assembler::movq(Register dst, Address src) { 4910 InstructionMark im(this); 4911 prefixq(src, dst); 4912 emit_byte(0x8B); 4913 emit_operand(dst, src); 4914 } 4915 4916 void Assembler::movq(Address dst, Register src) { 4917 InstructionMark im(this); 4918 prefixq(dst, src); 4919 emit_byte(0x89); 4920 emit_operand(src, dst); 4921 } 4922 4923 void Assembler::movsbq(Register dst, Address src) { 4924 InstructionMark im(this); 4925 prefixq(src, dst); 4926 emit_byte(0x0F); 
4927 emit_byte(0xBE); 4928 emit_operand(dst, src); 4929 } 4930 4931 void Assembler::movsbq(Register dst, Register src) { 4932 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 4933 emit_byte(0x0F); 4934 emit_byte(0xBE); 4935 emit_byte(0xC0 | encode); 4936 } 4937 4938 void Assembler::movslq(Register dst, int32_t imm32) { 4939 // dbx shows movslq(rcx, 3) as movq $0x0000000049000000,(%rbx) 4940 // and movslq(r8, 3); as movl $0x0000000048000000,(%rbx) 4941 // as a result we shouldn't use until tested at runtime... 4942 ShouldNotReachHere(); 4943 InstructionMark im(this); 4944 int encode = prefixq_and_encode(dst->encoding()); 4945 emit_byte(0xC7 | encode); 4946 emit_long(imm32); 4947 } 4948 4949 void Assembler::movslq(Address dst, int32_t imm32) { 4950 assert(is_simm32(imm32), "lost bits"); 4951 InstructionMark im(this); 4952 prefixq(dst); 4953 emit_byte(0xC7); 4954 emit_operand(rax, dst, 4); 4955 emit_long(imm32); 4956 } 4957 4958 void Assembler::movslq(Register dst, Address src) { 4959 InstructionMark im(this); 4960 prefixq(src, dst); 4961 emit_byte(0x63); 4962 emit_operand(dst, src); 4963 } 4964 4965 void Assembler::movslq(Register dst, Register src) { 4966 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 4967 emit_byte(0x63); 4968 emit_byte(0xC0 | encode); 4969 } 4970 4971 void Assembler::movswq(Register dst, Address src) { 4972 InstructionMark im(this); 4973 prefixq(src, dst); 4974 emit_byte(0x0F); 4975 emit_byte(0xBF); 4976 emit_operand(dst, src); 4977 } 4978 4979 void Assembler::movswq(Register dst, Register src) { 4980 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 4981 emit_byte(0x0F); 4982 emit_byte(0xBF); 4983 emit_byte(0xC0 | encode); 4984 } 4985 4986 void Assembler::movzbq(Register dst, Address src) { 4987 InstructionMark im(this); 4988 prefixq(src, dst); 4989 emit_byte(0x0F); 4990 emit_byte(0xB6); 4991 emit_operand(dst, src); 4992 } 4993 4994 void Assembler::movzbq(Register dst, Register src) { 4995 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 4996 emit_byte(0x0F); 4997 emit_byte(0xB6); 4998 emit_byte(0xC0 | encode); 4999 } 5000 5001 void Assembler::movzwq(Register dst, Address src) { 5002 InstructionMark im(this); 5003 prefixq(src, dst); 5004 emit_byte(0x0F); 5005 emit_byte(0xB7); 5006 emit_operand(dst, src); 5007 } 5008 5009 void Assembler::movzwq(Register dst, Register src) { 5010 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 5011 emit_byte(0x0F); 5012 emit_byte(0xB7); 5013 emit_byte(0xC0 | encode); 5014 } 5015 5016 void Assembler::negq(Register dst) { 5017 int encode = prefixq_and_encode(dst->encoding()); 5018 emit_byte(0xF7); 5019 emit_byte(0xD8 | encode); 5020 } 5021 5022 void Assembler::notq(Register dst) { 5023 int encode = prefixq_and_encode(dst->encoding()); 5024 emit_byte(0xF7); 5025 emit_byte(0xD0 | encode); 5026 } 5027 5028 void Assembler::orq(Address dst, int32_t imm32) { 5029 InstructionMark im(this); 5030 prefixq(dst); 5031 emit_byte(0x81); 5032 emit_operand(rcx, dst, 4); 5033 emit_long(imm32); 5034 } 5035 5036 void Assembler::orq(Register dst, int32_t imm32) { 5037 (void) prefixq_and_encode(dst->encoding()); 5038 emit_arith(0x81, 0xC8, dst, imm32); 5039 } 5040 5041 void Assembler::orq(Register dst, Address src) { 5042 InstructionMark im(this); 5043 prefixq(src, dst); 5044 emit_byte(0x0B); 5045 emit_operand(dst, src); 5046 } 5047 5048 void Assembler::orq(Register dst, Register src) { 5049 (void) prefixq_and_encode(dst->encoding(), src->encoding()); 5050 
emit_arith(0x0B, 0xC0, dst, src); 5051 } 5052 5053 void Assembler::popa() { // 64bit 5054 movq(r15, Address(rsp, 0)); 5055 movq(r14, Address(rsp, wordSize)); 5056 movq(r13, Address(rsp, 2 * wordSize)); 5057 movq(r12, Address(rsp, 3 * wordSize)); 5058 movq(r11, Address(rsp, 4 * wordSize)); 5059 movq(r10, Address(rsp, 5 * wordSize)); 5060 movq(r9, Address(rsp, 6 * wordSize)); 5061 movq(r8, Address(rsp, 7 * wordSize)); 5062 movq(rdi, Address(rsp, 8 * wordSize)); 5063 movq(rsi, Address(rsp, 9 * wordSize)); 5064 movq(rbp, Address(rsp, 10 * wordSize)); 5065 // skip rsp 5066 movq(rbx, Address(rsp, 12 * wordSize)); 5067 movq(rdx, Address(rsp, 13 * wordSize)); 5068 movq(rcx, Address(rsp, 14 * wordSize)); 5069 movq(rax, Address(rsp, 15 * wordSize)); 5070 5071 addq(rsp, 16 * wordSize); 5072 } 5073 5074 void Assembler::popcntq(Register dst, Address src) { 5075 assert(VM_Version::supports_popcnt(), "must support"); 5076 InstructionMark im(this); 5077 emit_byte(0xF3); 5078 prefixq(src, dst); 5079 emit_byte(0x0F); 5080 emit_byte(0xB8); 5081 emit_operand(dst, src); 5082 } 5083 5084 void Assembler::popcntq(Register dst, Register src) { 5085 assert(VM_Version::supports_popcnt(), "must support"); 5086 emit_byte(0xF3); 5087 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 5088 emit_byte(0x0F); 5089 emit_byte(0xB8); 5090 emit_byte(0xC0 | encode); 5091 } 5092 5093 void Assembler::popq(Address dst) { 5094 InstructionMark im(this); 5095 prefixq(dst); 5096 emit_byte(0x8F); 5097 emit_operand(rax, dst); 5098 } 5099 5100 void Assembler::pusha() { // 64bit 5101 // we have to store original rsp. ABI says that 128 bytes 5102 // below rsp are local scratch. 5103 movq(Address(rsp, -5 * wordSize), rsp); 5104 5105 subq(rsp, 16 * wordSize); 5106 5107 movq(Address(rsp, 15 * wordSize), rax); 5108 movq(Address(rsp, 14 * wordSize), rcx); 5109 movq(Address(rsp, 13 * wordSize), rdx); 5110 movq(Address(rsp, 12 * wordSize), rbx); 5111 // skip rsp 5112 movq(Address(rsp, 10 * wordSize), rbp); 5113 movq(Address(rsp, 9 * wordSize), rsi); 5114 movq(Address(rsp, 8 * wordSize), rdi); 5115 movq(Address(rsp, 7 * wordSize), r8); 5116 movq(Address(rsp, 6 * wordSize), r9); 5117 movq(Address(rsp, 5 * wordSize), r10); 5118 movq(Address(rsp, 4 * wordSize), r11); 5119 movq(Address(rsp, 3 * wordSize), r12); 5120 movq(Address(rsp, 2 * wordSize), r13); 5121 movq(Address(rsp, wordSize), r14); 5122 movq(Address(rsp, 0), r15); 5123 } 5124 5125 void Assembler::pushq(Address src) { 5126 InstructionMark im(this); 5127 prefixq(src); 5128 emit_byte(0xFF); 5129 emit_operand(rsi, src); 5130 } 5131 5132 void Assembler::rclq(Register dst, int imm8) { 5133 assert(isShiftCount(imm8 >> 1), "illegal shift count"); 5134 int encode = prefixq_and_encode(dst->encoding()); 5135 if (imm8 == 1) { 5136 emit_byte(0xD1); 5137 emit_byte(0xD0 | encode); 5138 } else { 5139 emit_byte(0xC1); 5140 emit_byte(0xD0 | encode); 5141 emit_byte(imm8); 5142 } 5143 } 5144 void Assembler::sarq(Register dst, int imm8) { 5145 assert(isShiftCount(imm8 >> 1), "illegal shift count"); 5146 int encode = prefixq_and_encode(dst->encoding()); 5147 if (imm8 == 1) { 5148 emit_byte(0xD1); 5149 emit_byte(0xF8 | encode); 5150 } else { 5151 emit_byte(0xC1); 5152 emit_byte(0xF8 | encode); 5153 emit_byte(imm8); 5154 } 5155 } 5156 5157 void Assembler::sarq(Register dst) { 5158 int encode = prefixq_and_encode(dst->encoding()); 5159 emit_byte(0xD3); 5160 emit_byte(0xF8 | encode); 5161 } 5162 5163 void Assembler::sbbq(Address dst, int32_t imm32) { 5164 InstructionMark im(this); 5165 prefixq(dst); 
5166 emit_arith_operand(0x81, rbx, dst, imm32); 5167 } 5168 5169 void Assembler::sbbq(Register dst, int32_t imm32) { 5170 (void) prefixq_and_encode(dst->encoding()); 5171 emit_arith(0x81, 0xD8, dst, imm32); 5172 } 5173 5174 void Assembler::sbbq(Register dst, Address src) { 5175 InstructionMark im(this); 5176 prefixq(src, dst); 5177 emit_byte(0x1B); 5178 emit_operand(dst, src); 5179 } 5180 5181 void Assembler::sbbq(Register dst, Register src) { 5182 (void) prefixq_and_encode(dst->encoding(), src->encoding()); 5183 emit_arith(0x1B, 0xC0, dst, src); 5184 } 5185 5186 void Assembler::shlq(Register dst, int imm8) { 5187 assert(isShiftCount(imm8 >> 1), "illegal shift count"); 5188 int encode = prefixq_and_encode(dst->encoding()); 5189 if (imm8 == 1) { 5190 emit_byte(0xD1); 5191 emit_byte(0xE0 | encode); 5192 } else { 5193 emit_byte(0xC1); 5194 emit_byte(0xE0 | encode); 5195 emit_byte(imm8); 5196 } 5197 } 5198 5199 void Assembler::shlq(Register dst) { 5200 int encode = prefixq_and_encode(dst->encoding()); 5201 emit_byte(0xD3); 5202 emit_byte(0xE0 | encode); 5203 } 5204 5205 void Assembler::shrq(Register dst, int imm8) { 5206 assert(isShiftCount(imm8 >> 1), "illegal shift count"); 5207 int encode = prefixq_and_encode(dst->encoding()); 5208 emit_byte(0xC1); 5209 emit_byte(0xE8 | encode); 5210 emit_byte(imm8); 5211 } 5212 5213 void Assembler::shrq(Register dst) { 5214 int encode = prefixq_and_encode(dst->encoding()); 5215 emit_byte(0xD3); 5216 emit_byte(0xE8 | encode); 5217 } 5218 5219 void Assembler::subq(Address dst, int32_t imm32) { 5220 InstructionMark im(this); 5221 prefixq(dst); 5222 emit_arith_operand(0x81, rbp, dst, imm32); 5223 } 5224 5225 void Assembler::subq(Address dst, Register src) { 5226 InstructionMark im(this); 5227 prefixq(dst, src); 5228 emit_byte(0x29); 5229 emit_operand(src, dst); 5230 } 5231 5232 void Assembler::subq(Register dst, int32_t imm32) { 5233 (void) prefixq_and_encode(dst->encoding()); 5234 emit_arith(0x81, 0xE8, dst, imm32); 5235 } 5236 5237 // Force generation of a 4 byte immediate value even if it fits into 8bit 5238 void Assembler::subq_imm32(Register dst, int32_t imm32) { 5239 (void) prefixq_and_encode(dst->encoding()); 5240 emit_arith_imm32(0x81, 0xE8, dst, imm32); 5241 } 5242 5243 void Assembler::subq(Register dst, Address src) { 5244 InstructionMark im(this); 5245 prefixq(src, dst); 5246 emit_byte(0x2B); 5247 emit_operand(dst, src); 5248 } 5249 5250 void Assembler::subq(Register dst, Register src) { 5251 (void) prefixq_and_encode(dst->encoding(), src->encoding()); 5252 emit_arith(0x2B, 0xC0, dst, src); 5253 } 5254 5255 void Assembler::testq(Register dst, int32_t imm32) { 5256 // not using emit_arith because test 5257 // doesn't support sign-extension of 5258 // 8bit operands 5259 int encode = dst->encoding(); 5260 if (encode == 0) { 5261 prefix(REX_W); 5262 emit_byte(0xA9); 5263 } else { 5264 encode = prefixq_and_encode(encode); 5265 emit_byte(0xF7); 5266 emit_byte(0xC0 | encode); 5267 } 5268 emit_long(imm32); 5269 } 5270 5271 void Assembler::testq(Register dst, Register src) { 5272 (void) prefixq_and_encode(dst->encoding(), src->encoding()); 5273 emit_arith(0x85, 0xC0, dst, src); 5274 } 5275 5276 void Assembler::xaddq(Address dst, Register src) { 5277 InstructionMark im(this); 5278 prefixq(dst, src); 5279 emit_byte(0x0F); 5280 emit_byte(0xC1); 5281 emit_operand(src, dst); 5282 } 5283 5284 void Assembler::xchgq(Register dst, Address src) { 5285 InstructionMark im(this); 5286 prefixq(src, dst); 5287 emit_byte(0x87); 5288 emit_operand(dst, src); 5289 } 5290 5291 
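// Worked example of the REX.W register-register pattern used throughout this
// 64-bit block (illustrative; it assumes emit_arith(op, 0xC0, dst, src) forms
// the ModRM byte as 0xC0 | (low 3 bits of dst) << 3 | (low 3 bits of src)):
//   subq(rdx, rcx)  -> 48 2B D1   REX.W + SUB r64,r/m64 + ModRM 11 010 001
//   subq(r8,  rax)  -> 4C 2B C0   REX.WR carries the high bit of r8
// The prefixq_and_encode() call supplies the REX prefix; extended registers
// contribute only their high bit there and their low three bits to ModRM.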
void Assembler::xchgq(Register dst, Register src) { 5292 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 5293 emit_byte(0x87); 5294 emit_byte(0xc0 | encode); 5295 } 5296 5297 void Assembler::xorq(Register dst, Register src) { 5298 (void) prefixq_and_encode(dst->encoding(), src->encoding()); 5299 emit_arith(0x33, 0xC0, dst, src); 5300 } 5301 5302 void Assembler::xorq(Register dst, Address src) { 5303 InstructionMark im(this); 5304 prefixq(src, dst); 5305 emit_byte(0x33); 5306 emit_operand(dst, src); 5307 } 5308 5309 #endif // !LP64 5310 5311 static Assembler::Condition reverse[] = { 5312 Assembler::noOverflow /* overflow = 0x0 */ , 5313 Assembler::overflow /* noOverflow = 0x1 */ , 5314 Assembler::aboveEqual /* carrySet = 0x2, below = 0x2 */ , 5315 Assembler::below /* aboveEqual = 0x3, carryClear = 0x3 */ , 5316 Assembler::notZero /* zero = 0x4, equal = 0x4 */ , 5317 Assembler::zero /* notZero = 0x5, notEqual = 0x5 */ , 5318 Assembler::above /* belowEqual = 0x6 */ , 5319 Assembler::belowEqual /* above = 0x7 */ , 5320 Assembler::positive /* negative = 0x8 */ , 5321 Assembler::negative /* positive = 0x9 */ , 5322 Assembler::noParity /* parity = 0xa */ , 5323 Assembler::parity /* noParity = 0xb */ , 5324 Assembler::greaterEqual /* less = 0xc */ , 5325 Assembler::less /* greaterEqual = 0xd */ , 5326 Assembler::greater /* lessEqual = 0xe */ , 5327 Assembler::lessEqual /* greater = 0xf, */ 5328 5329 }; 5330 5331 5332 // Implementation of MacroAssembler 5333 5334 // First all the versions that have distinct versions depending on 32/64 bit 5335 // Unless the difference is trivial (1 line or so). 5336 5337 #ifndef _LP64 5338 5339 // 32bit versions 5340 5341 Address MacroAssembler::as_Address(AddressLiteral adr) { 5342 return Address(adr.target(), adr.rspec()); 5343 } 5344 5345 Address MacroAssembler::as_Address(ArrayAddress adr) { 5346 return Address::make_array(adr); 5347 } 5348 5349 int MacroAssembler::biased_locking_enter(Register lock_reg, 5350 Register obj_reg, 5351 Register swap_reg, 5352 Register tmp_reg, 5353 bool swap_reg_contains_mark, 5354 Label& done, 5355 Label* slow_case, 5356 BiasedLockingCounters* counters) { 5357 assert(UseBiasedLocking, "why call this otherwise?"); 5358 assert(swap_reg == rax, "swap_reg must be rax, for cmpxchg"); 5359 assert_different_registers(lock_reg, obj_reg, swap_reg); 5360 5361 if (PrintBiasedLockingStatistics && counters == NULL) 5362 counters = BiasedLocking::counters(); 5363 5364 bool need_tmp_reg = false; 5365 if (tmp_reg == noreg) { 5366 need_tmp_reg = true; 5367 tmp_reg = lock_reg; 5368 } else { 5369 assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg); 5370 } 5371 assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout"); 5372 Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes()); 5373 Address klass_addr (obj_reg, oopDesc::klass_offset_in_bytes()); 5374 Address saved_mark_addr(lock_reg, 0); 5375 5376 // Biased locking 5377 // See whether the lock is currently biased toward our thread and 5378 // whether the epoch is still valid 5379 // Note that the runtime guarantees sufficient alignment of JavaThread 5380 // pointers to allow age to be placed into low bits 5381 // First check to see whether biasing is even enabled for this object 5382 Label cas_label; 5383 int null_check_offset = -1; 5384 if (!swap_reg_contains_mark) { 5385 null_check_offset = offset(); 5386 movl(swap_reg, mark_addr); 5387 } 5388 if (need_tmp_reg) { 5389 
push(tmp_reg); 5390 } 5391 movl(tmp_reg, swap_reg); 5392 andl(tmp_reg, markOopDesc::biased_lock_mask_in_place); 5393 cmpl(tmp_reg, markOopDesc::biased_lock_pattern); 5394 if (need_tmp_reg) { 5395 pop(tmp_reg); 5396 } 5397 jcc(Assembler::notEqual, cas_label); 5398 // The bias pattern is present in the object's header. Need to check 5399 // whether the bias owner and the epoch are both still current. 5400 // Note that because there is no current thread register on x86 we 5401 // need to store off the mark word we read out of the object to 5402 // avoid reloading it and needing to recheck invariants below. This 5403 // store is unfortunate but it makes the overall code shorter and 5404 // simpler. 5405 movl(saved_mark_addr, swap_reg); 5406 if (need_tmp_reg) { 5407 push(tmp_reg); 5408 } 5409 get_thread(tmp_reg); 5410 xorl(swap_reg, tmp_reg); 5411 if (swap_reg_contains_mark) { 5412 null_check_offset = offset(); 5413 } 5414 movl(tmp_reg, klass_addr); 5415 xorl(swap_reg, Address(tmp_reg, Klass::prototype_header_offset())); 5416 andl(swap_reg, ~((int) markOopDesc::age_mask_in_place)); 5417 if (need_tmp_reg) { 5418 pop(tmp_reg); 5419 } 5420 if (counters != NULL) { 5421 cond_inc32(Assembler::zero, 5422 ExternalAddress((address)counters->biased_lock_entry_count_addr())); 5423 } 5424 jcc(Assembler::equal, done); 5425 5426 Label try_revoke_bias; 5427 Label try_rebias; 5428 5429 // At this point we know that the header has the bias pattern and 5430 // that we are not the bias owner in the current epoch. We need to 5431 // figure out more details about the state of the header in order to 5432 // know what operations can be legally performed on the object's 5433 // header. 5434 5435 // If the low three bits in the xor result aren't clear, that means 5436 // the prototype header is no longer biased and we have to revoke 5437 // the bias on this object. 5438 testl(swap_reg, markOopDesc::biased_lock_mask_in_place); 5439 jcc(Assembler::notZero, try_revoke_bias); 5440 5441 // Biasing is still enabled for this data type. See whether the 5442 // epoch of the current bias is still valid, meaning that the epoch 5443 // bits of the mark word are equal to the epoch bits of the 5444 // prototype header. (Note that the prototype header's epoch bits 5445 // only change at a safepoint.) If not, attempt to rebias the object 5446 // toward the current thread. Note that we must be absolutely sure 5447 // that the current epoch is invalid in order to do this because 5448 // otherwise the manipulations it performs on the mark word are 5449 // illegal. 5450 testl(swap_reg, markOopDesc::epoch_mask_in_place); 5451 jcc(Assembler::notZero, try_rebias); 5452 5453 // The epoch of the current bias is still valid but we know nothing 5454 // about the owner; it might be set or it might be clear. Try to 5455 // acquire the bias of the object using an atomic operation. If this 5456 // fails we will go in to the runtime to revoke the object's bias. 5457 // Note that we first construct the presumed unbiased header so we 5458 // don't accidentally blow away another thread's valid bias. 
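  // Mark word layout assumed here (a sketch; see markOop.hpp for the
  // authoritative definition). For a biased-locked object the low bits are:
  //   [ thread pointer | epoch:2 | age:4 | biased_lock:1 | lock:2 ]
  // The mask below keeps only the biased_lock, age and epoch bits, yielding
  // an "anonymously biased" header (no owning thread) that serves as the
  // expected value for the CAS that follows.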
5459 movl(swap_reg, saved_mark_addr); 5460 andl(swap_reg, 5461 markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place); 5462 if (need_tmp_reg) { 5463 push(tmp_reg); 5464 } 5465 get_thread(tmp_reg); 5466 orl(tmp_reg, swap_reg); 5467 if (os::is_MP()) { 5468 lock(); 5469 } 5470 cmpxchgptr(tmp_reg, Address(obj_reg, 0)); 5471 if (need_tmp_reg) { 5472 pop(tmp_reg); 5473 } 5474 // If the biasing toward our thread failed, this means that 5475 // another thread succeeded in biasing it toward itself and we 5476 // need to revoke that bias. The revocation will occur in the 5477 // interpreter runtime in the slow case. 5478 if (counters != NULL) { 5479 cond_inc32(Assembler::zero, 5480 ExternalAddress((address)counters->anonymously_biased_lock_entry_count_addr())); 5481 } 5482 if (slow_case != NULL) { 5483 jcc(Assembler::notZero, *slow_case); 5484 } 5485 jmp(done); 5486 5487 bind(try_rebias); 5488 // At this point we know the epoch has expired, meaning that the 5489 // current "bias owner", if any, is actually invalid. Under these 5490 // circumstances _only_, we are allowed to use the current header's 5491 // value as the comparison value when doing the cas to acquire the 5492 // bias in the current epoch. In other words, we allow transfer of 5493 // the bias from one thread to another directly in this situation. 5494 // 5495 // FIXME: due to a lack of registers we currently blow away the age 5496 // bits in this situation. Should attempt to preserve them. 5497 if (need_tmp_reg) { 5498 push(tmp_reg); 5499 } 5500 get_thread(tmp_reg); 5501 movl(swap_reg, klass_addr); 5502 orl(tmp_reg, Address(swap_reg, Klass::prototype_header_offset())); 5503 movl(swap_reg, saved_mark_addr); 5504 if (os::is_MP()) { 5505 lock(); 5506 } 5507 cmpxchgptr(tmp_reg, Address(obj_reg, 0)); 5508 if (need_tmp_reg) { 5509 pop(tmp_reg); 5510 } 5511 // If the biasing toward our thread failed, then another thread 5512 // succeeded in biasing it toward itself and we need to revoke that 5513 // bias. The revocation will occur in the runtime in the slow case. 5514 if (counters != NULL) { 5515 cond_inc32(Assembler::zero, 5516 ExternalAddress((address)counters->rebiased_lock_entry_count_addr())); 5517 } 5518 if (slow_case != NULL) { 5519 jcc(Assembler::notZero, *slow_case); 5520 } 5521 jmp(done); 5522 5523 bind(try_revoke_bias); 5524 // The prototype mark in the klass doesn't have the bias bit set any 5525 // more, indicating that objects of this data type are not supposed 5526 // to be biased any more. We are going to try to reset the mark of 5527 // this object to the prototype value and fall through to the 5528 // CAS-based locking scheme. Note that if our CAS fails, it means 5529 // that another thread raced us for the privilege of revoking the 5530 // bias of this particular object, so it's okay to continue in the 5531 // normal locking code. 5532 // 5533 // FIXME: due to a lack of registers we currently blow away the age 5534 // bits in this situation. Should attempt to preserve them. 
5535 movl(swap_reg, saved_mark_addr); 5536 if (need_tmp_reg) { 5537 push(tmp_reg); 5538 } 5539 movl(tmp_reg, klass_addr); 5540 movl(tmp_reg, Address(tmp_reg, Klass::prototype_header_offset())); 5541 if (os::is_MP()) { 5542 lock(); 5543 } 5544 cmpxchgptr(tmp_reg, Address(obj_reg, 0)); 5545 if (need_tmp_reg) { 5546 pop(tmp_reg); 5547 } 5548 // Fall through to the normal CAS-based lock, because no matter what 5549 // the result of the above CAS, some thread must have succeeded in 5550 // removing the bias bit from the object's header. 5551 if (counters != NULL) { 5552 cond_inc32(Assembler::zero, 5553 ExternalAddress((address)counters->revoked_lock_entry_count_addr())); 5554 } 5555 5556 bind(cas_label); 5557 5558 return null_check_offset; 5559 } 5560 void MacroAssembler::call_VM_leaf_base(address entry_point, 5561 int number_of_arguments) { 5562 call(RuntimeAddress(entry_point)); 5563 increment(rsp, number_of_arguments * wordSize); 5564 } 5565 5566 void MacroAssembler::cmpoop(Address src1, jobject obj) { 5567 cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate()); 5568 } 5569 5570 void MacroAssembler::cmpoop(Register src1, jobject obj) { 5571 cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate()); 5572 } 5573 5574 void MacroAssembler::extend_sign(Register hi, Register lo) { 5575 // According to Intel Doc. AP-526, "Integer Divide", p.18. 5576 if (VM_Version::is_P6() && hi == rdx && lo == rax) { 5577 cdql(); 5578 } else { 5579 movl(hi, lo); 5580 sarl(hi, 31); 5581 } 5582 } 5583 5584 void MacroAssembler::jC2(Register tmp, Label& L) { 5585 // set parity bit if FPU flag C2 is set (via rax) 5586 save_rax(tmp); 5587 fwait(); fnstsw_ax(); 5588 sahf(); 5589 restore_rax(tmp); 5590 // branch 5591 jcc(Assembler::parity, L); 5592 } 5593 5594 void MacroAssembler::jnC2(Register tmp, Label& L) { 5595 // set parity bit if FPU flag C2 is set (via rax) 5596 save_rax(tmp); 5597 fwait(); fnstsw_ax(); 5598 sahf(); 5599 restore_rax(tmp); 5600 // branch 5601 jcc(Assembler::noParity, L); 5602 } 5603 5604 // 32bit can do a case table jump in one instruction but we no longer allow the base 5605 // to be installed in the Address class 5606 void MacroAssembler::jump(ArrayAddress entry) { 5607 jmp(as_Address(entry)); 5608 } 5609 5610 // Note: y_lo will be destroyed 5611 void MacroAssembler::lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo) { 5612 // Long compare for Java (semantics as described in JVM spec.) 5613 Label high, low, done; 5614 5615 cmpl(x_hi, y_hi); 5616 jcc(Assembler::less, low); 5617 jcc(Assembler::greater, high); 5618 // x_hi is the return register 5619 xorl(x_hi, x_hi); 5620 cmpl(x_lo, y_lo); 5621 jcc(Assembler::below, low); 5622 jcc(Assembler::equal, done); 5623 5624 bind(high); 5625 xorl(x_hi, x_hi); 5626 increment(x_hi); 5627 jmp(done); 5628 5629 bind(low); 5630 xorl(x_hi, x_hi); 5631 decrementl(x_hi); 5632 5633 bind(done); 5634 } 5635 5636 void MacroAssembler::lea(Register dst, AddressLiteral src) { 5637 mov_literal32(dst, (int32_t)src.target(), src.rspec()); 5638 } 5639 5640 void MacroAssembler::lea(Address dst, AddressLiteral adr) { 5641 // leal(dst, as_Address(adr)); 5642 // see note in movl as to why we must use a move 5643 mov_literal32(dst, (int32_t) adr.target(), adr.rspec()); 5644 } 5645 5646 void MacroAssembler::leave() { 5647 mov(rsp, rbp); 5648 pop(rbp); 5649 } 5650 5651 void MacroAssembler::lmul(int x_rsp_offset, int y_rsp_offset) { 5652 // Multiplication of two Java long values stored on the stack 5653 // as illustrated below. 
Result is in rdx:rax. 5654 // 5655 // rsp ---> [ ?? ] \ \ 5656 // .... | y_rsp_offset | 5657 // [ y_lo ] / (in bytes) | x_rsp_offset 5658 // [ y_hi ] | (in bytes) 5659 // .... | 5660 // [ x_lo ] / 5661 // [ x_hi ] 5662 // .... 5663 // 5664 // Basic idea: lo(result) = lo(x_lo * y_lo) 5665 // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi) 5666 Address x_hi(rsp, x_rsp_offset + wordSize); Address x_lo(rsp, x_rsp_offset); 5667 Address y_hi(rsp, y_rsp_offset + wordSize); Address y_lo(rsp, y_rsp_offset); 5668 Label quick; 5669 // load x_hi, y_hi and check if quick 5670 // multiplication is possible 5671 movl(rbx, x_hi); 5672 movl(rcx, y_hi); 5673 movl(rax, rbx); 5674 orl(rbx, rcx); // rbx, = 0 <=> x_hi = 0 and y_hi = 0 5675 jcc(Assembler::zero, quick); // if rbx, = 0 do quick multiply 5676 // do full multiplication 5677 // 1st step 5678 mull(y_lo); // x_hi * y_lo 5679 movl(rbx, rax); // save lo(x_hi * y_lo) in rbx, 5680 // 2nd step 5681 movl(rax, x_lo); 5682 mull(rcx); // x_lo * y_hi 5683 addl(rbx, rax); // add lo(x_lo * y_hi) to rbx, 5684 // 3rd step 5685 bind(quick); // note: rbx, = 0 if quick multiply! 5686 movl(rax, x_lo); 5687 mull(y_lo); // x_lo * y_lo 5688 addl(rdx, rbx); // correct hi(x_lo * y_lo) 5689 } 5690 5691 void MacroAssembler::lneg(Register hi, Register lo) { 5692 negl(lo); 5693 adcl(hi, 0); 5694 negl(hi); 5695 } 5696 5697 void MacroAssembler::lshl(Register hi, Register lo) { 5698 // Java shift left long support (semantics as described in JVM spec., p.305) 5699 // (basic idea for shift counts s >= n: x << s == (x << n) << (s - n)) 5700 // shift value is in rcx ! 5701 assert(hi != rcx, "must not use rcx"); 5702 assert(lo != rcx, "must not use rcx"); 5703 const Register s = rcx; // shift count 5704 const int n = BitsPerWord; 5705 Label L; 5706 andl(s, 0x3f); // s := s & 0x3f (s < 0x40) 5707 cmpl(s, n); // if (s < n) 5708 jcc(Assembler::less, L); // else (s >= n) 5709 movl(hi, lo); // x := x << n 5710 xorl(lo, lo); 5711 // Note: subl(s, n) is not needed since the Intel shift instructions work rcx mod n! 5712 bind(L); // s (mod n) < n 5713 shldl(hi, lo); // x := x << s 5714 shll(lo); 5715 } 5716 5717 5718 void MacroAssembler::lshr(Register hi, Register lo, bool sign_extension) { 5719 // Java shift right long support (semantics as described in JVM spec., p.306 & p.310) 5720 // (basic idea for shift counts s >= n: x >> s == (x >> n) >> (s - n)) 5721 assert(hi != rcx, "must not use rcx"); 5722 assert(lo != rcx, "must not use rcx"); 5723 const Register s = rcx; // shift count 5724 const int n = BitsPerWord; 5725 Label L; 5726 andl(s, 0x3f); // s := s & 0x3f (s < 0x40) 5727 cmpl(s, n); // if (s < n) 5728 jcc(Assembler::less, L); // else (s >= n) 5729 movl(lo, hi); // x := x >> n 5730 if (sign_extension) sarl(hi, 31); 5731 else xorl(hi, hi); 5732 // Note: subl(s, n) is not needed since the Intel shift instructions work rcx mod n! 
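  // For example: with the 64-bit value split across hi:lo, a shift count of
  // s = 40 falls through the compare above (lo = hi, hi = sign/zero filled)
  // and the shrd/shr below then shift by 40 mod 32 = 8, while s = 5 jumps
  // straight to the label and shifts hi:lo right by 5.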
5733 bind(L); // s (mod n) < n 5734 shrdl(lo, hi); // x := x >> s 5735 if (sign_extension) sarl(hi); 5736 else shrl(hi); 5737 } 5738 5739 void MacroAssembler::movoop(Register dst, jobject obj) { 5740 mov_literal32(dst, (int32_t)obj, oop_Relocation::spec_for_immediate()); 5741 } 5742 5743 void MacroAssembler::movoop(Address dst, jobject obj) { 5744 mov_literal32(dst, (int32_t)obj, oop_Relocation::spec_for_immediate()); 5745 } 5746 5747 void MacroAssembler::movptr(Register dst, AddressLiteral src) { 5748 if (src.is_lval()) { 5749 mov_literal32(dst, (intptr_t)src.target(), src.rspec()); 5750 } else { 5751 movl(dst, as_Address(src)); 5752 } 5753 } 5754 5755 void MacroAssembler::movptr(ArrayAddress dst, Register src) { 5756 movl(as_Address(dst), src); 5757 } 5758 5759 void MacroAssembler::movptr(Register dst, ArrayAddress src) { 5760 movl(dst, as_Address(src)); 5761 } 5762 5763 // src should NEVER be a real pointer. Use AddressLiteral for true pointers 5764 void MacroAssembler::movptr(Address dst, intptr_t src) { 5765 movl(dst, src); 5766 } 5767 5768 5769 void MacroAssembler::pop_callee_saved_registers() { 5770 pop(rcx); 5771 pop(rdx); 5772 pop(rdi); 5773 pop(rsi); 5774 } 5775 5776 void MacroAssembler::pop_fTOS() { 5777 fld_d(Address(rsp, 0)); 5778 addl(rsp, 2 * wordSize); 5779 } 5780 5781 void MacroAssembler::push_callee_saved_registers() { 5782 push(rsi); 5783 push(rdi); 5784 push(rdx); 5785 push(rcx); 5786 } 5787 5788 void MacroAssembler::push_fTOS() { 5789 subl(rsp, 2 * wordSize); 5790 fstp_d(Address(rsp, 0)); 5791 } 5792 5793 5794 void MacroAssembler::pushoop(jobject obj) { 5795 push_literal32((int32_t)obj, oop_Relocation::spec_for_immediate()); 5796 } 5797 5798 5799 void MacroAssembler::pushptr(AddressLiteral src) { 5800 if (src.is_lval()) { 5801 push_literal32((int32_t)src.target(), src.rspec()); 5802 } else { 5803 pushl(as_Address(src)); 5804 } 5805 } 5806 5807 void MacroAssembler::set_word_if_not_zero(Register dst) { 5808 xorl(dst, dst); 5809 set_byte_if_not_zero(dst); 5810 } 5811 5812 static void pass_arg0(MacroAssembler* masm, Register arg) { 5813 masm->push(arg); 5814 } 5815 5816 static void pass_arg1(MacroAssembler* masm, Register arg) { 5817 masm->push(arg); 5818 } 5819 5820 static void pass_arg2(MacroAssembler* masm, Register arg) { 5821 masm->push(arg); 5822 } 5823 5824 static void pass_arg3(MacroAssembler* masm, Register arg) { 5825 masm->push(arg); 5826 } 5827 5828 #ifndef PRODUCT 5829 extern "C" void findpc(intptr_t x); 5830 #endif 5831 5832 void MacroAssembler::debug32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip, char* msg) { 5833 // In order to get locks to work, we need to fake a in_VM state 5834 JavaThread* thread = JavaThread::current(); 5835 JavaThreadState saved_state = thread->thread_state(); 5836 thread->set_thread_state(_thread_in_vm); 5837 if (ShowMessageBoxOnError) { 5838 JavaThread* thread = JavaThread::current(); 5839 JavaThreadState saved_state = thread->thread_state(); 5840 thread->set_thread_state(_thread_in_vm); 5841 if (CountBytecodes || TraceBytecodes || StopInterpreterAt) { 5842 ttyLocker ttyl; 5843 BytecodeCounter::print(); 5844 } 5845 // To see where a verify_oop failed, get $ebx+40/X for this frame. 5846 // This is the value of eip which points to where verify_oop will return. 
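    // Note: the eip value printed below is the return address pushed by
    // MacroAssembler::stop() via its call/bind trick (see stop() further down).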
5847 if (os::message_box(msg, "Execution stopped, print registers?")) { 5848 ttyLocker ttyl; 5849 tty->print_cr("eip = 0x%08x", eip); 5850 #ifndef PRODUCT 5851 if ((WizardMode || Verbose) && PrintMiscellaneous) { 5852 tty->cr(); 5853 findpc(eip); 5854 tty->cr(); 5855 } 5856 #endif 5857 tty->print_cr("rax = 0x%08x", rax); 5858 tty->print_cr("rbx = 0x%08x", rbx); 5859 tty->print_cr("rcx = 0x%08x", rcx); 5860 tty->print_cr("rdx = 0x%08x", rdx); 5861 tty->print_cr("rdi = 0x%08x", rdi); 5862 tty->print_cr("rsi = 0x%08x", rsi); 5863 tty->print_cr("rbp = 0x%08x", rbp); 5864 tty->print_cr("rsp = 0x%08x", rsp); 5865 BREAKPOINT; 5866 assert(false, "start up GDB"); 5867 } 5868 } else { 5869 ttyLocker ttyl; 5870 ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg); 5871 assert(false, err_msg("DEBUG MESSAGE: %s", msg)); 5872 } 5873 ThreadStateTransition::transition(thread, _thread_in_vm, saved_state); 5874 } 5875 5876 void MacroAssembler::stop(const char* msg) { 5877 ExternalAddress message((address)msg); 5878 // push address of message 5879 pushptr(message.addr()); 5880 { Label L; call(L, relocInfo::none); bind(L); } // push eip 5881 pusha(); // push registers 5882 call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug32))); 5883 hlt(); 5884 } 5885 5886 void MacroAssembler::warn(const char* msg) { 5887 push_CPU_state(); 5888 5889 ExternalAddress message((address) msg); 5890 // push address of message 5891 pushptr(message.addr()); 5892 5893 call(RuntimeAddress(CAST_FROM_FN_PTR(address, warning))); 5894 addl(rsp, wordSize); // discard argument 5895 pop_CPU_state(); 5896 } 5897 5898 #else // _LP64 5899 5900 // 64 bit versions 5901 5902 Address MacroAssembler::as_Address(AddressLiteral adr) { 5903 // amd64 always does this as a pc-rel 5904 // we can be absolute or disp based on the instruction type 5905 // jmp/call are displacements others are absolute 5906 assert(!adr.is_lval(), "must be rval"); 5907 assert(reachable(adr), "must be"); 5908 return Address((int32_t)(intptr_t)(adr.target() - pc()), adr.target(), adr.reloc()); 5909 5910 } 5911 5912 Address MacroAssembler::as_Address(ArrayAddress adr) { 5913 AddressLiteral base = adr.base(); 5914 lea(rscratch1, base); 5915 Address index = adr.index(); 5916 assert(index._disp == 0, "must not have disp"); // maybe it can? 
5917 Address array(rscratch1, index._index, index._scale, index._disp); 5918 return array; 5919 } 5920 5921 int MacroAssembler::biased_locking_enter(Register lock_reg, 5922 Register obj_reg, 5923 Register swap_reg, 5924 Register tmp_reg, 5925 bool swap_reg_contains_mark, 5926 Label& done, 5927 Label* slow_case, 5928 BiasedLockingCounters* counters) { 5929 assert(UseBiasedLocking, "why call this otherwise?"); 5930 assert(swap_reg == rax, "swap_reg must be rax for cmpxchgq"); 5931 assert(tmp_reg != noreg, "tmp_reg must be supplied"); 5932 assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg); 5933 assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout"); 5934 Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes()); 5935 Address saved_mark_addr(lock_reg, 0); 5936 5937 if (PrintBiasedLockingStatistics && counters == NULL) 5938 counters = BiasedLocking::counters(); 5939 5940 // Biased locking 5941 // See whether the lock is currently biased toward our thread and 5942 // whether the epoch is still valid 5943 // Note that the runtime guarantees sufficient alignment of JavaThread 5944 // pointers to allow age to be placed into low bits 5945 // First check to see whether biasing is even enabled for this object 5946 Label cas_label; 5947 int null_check_offset = -1; 5948 if (!swap_reg_contains_mark) { 5949 null_check_offset = offset(); 5950 movq(swap_reg, mark_addr); 5951 } 5952 movq(tmp_reg, swap_reg); 5953 andq(tmp_reg, markOopDesc::biased_lock_mask_in_place); 5954 cmpq(tmp_reg, markOopDesc::biased_lock_pattern); 5955 jcc(Assembler::notEqual, cas_label); 5956 // The bias pattern is present in the object's header. Need to check 5957 // whether the bias owner and the epoch are both still current. 5958 load_prototype_header(tmp_reg, obj_reg); 5959 orq(tmp_reg, r15_thread); 5960 xorq(tmp_reg, swap_reg); 5961 andq(tmp_reg, ~((int) markOopDesc::age_mask_in_place)); 5962 if (counters != NULL) { 5963 cond_inc32(Assembler::zero, 5964 ExternalAddress((address) counters->anonymously_biased_lock_entry_count_addr())); 5965 } 5966 jcc(Assembler::equal, done); 5967 5968 Label try_revoke_bias; 5969 Label try_rebias; 5970 5971 // At this point we know that the header has the bias pattern and 5972 // that we are not the bias owner in the current epoch. We need to 5973 // figure out more details about the state of the header in order to 5974 // know what operations can be legally performed on the object's 5975 // header. 5976 5977 // If the low three bits in the xor result aren't clear, that means 5978 // the prototype header is no longer biased and we have to revoke 5979 // the bias on this object. 5980 testq(tmp_reg, markOopDesc::biased_lock_mask_in_place); 5981 jcc(Assembler::notZero, try_revoke_bias); 5982 5983 // Biasing is still enabled for this data type. See whether the 5984 // epoch of the current bias is still valid, meaning that the epoch 5985 // bits of the mark word are equal to the epoch bits of the 5986 // prototype header. (Note that the prototype header's epoch bits 5987 // only change at a safepoint.) If not, attempt to rebias the object 5988 // toward the current thread. Note that we must be absolutely sure 5989 // that the current epoch is invalid in order to do this because 5990 // otherwise the manipulations it performs on the mark word are 5991 // illegal. 
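  // For reference (see markOop.hpp), a biased 64-bit mark word is laid out
  // roughly as [JavaThread*:54 | epoch:2 | unused:1 | age:4 | biased_lock:1 | lock:2],
  // so after the xor/and above, non-zero epoch bits in tmp_reg mean the bias
  // epoch no longer matches the prototype header's.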
5992 testq(tmp_reg, markOopDesc::epoch_mask_in_place); 5993 jcc(Assembler::notZero, try_rebias); 5994 5995 // The epoch of the current bias is still valid but we know nothing 5996 // about the owner; it might be set or it might be clear. Try to 5997 // acquire the bias of the object using an atomic operation. If this 5998 // fails we will go in to the runtime to revoke the object's bias. 5999 // Note that we first construct the presumed unbiased header so we 6000 // don't accidentally blow away another thread's valid bias. 6001 andq(swap_reg, 6002 markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place); 6003 movq(tmp_reg, swap_reg); 6004 orq(tmp_reg, r15_thread); 6005 if (os::is_MP()) { 6006 lock(); 6007 } 6008 cmpxchgq(tmp_reg, Address(obj_reg, 0)); 6009 // If the biasing toward our thread failed, this means that 6010 // another thread succeeded in biasing it toward itself and we 6011 // need to revoke that bias. The revocation will occur in the 6012 // interpreter runtime in the slow case. 6013 if (counters != NULL) { 6014 cond_inc32(Assembler::zero, 6015 ExternalAddress((address) counters->anonymously_biased_lock_entry_count_addr())); 6016 } 6017 if (slow_case != NULL) { 6018 jcc(Assembler::notZero, *slow_case); 6019 } 6020 jmp(done); 6021 6022 bind(try_rebias); 6023 // At this point we know the epoch has expired, meaning that the 6024 // current "bias owner", if any, is actually invalid. Under these 6025 // circumstances _only_, we are allowed to use the current header's 6026 // value as the comparison value when doing the cas to acquire the 6027 // bias in the current epoch. In other words, we allow transfer of 6028 // the bias from one thread to another directly in this situation. 6029 // 6030 // FIXME: due to a lack of registers we currently blow away the age 6031 // bits in this situation. Should attempt to preserve them. 6032 load_prototype_header(tmp_reg, obj_reg); 6033 orq(tmp_reg, r15_thread); 6034 if (os::is_MP()) { 6035 lock(); 6036 } 6037 cmpxchgq(tmp_reg, Address(obj_reg, 0)); 6038 // If the biasing toward our thread failed, then another thread 6039 // succeeded in biasing it toward itself and we need to revoke that 6040 // bias. The revocation will occur in the runtime in the slow case. 6041 if (counters != NULL) { 6042 cond_inc32(Assembler::zero, 6043 ExternalAddress((address) counters->rebiased_lock_entry_count_addr())); 6044 } 6045 if (slow_case != NULL) { 6046 jcc(Assembler::notZero, *slow_case); 6047 } 6048 jmp(done); 6049 6050 bind(try_revoke_bias); 6051 // The prototype mark in the klass doesn't have the bias bit set any 6052 // more, indicating that objects of this data type are not supposed 6053 // to be biased any more. We are going to try to reset the mark of 6054 // this object to the prototype value and fall through to the 6055 // CAS-based locking scheme. Note that if our CAS fails, it means 6056 // that another thread raced us for the privilege of revoking the 6057 // bias of this particular object, so it's okay to continue in the 6058 // normal locking code. 6059 // 6060 // FIXME: due to a lack of registers we currently blow away the age 6061 // bits in this situation. Should attempt to preserve them. 
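  // Same idea as the 32-bit revoke path earlier in this file: CAS the klass's
  // unbiased prototype header over the mark still held in swap_reg (rax); no
  // matter which thread wins, the bias bit ends up clear.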
6062 load_prototype_header(tmp_reg, obj_reg); 6063 if (os::is_MP()) { 6064 lock(); 6065 } 6066 cmpxchgq(tmp_reg, Address(obj_reg, 0)); 6067 // Fall through to the normal CAS-based lock, because no matter what 6068 // the result of the above CAS, some thread must have succeeded in 6069 // removing the bias bit from the object's header. 6070 if (counters != NULL) { 6071 cond_inc32(Assembler::zero, 6072 ExternalAddress((address) counters->revoked_lock_entry_count_addr())); 6073 } 6074 6075 bind(cas_label); 6076 6077 return null_check_offset; 6078 } 6079 6080 void MacroAssembler::call_VM_leaf_base(address entry_point, int num_args) { 6081 Label L, E; 6082 6083 #ifdef _WIN64 6084 // Windows always allocates space for it's register args 6085 assert(num_args <= 4, "only register arguments supported"); 6086 subq(rsp, frame::arg_reg_save_area_bytes); 6087 #endif 6088 6089 // Align stack if necessary 6090 testl(rsp, 15); 6091 jcc(Assembler::zero, L); 6092 6093 subq(rsp, 8); 6094 { 6095 call(RuntimeAddress(entry_point)); 6096 } 6097 addq(rsp, 8); 6098 jmp(E); 6099 6100 bind(L); 6101 { 6102 call(RuntimeAddress(entry_point)); 6103 } 6104 6105 bind(E); 6106 6107 #ifdef _WIN64 6108 // restore stack pointer 6109 addq(rsp, frame::arg_reg_save_area_bytes); 6110 #endif 6111 6112 } 6113 6114 void MacroAssembler::cmp64(Register src1, AddressLiteral src2) { 6115 assert(!src2.is_lval(), "should use cmpptr"); 6116 6117 if (reachable(src2)) { 6118 cmpq(src1, as_Address(src2)); 6119 } else { 6120 lea(rscratch1, src2); 6121 Assembler::cmpq(src1, Address(rscratch1, 0)); 6122 } 6123 } 6124 6125 int MacroAssembler::corrected_idivq(Register reg) { 6126 // Full implementation of Java ldiv and lrem; checks for special 6127 // case as described in JVM spec., p.243 & p.271. The function 6128 // returns the (pc) offset of the idivl instruction - may be needed 6129 // for implicit exceptions. 
6130 // 6131 // normal case special case 6132 // 6133 // input : rax: dividend min_long 6134 // reg: divisor (may not be eax/edx) -1 6135 // 6136 // output: rax: quotient (= rax idiv reg) min_long 6137 // rdx: remainder (= rax irem reg) 0 6138 assert(reg != rax && reg != rdx, "reg cannot be rax or rdx register"); 6139 static const int64_t min_long = 0x8000000000000000; 6140 Label normal_case, special_case; 6141 6142 // check for special case 6143 cmp64(rax, ExternalAddress((address) &min_long)); 6144 jcc(Assembler::notEqual, normal_case); 6145 xorl(rdx, rdx); // prepare rdx for possible special case (where 6146 // remainder = 0) 6147 cmpq(reg, -1); 6148 jcc(Assembler::equal, special_case); 6149 6150 // handle normal case 6151 bind(normal_case); 6152 cdqq(); 6153 int idivq_offset = offset(); 6154 idivq(reg); 6155 6156 // normal and special case exit 6157 bind(special_case); 6158 6159 return idivq_offset; 6160 } 6161 6162 void MacroAssembler::decrementq(Register reg, int value) { 6163 if (value == min_jint) { subq(reg, value); return; } 6164 if (value < 0) { incrementq(reg, -value); return; } 6165 if (value == 0) { ; return; } 6166 if (value == 1 && UseIncDec) { decq(reg) ; return; } 6167 /* else */ { subq(reg, value) ; return; } 6168 } 6169 6170 void MacroAssembler::decrementq(Address dst, int value) { 6171 if (value == min_jint) { subq(dst, value); return; } 6172 if (value < 0) { incrementq(dst, -value); return; } 6173 if (value == 0) { ; return; } 6174 if (value == 1 && UseIncDec) { decq(dst) ; return; } 6175 /* else */ { subq(dst, value) ; return; } 6176 } 6177 6178 void MacroAssembler::incrementq(Register reg, int value) { 6179 if (value == min_jint) { addq(reg, value); return; } 6180 if (value < 0) { decrementq(reg, -value); return; } 6181 if (value == 0) { ; return; } 6182 if (value == 1 && UseIncDec) { incq(reg) ; return; } 6183 /* else */ { addq(reg, value) ; return; } 6184 } 6185 6186 void MacroAssembler::incrementq(Address dst, int value) { 6187 if (value == min_jint) { addq(dst, value); return; } 6188 if (value < 0) { decrementq(dst, -value); return; } 6189 if (value == 0) { ; return; } 6190 if (value == 1 && UseIncDec) { incq(dst) ; return; } 6191 /* else */ { addq(dst, value) ; return; } 6192 } 6193 6194 // 32bit can do a case table jump in one instruction but we no longer allow the base 6195 // to be installed in the Address class 6196 void MacroAssembler::jump(ArrayAddress entry) { 6197 lea(rscratch1, entry.base()); 6198 Address dispatch = entry.index(); 6199 assert(dispatch._base == noreg, "must be"); 6200 dispatch._base = rscratch1; 6201 jmp(dispatch); 6202 } 6203 6204 void MacroAssembler::lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo) { 6205 ShouldNotReachHere(); // 64bit doesn't use two regs 6206 cmpq(x_lo, y_lo); 6207 } 6208 6209 void MacroAssembler::lea(Register dst, AddressLiteral src) { 6210 mov_literal64(dst, (intptr_t)src.target(), src.rspec()); 6211 } 6212 6213 void MacroAssembler::lea(Address dst, AddressLiteral adr) { 6214 mov_literal64(rscratch1, (intptr_t)adr.target(), adr.rspec()); 6215 movptr(dst, rscratch1); 6216 } 6217 6218 void MacroAssembler::leave() { 6219 // %%% is this really better? Why not on 32bit too? 
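  // 0xC9 is the one-byte LEAVE instruction, equivalent to the explicit
  // "mov(rsp, rbp); pop(rbp)" sequence used by the 32-bit version above.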
6220 emit_byte(0xC9); // LEAVE 6221 } 6222 6223 void MacroAssembler::lneg(Register hi, Register lo) { 6224 ShouldNotReachHere(); // 64bit doesn't use two regs 6225 negq(lo); 6226 } 6227 6228 void MacroAssembler::movoop(Register dst, jobject obj) { 6229 mov_literal64(dst, (intptr_t)obj, oop_Relocation::spec_for_immediate()); 6230 } 6231 6232 void MacroAssembler::movoop(Address dst, jobject obj) { 6233 mov_literal64(rscratch1, (intptr_t)obj, oop_Relocation::spec_for_immediate()); 6234 movq(dst, rscratch1); 6235 } 6236 6237 void MacroAssembler::movptr(Register dst, AddressLiteral src) { 6238 if (src.is_lval()) { 6239 mov_literal64(dst, (intptr_t)src.target(), src.rspec()); 6240 } else { 6241 if (reachable(src)) { 6242 movq(dst, as_Address(src)); 6243 } else { 6244 lea(rscratch1, src); 6245 movq(dst, Address(rscratch1,0)); 6246 } 6247 } 6248 } 6249 6250 void MacroAssembler::movptr(ArrayAddress dst, Register src) { 6251 movq(as_Address(dst), src); 6252 } 6253 6254 void MacroAssembler::movptr(Register dst, ArrayAddress src) { 6255 movq(dst, as_Address(src)); 6256 } 6257 6258 // src should NEVER be a real pointer. Use AddressLiteral for true pointers 6259 void MacroAssembler::movptr(Address dst, intptr_t src) { 6260 mov64(rscratch1, src); 6261 movq(dst, rscratch1); 6262 } 6263 6264 // These are mostly for initializing NULL 6265 void MacroAssembler::movptr(Address dst, int32_t src) { 6266 movslq(dst, src); 6267 } 6268 6269 void MacroAssembler::movptr(Register dst, int32_t src) { 6270 mov64(dst, (intptr_t)src); 6271 } 6272 6273 void MacroAssembler::pushoop(jobject obj) { 6274 movoop(rscratch1, obj); 6275 push(rscratch1); 6276 } 6277 6278 void MacroAssembler::pushptr(AddressLiteral src) { 6279 lea(rscratch1, src); 6280 if (src.is_lval()) { 6281 push(rscratch1); 6282 } else { 6283 pushq(Address(rscratch1, 0)); 6284 } 6285 } 6286 6287 void MacroAssembler::reset_last_Java_frame(bool clear_fp, 6288 bool clear_pc) { 6289 // we must set sp to zero to clear frame 6290 movptr(Address(r15_thread, JavaThread::last_Java_sp_offset()), NULL_WORD); 6291 // must clear fp, so that compiled frames are not confused; it is 6292 // possible that we need it only for debugging 6293 if (clear_fp) { 6294 movptr(Address(r15_thread, JavaThread::last_Java_fp_offset()), NULL_WORD); 6295 } 6296 6297 if (clear_pc) { 6298 movptr(Address(r15_thread, JavaThread::last_Java_pc_offset()), NULL_WORD); 6299 } 6300 } 6301 6302 void MacroAssembler::set_last_Java_frame(Register last_java_sp, 6303 Register last_java_fp, 6304 address last_java_pc) { 6305 // determine last_java_sp register 6306 if (!last_java_sp->is_valid()) { 6307 last_java_sp = rsp; 6308 } 6309 6310 // last_java_fp is optional 6311 if (last_java_fp->is_valid()) { 6312 movptr(Address(r15_thread, JavaThread::last_Java_fp_offset()), 6313 last_java_fp); 6314 } 6315 6316 // last_java_pc is optional 6317 if (last_java_pc != NULL) { 6318 Address java_pc(r15_thread, 6319 JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()); 6320 lea(rscratch1, InternalAddress(last_java_pc)); 6321 movptr(java_pc, rscratch1); 6322 } 6323 6324 movptr(Address(r15_thread, JavaThread::last_Java_sp_offset()), last_java_sp); 6325 } 6326 6327 static void pass_arg0(MacroAssembler* masm, Register arg) { 6328 if (c_rarg0 != arg ) { 6329 masm->mov(c_rarg0, arg); 6330 } 6331 } 6332 6333 static void pass_arg1(MacroAssembler* masm, Register arg) { 6334 if (c_rarg1 != arg ) { 6335 masm->mov(c_rarg1, arg); 6336 } 6337 } 6338 6339 static void pass_arg2(MacroAssembler* masm, Register arg) { 6340 
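  // 64-bit passes arguments in the ABI argument registers, so each pass_argN
  // only moves the value if it is not already in place (the 32-bit versions
  // above simply push onto the stack).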
if (c_rarg2 != arg ) { 6341 masm->mov(c_rarg2, arg); 6342 } 6343 } 6344 6345 static void pass_arg3(MacroAssembler* masm, Register arg) { 6346 if (c_rarg3 != arg ) { 6347 masm->mov(c_rarg3, arg); 6348 } 6349 } 6350 6351 void MacroAssembler::stop(const char* msg) { 6352 address rip = pc(); 6353 pusha(); // get regs on stack 6354 lea(c_rarg0, ExternalAddress((address) msg)); 6355 lea(c_rarg1, InternalAddress(rip)); 6356 movq(c_rarg2, rsp); // pass pointer to regs array 6357 andq(rsp, -16); // align stack as required by ABI 6358 call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug64))); 6359 hlt(); 6360 } 6361 6362 void MacroAssembler::warn(const char* msg) { 6363 push(rsp); 6364 andq(rsp, -16); // align stack as required by push_CPU_state and call 6365 6366 push_CPU_state(); // keeps alignment at 16 bytes 6367 lea(c_rarg0, ExternalAddress((address) msg)); 6368 call_VM_leaf(CAST_FROM_FN_PTR(address, warning), c_rarg0); 6369 pop_CPU_state(); 6370 pop(rsp); 6371 } 6372 6373 #ifndef PRODUCT 6374 extern "C" void findpc(intptr_t x); 6375 #endif 6376 6377 void MacroAssembler::debug64(char* msg, int64_t pc, int64_t regs[]) { 6378 // In order to get locks to work, we need to fake a in_VM state 6379 if (ShowMessageBoxOnError ) { 6380 JavaThread* thread = JavaThread::current(); 6381 JavaThreadState saved_state = thread->thread_state(); 6382 thread->set_thread_state(_thread_in_vm); 6383 #ifndef PRODUCT 6384 if (CountBytecodes || TraceBytecodes || StopInterpreterAt) { 6385 ttyLocker ttyl; 6386 BytecodeCounter::print(); 6387 } 6388 #endif 6389 // To see where a verify_oop failed, get $ebx+40/X for this frame. 6390 // XXX correct this offset for amd64 6391 // This is the value of eip which points to where verify_oop will return. 6392 if (os::message_box(msg, "Execution stopped, print registers?")) { 6393 ttyLocker ttyl; 6394 tty->print_cr("rip = 0x%016lx", pc); 6395 #ifndef PRODUCT 6396 tty->cr(); 6397 findpc(pc); 6398 tty->cr(); 6399 #endif 6400 tty->print_cr("rax = 0x%016lx", regs[15]); 6401 tty->print_cr("rbx = 0x%016lx", regs[12]); 6402 tty->print_cr("rcx = 0x%016lx", regs[14]); 6403 tty->print_cr("rdx = 0x%016lx", regs[13]); 6404 tty->print_cr("rdi = 0x%016lx", regs[8]); 6405 tty->print_cr("rsi = 0x%016lx", regs[9]); 6406 tty->print_cr("rbp = 0x%016lx", regs[10]); 6407 tty->print_cr("rsp = 0x%016lx", regs[11]); 6408 tty->print_cr("r8 = 0x%016lx", regs[7]); 6409 tty->print_cr("r9 = 0x%016lx", regs[6]); 6410 tty->print_cr("r10 = 0x%016lx", regs[5]); 6411 tty->print_cr("r11 = 0x%016lx", regs[4]); 6412 tty->print_cr("r12 = 0x%016lx", regs[3]); 6413 tty->print_cr("r13 = 0x%016lx", regs[2]); 6414 tty->print_cr("r14 = 0x%016lx", regs[1]); 6415 tty->print_cr("r15 = 0x%016lx", regs[0]); 6416 BREAKPOINT; 6417 } 6418 ThreadStateTransition::transition(thread, _thread_in_vm, saved_state); 6419 } else { 6420 ttyLocker ttyl; 6421 ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", 6422 msg); 6423 assert(false, err_msg("DEBUG MESSAGE: %s", msg)); 6424 } 6425 } 6426 6427 #endif // _LP64 6428 6429 // Now versions that are common to 32/64 bit 6430 6431 void MacroAssembler::addptr(Register dst, int32_t imm32) { 6432 LP64_ONLY(addq(dst, imm32)) NOT_LP64(addl(dst, imm32)); 6433 } 6434 6435 void MacroAssembler::addptr(Register dst, Register src) { 6436 LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src)); 6437 } 6438 6439 void MacroAssembler::addptr(Address dst, Register src) { 6440 LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src)); 6441 } 6442 6443 void MacroAssembler::addsd(XMMRegister 
dst, AddressLiteral src) { 6444 if (reachable(src)) { 6445 Assembler::addsd(dst, as_Address(src)); 6446 } else { 6447 lea(rscratch1, src); 6448 Assembler::addsd(dst, Address(rscratch1, 0)); 6449 } 6450 } 6451 6452 void MacroAssembler::addss(XMMRegister dst, AddressLiteral src) { 6453 if (reachable(src)) { 6454 addss(dst, as_Address(src)); 6455 } else { 6456 lea(rscratch1, src); 6457 addss(dst, Address(rscratch1, 0)); 6458 } 6459 } 6460 6461 void MacroAssembler::align(int modulus) { 6462 if (offset() % modulus != 0) { 6463 nop(modulus - (offset() % modulus)); 6464 } 6465 } 6466 6467 void MacroAssembler::andpd(XMMRegister dst, AddressLiteral src) { 6468 // Used in sign-masking with aligned address. 6469 assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes"); 6470 if (reachable(src)) { 6471 Assembler::andpd(dst, as_Address(src)); 6472 } else { 6473 lea(rscratch1, src); 6474 Assembler::andpd(dst, Address(rscratch1, 0)); 6475 } 6476 } 6477 6478 void MacroAssembler::andps(XMMRegister dst, AddressLiteral src) { 6479 // Used in sign-masking with aligned address. 6480 assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes"); 6481 if (reachable(src)) { 6482 Assembler::andps(dst, as_Address(src)); 6483 } else { 6484 lea(rscratch1, src); 6485 Assembler::andps(dst, Address(rscratch1, 0)); 6486 } 6487 } 6488 6489 void MacroAssembler::andptr(Register dst, int32_t imm32) { 6490 LP64_ONLY(andq(dst, imm32)) NOT_LP64(andl(dst, imm32)); 6491 } 6492 6493 void MacroAssembler::atomic_incl(AddressLiteral counter_addr) { 6494 pushf(); 6495 if (os::is_MP()) 6496 lock(); 6497 incrementl(counter_addr); 6498 popf(); 6499 } 6500 6501 // Writes to stack successive pages until offset reached to check for 6502 // stack overflow + shadow pages. This clobbers tmp. 6503 void MacroAssembler::bang_stack_size(Register size, Register tmp) { 6504 movptr(tmp, rsp); 6505 // Bang stack for total size given plus shadow page size. 6506 // Bang one page at a time because large size can bang beyond yellow and 6507 // red zones. 6508 Label loop; 6509 bind(loop); 6510 movl(Address(tmp, (-os::vm_page_size())), size ); 6511 subptr(tmp, os::vm_page_size()); 6512 subl(size, os::vm_page_size()); 6513 jcc(Assembler::greater, loop); 6514 6515 // Bang down shadow pages too. 6516 // The -1 because we already subtracted 1 page. 6517 for (int i = 0; i< StackShadowPages-1; i++) { 6518 // this could be any sized move but this is can be a debugging crumb 6519 // so the bigger the better. 6520 movptr(Address(tmp, (-i*os::vm_page_size())), size ); 6521 } 6522 } 6523 6524 void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) { 6525 assert(UseBiasedLocking, "why call this otherwise?"); 6526 6527 // Check for biased locking unlock case, which is a no-op 6528 // Note: we do not have to check the thread ID for two reasons. 6529 // First, the interpreter checks for IllegalMonitorStateException at 6530 // a higher level. Second, if the bias was revoked while we held the 6531 // lock, the object could not be rebiased toward another thread, so 6532 // the bias bit would be clear. 6533 movptr(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); 6534 andptr(temp_reg, markOopDesc::biased_lock_mask_in_place); 6535 cmpptr(temp_reg, markOopDesc::biased_lock_pattern); 6536 jcc(Assembler::equal, done); 6537 } 6538 6539 void MacroAssembler::c2bool(Register x) { 6540 // implements x == 0 ? 
0 : 1 6541 // note: must only look at least-significant byte of x 6542 // since C-style booleans are stored in one byte 6543 // only! (was bug) 6544 andl(x, 0xFF); 6545 setb(Assembler::notZero, x); 6546 } 6547 6548 // Wouldn't need if AddressLiteral version had new name 6549 void MacroAssembler::call(Label& L, relocInfo::relocType rtype) { 6550 Assembler::call(L, rtype); 6551 } 6552 6553 void MacroAssembler::call(Register entry) { 6554 Assembler::call(entry); 6555 } 6556 6557 void MacroAssembler::call(AddressLiteral entry) { 6558 if (reachable(entry)) { 6559 Assembler::call_literal(entry.target(), entry.rspec()); 6560 } else { 6561 lea(rscratch1, entry); 6562 Assembler::call(rscratch1); 6563 } 6564 } 6565 6566 // Implementation of call_VM versions 6567 6568 void MacroAssembler::call_VM(Register oop_result, 6569 address entry_point, 6570 bool check_exceptions) { 6571 Label C, E; 6572 call(C, relocInfo::none); 6573 jmp(E); 6574 6575 bind(C); 6576 call_VM_helper(oop_result, entry_point, 0, check_exceptions); 6577 ret(0); 6578 6579 bind(E); 6580 } 6581 6582 void MacroAssembler::call_VM(Register oop_result, 6583 address entry_point, 6584 Register arg_1, 6585 bool check_exceptions) { 6586 Label C, E; 6587 call(C, relocInfo::none); 6588 jmp(E); 6589 6590 bind(C); 6591 pass_arg1(this, arg_1); 6592 call_VM_helper(oop_result, entry_point, 1, check_exceptions); 6593 ret(0); 6594 6595 bind(E); 6596 } 6597 6598 void MacroAssembler::call_VM(Register oop_result, 6599 address entry_point, 6600 Register arg_1, 6601 Register arg_2, 6602 bool check_exceptions) { 6603 Label C, E; 6604 call(C, relocInfo::none); 6605 jmp(E); 6606 6607 bind(C); 6608 6609 LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg")); 6610 6611 pass_arg2(this, arg_2); 6612 pass_arg1(this, arg_1); 6613 call_VM_helper(oop_result, entry_point, 2, check_exceptions); 6614 ret(0); 6615 6616 bind(E); 6617 } 6618 6619 void MacroAssembler::call_VM(Register oop_result, 6620 address entry_point, 6621 Register arg_1, 6622 Register arg_2, 6623 Register arg_3, 6624 bool check_exceptions) { 6625 Label C, E; 6626 call(C, relocInfo::none); 6627 jmp(E); 6628 6629 bind(C); 6630 6631 LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg")); 6632 LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg")); 6633 pass_arg3(this, arg_3); 6634 6635 LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg")); 6636 pass_arg2(this, arg_2); 6637 6638 pass_arg1(this, arg_1); 6639 call_VM_helper(oop_result, entry_point, 3, check_exceptions); 6640 ret(0); 6641 6642 bind(E); 6643 } 6644 6645 void MacroAssembler::call_VM(Register oop_result, 6646 Register last_java_sp, 6647 address entry_point, 6648 int number_of_arguments, 6649 bool check_exceptions) { 6650 Register thread = LP64_ONLY(r15_thread) NOT_LP64(noreg); 6651 call_VM_base(oop_result, thread, last_java_sp, entry_point, number_of_arguments, check_exceptions); 6652 } 6653 6654 void MacroAssembler::call_VM(Register oop_result, 6655 Register last_java_sp, 6656 address entry_point, 6657 Register arg_1, 6658 bool check_exceptions) { 6659 pass_arg1(this, arg_1); 6660 call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions); 6661 } 6662 6663 void MacroAssembler::call_VM(Register oop_result, 6664 Register last_java_sp, 6665 address entry_point, 6666 Register arg_1, 6667 Register arg_2, 6668 bool check_exceptions) { 6669 6670 LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg")); 6671 pass_arg2(this, arg_2); 6672 pass_arg1(this, arg_1); 6673 call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions); 6674 } 6675 6676 void 
MacroAssembler::call_VM(Register oop_result, 6677 Register last_java_sp, 6678 address entry_point, 6679 Register arg_1, 6680 Register arg_2, 6681 Register arg_3, 6682 bool check_exceptions) { 6683 LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg")); 6684 LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg")); 6685 pass_arg3(this, arg_3); 6686 LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg")); 6687 pass_arg2(this, arg_2); 6688 pass_arg1(this, arg_1); 6689 call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions); 6690 } 6691 6692 void MacroAssembler::super_call_VM(Register oop_result, 6693 Register last_java_sp, 6694 address entry_point, 6695 int number_of_arguments, 6696 bool check_exceptions) { 6697 Register thread = LP64_ONLY(r15_thread) NOT_LP64(noreg); 6698 MacroAssembler::call_VM_base(oop_result, thread, last_java_sp, entry_point, number_of_arguments, check_exceptions); 6699 } 6700 6701 void MacroAssembler::super_call_VM(Register oop_result, 6702 Register last_java_sp, 6703 address entry_point, 6704 Register arg_1, 6705 bool check_exceptions) { 6706 pass_arg1(this, arg_1); 6707 super_call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions); 6708 } 6709 6710 void MacroAssembler::super_call_VM(Register oop_result, 6711 Register last_java_sp, 6712 address entry_point, 6713 Register arg_1, 6714 Register arg_2, 6715 bool check_exceptions) { 6716 6717 LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg")); 6718 pass_arg2(this, arg_2); 6719 pass_arg1(this, arg_1); 6720 super_call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions); 6721 } 6722 6723 void MacroAssembler::super_call_VM(Register oop_result, 6724 Register last_java_sp, 6725 address entry_point, 6726 Register arg_1, 6727 Register arg_2, 6728 Register arg_3, 6729 bool check_exceptions) { 6730 LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg")); 6731 LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg")); 6732 pass_arg3(this, arg_3); 6733 LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg")); 6734 pass_arg2(this, arg_2); 6735 pass_arg1(this, arg_1); 6736 super_call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions); 6737 } 6738 6739 void MacroAssembler::call_VM_base(Register oop_result, 6740 Register java_thread, 6741 Register last_java_sp, 6742 address entry_point, 6743 int number_of_arguments, 6744 bool check_exceptions) { 6745 // determine java_thread register 6746 if (!java_thread->is_valid()) { 6747 #ifdef _LP64 6748 java_thread = r15_thread; 6749 #else 6750 java_thread = rdi; 6751 get_thread(java_thread); 6752 #endif // LP64 6753 } 6754 // determine last_java_sp register 6755 if (!last_java_sp->is_valid()) { 6756 last_java_sp = rsp; 6757 } 6758 // debugging support 6759 assert(number_of_arguments >= 0 , "cannot have negative number of arguments"); 6760 LP64_ONLY(assert(java_thread == r15_thread, "unexpected register")); 6761 #ifdef ASSERT 6762 // TraceBytecodes does not use r12 but saves it over the call, so don't verify 6763 // r12 is the heapbase. 
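  // Note: on x86_64, r12 is reserved as the compressed-oops heap base
  // register, which is what verify_heapbase checks below.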
  LP64_ONLY(if (UseCompressedOops && !TraceBytecodes) verify_heapbase("call_VM_base");)
#endif // ASSERT

  assert(java_thread != oop_result  , "cannot use the same register for java_thread & oop_result");
  assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp");

  // push java thread (becomes first argument of C function)

  NOT_LP64(push(java_thread); number_of_arguments++);
  LP64_ONLY(mov(c_rarg0, r15_thread));

  // set last Java frame before call
  assert(last_java_sp != rbp, "can't use ebp/rbp");

  // Only interpreter should have to set fp
  set_last_Java_frame(java_thread, last_java_sp, rbp, NULL);

  // do the call, remove parameters
  MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments);

  // restore the thread (cannot use the pushed argument since arguments
  // may be overwritten by C code generated by an optimizing compiler);
  // however, we can use the register value directly if it is callee saved.
  if (LP64_ONLY(true ||) java_thread == rdi || java_thread == rsi) {
    // rdi & rsi (also r15) are callee saved -> nothing to do
#ifdef ASSERT
    guarantee(java_thread != rax, "change this code");
    push(rax);
    { Label L;
      get_thread(rax);
      cmpptr(java_thread, rax);
      jcc(Assembler::equal, L);
      stop("MacroAssembler::call_VM_base: rdi not callee saved?");
      bind(L);
    }
    pop(rax);
#endif
  } else {
    get_thread(java_thread);
  }
  // reset last Java frame
  // Only interpreter should have to clear fp
  reset_last_Java_frame(java_thread, true, false);

#ifndef CC_INTERP
  // C++ interp handles this in the interpreter
  check_and_handle_popframe(java_thread);
  check_and_handle_earlyret(java_thread);
#endif /* CC_INTERP */

  if (check_exceptions) {
    // check for pending exceptions (java_thread is set upon return)
    cmpptr(Address(java_thread, Thread::pending_exception_offset()), (int32_t) NULL_WORD);
#ifndef _LP64
    jump_cc(Assembler::notEqual,
            RuntimeAddress(StubRoutines::forward_exception_entry()));
#else
    // This used to conditionally jump to forward_exception, however it is
    // possible that if we relocate the branch will not reach. So we must jump
    // around so we can always reach it.

    Label ok;
    jcc(Assembler::equal, ok);
    jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
    bind(ok);
#endif // LP64
  }

  // get oop result if there is one and reset the value in the thread
  if (oop_result->is_valid()) {
    movptr(oop_result, Address(java_thread, JavaThread::vm_result_offset()));
    movptr(Address(java_thread, JavaThread::vm_result_offset()), NULL_WORD);
    verify_oop(oop_result, "broken oop in call_VM_base");
  }
}

void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) {

  // Calculate the value for last_Java_sp. Somewhat subtle:
  // call_VM does an intermediate call which places a return address on the
  // stack just under the stack pointer as the user finished with it. This
  // allows us to retrieve last_Java_pc from last_Java_sp[-1].
  // On 32bit we then have to push additional args on the stack to accomplish
  // the actual requested call.
On 64bit call_VM only can use register args 6849 // so the only extra space is the return address that call_VM created. 6850 // This hopefully explains the calculations here. 6851 6852 #ifdef _LP64 6853 // We've pushed one address, correct last_Java_sp 6854 lea(rax, Address(rsp, wordSize)); 6855 #else 6856 lea(rax, Address(rsp, (1 + number_of_arguments) * wordSize)); 6857 #endif // LP64 6858 6859 call_VM_base(oop_result, noreg, rax, entry_point, number_of_arguments, check_exceptions); 6860 6861 } 6862 6863 void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) { 6864 call_VM_leaf_base(entry_point, number_of_arguments); 6865 } 6866 6867 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) { 6868 pass_arg0(this, arg_0); 6869 call_VM_leaf(entry_point, 1); 6870 } 6871 6872 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) { 6873 6874 LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg")); 6875 pass_arg1(this, arg_1); 6876 pass_arg0(this, arg_0); 6877 call_VM_leaf(entry_point, 2); 6878 } 6879 6880 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) { 6881 LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg")); 6882 LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg")); 6883 pass_arg2(this, arg_2); 6884 LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg")); 6885 pass_arg1(this, arg_1); 6886 pass_arg0(this, arg_0); 6887 call_VM_leaf(entry_point, 3); 6888 } 6889 6890 void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0) { 6891 pass_arg0(this, arg_0); 6892 MacroAssembler::call_VM_leaf_base(entry_point, 1); 6893 } 6894 6895 void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1) { 6896 6897 LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg")); 6898 pass_arg1(this, arg_1); 6899 pass_arg0(this, arg_0); 6900 MacroAssembler::call_VM_leaf_base(entry_point, 2); 6901 } 6902 6903 void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) { 6904 LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg")); 6905 LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg")); 6906 pass_arg2(this, arg_2); 6907 LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg")); 6908 pass_arg1(this, arg_1); 6909 pass_arg0(this, arg_0); 6910 MacroAssembler::call_VM_leaf_base(entry_point, 3); 6911 } 6912 6913 void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3) { 6914 LP64_ONLY(assert(arg_0 != c_rarg3, "smashed arg")); 6915 LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg")); 6916 LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg")); 6917 pass_arg3(this, arg_3); 6918 LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg")); 6919 LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg")); 6920 pass_arg2(this, arg_2); 6921 LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg")); 6922 pass_arg1(this, arg_1); 6923 pass_arg0(this, arg_0); 6924 MacroAssembler::call_VM_leaf_base(entry_point, 4); 6925 } 6926 6927 void MacroAssembler::check_and_handle_earlyret(Register java_thread) { 6928 } 6929 6930 void MacroAssembler::check_and_handle_popframe(Register java_thread) { 6931 } 6932 6933 void MacroAssembler::cmp32(AddressLiteral src1, int32_t imm) { 6934 if (reachable(src1)) { 6935 cmpl(as_Address(src1), imm); 6936 } else { 6937 lea(rscratch1, src1); 6938 cmpl(Address(rscratch1, 0), imm); 6939 } 6940 } 6941 6942 void MacroAssembler::cmp32(Register src1, AddressLiteral src2) { 6943 
assert(!src2.is_lval(), "use cmpptr"); 6944 if (reachable(src2)) { 6945 cmpl(src1, as_Address(src2)); 6946 } else { 6947 lea(rscratch1, src2); 6948 cmpl(src1, Address(rscratch1, 0)); 6949 } 6950 } 6951 6952 void MacroAssembler::cmp32(Register src1, int32_t imm) { 6953 Assembler::cmpl(src1, imm); 6954 } 6955 6956 void MacroAssembler::cmp32(Register src1, Address src2) { 6957 Assembler::cmpl(src1, src2); 6958 } 6959 6960 void MacroAssembler::cmpsd2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less) { 6961 ucomisd(opr1, opr2); 6962 6963 Label L; 6964 if (unordered_is_less) { 6965 movl(dst, -1); 6966 jcc(Assembler::parity, L); 6967 jcc(Assembler::below , L); 6968 movl(dst, 0); 6969 jcc(Assembler::equal , L); 6970 increment(dst); 6971 } else { // unordered is greater 6972 movl(dst, 1); 6973 jcc(Assembler::parity, L); 6974 jcc(Assembler::above , L); 6975 movl(dst, 0); 6976 jcc(Assembler::equal , L); 6977 decrementl(dst); 6978 } 6979 bind(L); 6980 } 6981 6982 void MacroAssembler::cmpss2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less) { 6983 ucomiss(opr1, opr2); 6984 6985 Label L; 6986 if (unordered_is_less) { 6987 movl(dst, -1); 6988 jcc(Assembler::parity, L); 6989 jcc(Assembler::below , L); 6990 movl(dst, 0); 6991 jcc(Assembler::equal , L); 6992 increment(dst); 6993 } else { // unordered is greater 6994 movl(dst, 1); 6995 jcc(Assembler::parity, L); 6996 jcc(Assembler::above , L); 6997 movl(dst, 0); 6998 jcc(Assembler::equal , L); 6999 decrementl(dst); 7000 } 7001 bind(L); 7002 } 7003 7004 7005 void MacroAssembler::cmp8(AddressLiteral src1, int imm) { 7006 if (reachable(src1)) { 7007 cmpb(as_Address(src1), imm); 7008 } else { 7009 lea(rscratch1, src1); 7010 cmpb(Address(rscratch1, 0), imm); 7011 } 7012 } 7013 7014 void MacroAssembler::cmpptr(Register src1, AddressLiteral src2) { 7015 #ifdef _LP64 7016 if (src2.is_lval()) { 7017 movptr(rscratch1, src2); 7018 Assembler::cmpq(src1, rscratch1); 7019 } else if (reachable(src2)) { 7020 cmpq(src1, as_Address(src2)); 7021 } else { 7022 lea(rscratch1, src2); 7023 Assembler::cmpq(src1, Address(rscratch1, 0)); 7024 } 7025 #else 7026 if (src2.is_lval()) { 7027 cmp_literal32(src1, (int32_t) src2.target(), src2.rspec()); 7028 } else { 7029 cmpl(src1, as_Address(src2)); 7030 } 7031 #endif // _LP64 7032 } 7033 7034 void MacroAssembler::cmpptr(Address src1, AddressLiteral src2) { 7035 assert(src2.is_lval(), "not a mem-mem compare"); 7036 #ifdef _LP64 7037 // moves src2's literal address 7038 movptr(rscratch1, src2); 7039 Assembler::cmpq(src1, rscratch1); 7040 #else 7041 cmp_literal32(src1, (int32_t) src2.target(), src2.rspec()); 7042 #endif // _LP64 7043 } 7044 7045 void MacroAssembler::locked_cmpxchgptr(Register reg, AddressLiteral adr) { 7046 if (reachable(adr)) { 7047 if (os::is_MP()) 7048 lock(); 7049 cmpxchgptr(reg, as_Address(adr)); 7050 } else { 7051 lea(rscratch1, adr); 7052 if (os::is_MP()) 7053 lock(); 7054 cmpxchgptr(reg, Address(rscratch1, 0)); 7055 } 7056 } 7057 7058 void MacroAssembler::cmpxchgptr(Register reg, Address adr) { 7059 LP64_ONLY(cmpxchgq(reg, adr)) NOT_LP64(cmpxchgl(reg, adr)); 7060 } 7061 7062 void MacroAssembler::comisd(XMMRegister dst, AddressLiteral src) { 7063 if (reachable(src)) { 7064 Assembler::comisd(dst, as_Address(src)); 7065 } else { 7066 lea(rscratch1, src); 7067 Assembler::comisd(dst, Address(rscratch1, 0)); 7068 } 7069 } 7070 7071 void MacroAssembler::comiss(XMMRegister dst, AddressLiteral src) { 7072 if (reachable(src)) { 7073 Assembler::comiss(dst, as_Address(src)); 
7074 } else { 7075 lea(rscratch1, src); 7076 Assembler::comiss(dst, Address(rscratch1, 0)); 7077 } 7078 } 7079 7080 7081 void MacroAssembler::cond_inc32(Condition cond, AddressLiteral counter_addr) { 7082 Condition negated_cond = negate_condition(cond); 7083 Label L; 7084 jcc(negated_cond, L); 7085 atomic_incl(counter_addr); 7086 bind(L); 7087 } 7088 7089 int MacroAssembler::corrected_idivl(Register reg) { 7090 // Full implementation of Java idiv and irem; checks for 7091 // special case as described in JVM spec., p.243 & p.271. 7092 // The function returns the (pc) offset of the idivl 7093 // instruction - may be needed for implicit exceptions. 7094 // 7095 // normal case special case 7096 // 7097 // input : rax,: dividend min_int 7098 // reg: divisor (may not be rax,/rdx) -1 7099 // 7100 // output: rax,: quotient (= rax, idiv reg) min_int 7101 // rdx: remainder (= rax, irem reg) 0 7102 assert(reg != rax && reg != rdx, "reg cannot be rax, or rdx register"); 7103 const int min_int = 0x80000000; 7104 Label normal_case, special_case; 7105 7106 // check for special case 7107 cmpl(rax, min_int); 7108 jcc(Assembler::notEqual, normal_case); 7109 xorl(rdx, rdx); // prepare rdx for possible special case (where remainder = 0) 7110 cmpl(reg, -1); 7111 jcc(Assembler::equal, special_case); 7112 7113 // handle normal case 7114 bind(normal_case); 7115 cdql(); 7116 int idivl_offset = offset(); 7117 idivl(reg); 7118 7119 // normal and special case exit 7120 bind(special_case); 7121 7122 return idivl_offset; 7123 } 7124 7125 7126 7127 void MacroAssembler::decrementl(Register reg, int value) { 7128 if (value == min_jint) {subl(reg, value) ; return; } 7129 if (value < 0) { incrementl(reg, -value); return; } 7130 if (value == 0) { ; return; } 7131 if (value == 1 && UseIncDec) { decl(reg) ; return; } 7132 /* else */ { subl(reg, value) ; return; } 7133 } 7134 7135 void MacroAssembler::decrementl(Address dst, int value) { 7136 if (value == min_jint) {subl(dst, value) ; return; } 7137 if (value < 0) { incrementl(dst, -value); return; } 7138 if (value == 0) { ; return; } 7139 if (value == 1 && UseIncDec) { decl(dst) ; return; } 7140 /* else */ { subl(dst, value) ; return; } 7141 } 7142 7143 void MacroAssembler::division_with_shift (Register reg, int shift_value) { 7144 assert (shift_value > 0, "illegal shift value"); 7145 Label _is_positive; 7146 testl (reg, reg); 7147 jcc (Assembler::positive, _is_positive); 7148 int offset = (1 << shift_value) - 1 ; 7149 7150 if (offset == 1) { 7151 incrementl(reg); 7152 } else { 7153 addl(reg, offset); 7154 } 7155 7156 bind (_is_positive); 7157 sarl(reg, shift_value); 7158 } 7159 7160 void MacroAssembler::divsd(XMMRegister dst, AddressLiteral src) { 7161 if (reachable(src)) { 7162 Assembler::divsd(dst, as_Address(src)); 7163 } else { 7164 lea(rscratch1, src); 7165 Assembler::divsd(dst, Address(rscratch1, 0)); 7166 } 7167 } 7168 7169 void MacroAssembler::divss(XMMRegister dst, AddressLiteral src) { 7170 if (reachable(src)) { 7171 Assembler::divss(dst, as_Address(src)); 7172 } else { 7173 lea(rscratch1, src); 7174 Assembler::divss(dst, Address(rscratch1, 0)); 7175 } 7176 } 7177 7178 // !defined(COMPILER2) is because of stupid core builds 7179 #if !defined(_LP64) || defined(COMPILER1) || !defined(COMPILER2) 7180 void MacroAssembler::empty_FPU_stack() { 7181 if (VM_Version::supports_mmx()) { 7182 emms(); 7183 } else { 7184 for (int i = 8; i-- > 0; ) ffree(i); 7185 } 7186 } 7187 #endif // !LP64 || C1 || !C2 7188 7189 7190 // Defines obj, preserves var_size_in_bytes 7191 void 
MacroAssembler::eden_allocate(Register obj, 7192 Register var_size_in_bytes, 7193 int con_size_in_bytes, 7194 Register t1, 7195 Label& slow_case) { 7196 assert(obj == rax, "obj must be in rax, for cmpxchg"); 7197 assert_different_registers(obj, var_size_in_bytes, t1); 7198 if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) { 7199 jmp(slow_case); 7200 } else { 7201 Register end = t1; 7202 Label retry; 7203 bind(retry); 7204 ExternalAddress heap_top((address) Universe::heap()->top_addr()); 7205 movptr(obj, heap_top); 7206 if (var_size_in_bytes == noreg) { 7207 lea(end, Address(obj, con_size_in_bytes)); 7208 } else { 7209 lea(end, Address(obj, var_size_in_bytes, Address::times_1)); 7210 } 7211 // if end < obj then we wrapped around => object too long => slow case 7212 cmpptr(end, obj); 7213 jcc(Assembler::below, slow_case); 7214 cmpptr(end, ExternalAddress((address) Universe::heap()->end_addr())); 7215 jcc(Assembler::above, slow_case); 7216 // Compare obj with the top addr, and if still equal, store the new top addr in 7217 // end at the address of the top addr pointer. Sets ZF if was equal, and clears 7218 // it otherwise. Use lock prefix for atomicity on MPs. 7219 locked_cmpxchgptr(end, heap_top); 7220 jcc(Assembler::notEqual, retry); 7221 } 7222 } 7223 7224 void MacroAssembler::enter() { 7225 push(rbp); 7226 mov(rbp, rsp); 7227 } 7228 7229 // A 5 byte nop that is safe for patching (see patch_verified_entry) 7230 void MacroAssembler::fat_nop() { 7231 if (UseAddressNop) { 7232 addr_nop_5(); 7233 } else { 7234 emit_byte(0x26); // es: 7235 emit_byte(0x2e); // cs: 7236 emit_byte(0x64); // fs: 7237 emit_byte(0x65); // gs: 7238 emit_byte(0x90); 7239 } 7240 } 7241 7242 void MacroAssembler::fcmp(Register tmp) { 7243 fcmp(tmp, 1, true, true); 7244 } 7245 7246 void MacroAssembler::fcmp(Register tmp, int index, bool pop_left, bool pop_right) { 7247 assert(!pop_right || pop_left, "usage error"); 7248 if (VM_Version::supports_cmov()) { 7249 assert(tmp == noreg, "unneeded temp"); 7250 if (pop_left) { 7251 fucomip(index); 7252 } else { 7253 fucomi(index); 7254 } 7255 if (pop_right) { 7256 fpop(); 7257 } 7258 } else { 7259 assert(tmp != noreg, "need temp"); 7260 if (pop_left) { 7261 if (pop_right) { 7262 fcompp(); 7263 } else { 7264 fcomp(index); 7265 } 7266 } else { 7267 fcom(index); 7268 } 7269 // convert FPU condition into eflags condition via rax, 7270 save_rax(tmp); 7271 fwait(); fnstsw_ax(); 7272 sahf(); 7273 restore_rax(tmp); 7274 } 7275 // condition codes set as follows: 7276 // 7277 // CF (corresponds to C0) if x < y 7278 // PF (corresponds to C2) if unordered 7279 // ZF (corresponds to C3) if x = y 7280 } 7281 7282 void MacroAssembler::fcmp2int(Register dst, bool unordered_is_less) { 7283 fcmp2int(dst, unordered_is_less, 1, true, true); 7284 } 7285 7286 void MacroAssembler::fcmp2int(Register dst, bool unordered_is_less, int index, bool pop_left, bool pop_right) { 7287 fcmp(VM_Version::supports_cmov() ? 
noreg : dst, index, pop_left, pop_right); 7288 Label L; 7289 if (unordered_is_less) { 7290 movl(dst, -1); 7291 jcc(Assembler::parity, L); 7292 jcc(Assembler::below , L); 7293 movl(dst, 0); 7294 jcc(Assembler::equal , L); 7295 increment(dst); 7296 } else { // unordered is greater 7297 movl(dst, 1); 7298 jcc(Assembler::parity, L); 7299 jcc(Assembler::above , L); 7300 movl(dst, 0); 7301 jcc(Assembler::equal , L); 7302 decrementl(dst); 7303 } 7304 bind(L); 7305 } 7306 7307 void MacroAssembler::fld_d(AddressLiteral src) { 7308 fld_d(as_Address(src)); 7309 } 7310 7311 void MacroAssembler::fld_s(AddressLiteral src) { 7312 fld_s(as_Address(src)); 7313 } 7314 7315 void MacroAssembler::fld_x(AddressLiteral src) { 7316 Assembler::fld_x(as_Address(src)); 7317 } 7318 7319 void MacroAssembler::fldcw(AddressLiteral src) { 7320 Assembler::fldcw(as_Address(src)); 7321 } 7322 7323 void MacroAssembler::pow_exp_core_encoding() { 7324 // kills rax, rcx, rdx 7325 subptr(rsp,sizeof(jdouble)); 7326 // computes 2^X. Stack: X ... 7327 // f2xm1 computes 2^X-1 but only operates on -1<=X<=1. Get int(X) and 7328 // keep it on the thread's stack to compute 2^int(X) later 7329 // then compute 2^(X-int(X)) as (2^(X-int(X)-1+1) 7330 // final result is obtained with: 2^X = 2^int(X) * 2^(X-int(X)) 7331 fld_s(0); // Stack: X X ... 7332 frndint(); // Stack: int(X) X ... 7333 fsuba(1); // Stack: int(X) X-int(X) ... 7334 fistp_s(Address(rsp,0)); // move int(X) as integer to thread's stack. Stack: X-int(X) ... 7335 f2xm1(); // Stack: 2^(X-int(X))-1 ... 7336 fld1(); // Stack: 1 2^(X-int(X))-1 ... 7337 faddp(1); // Stack: 2^(X-int(X)) 7338 // computes 2^(int(X)): add exponent bias (1023) to int(X), then 7339 // shift int(X)+1023 to exponent position. 7340 // Exponent is limited to 11 bits if int(X)+1023 does not fit in 11 7341 // bits, set result to NaN. 0x000 and 0x7FF are reserved exponent 7342 // values so detect them and set result to NaN. 7343 movl(rax,Address(rsp,0)); 7344 movl(rcx, -2048); // 11 bit mask and valid NaN binary encoding 7345 addl(rax, 1023); 7346 movl(rdx,rax); 7347 shll(rax,20); 7348 // Check that 0 < int(X)+1023 < 2047. Otherwise set rax to NaN. 7349 addl(rdx,1); 7350 // Check that 1 < int(X)+1023+1 < 2048 7351 // in 3 steps: 7352 // 1- (int(X)+1023+1)&-2048 == 0 => 0 <= int(X)+1023+1 < 2048 7353 // 2- (int(X)+1023+1)&-2048 != 0 7354 // 3- (int(X)+1023+1)&-2048 != 1 7355 // Do 2- first because addl just updated the flags. 7356 cmov32(Assembler::equal,rax,rcx); 7357 cmpl(rdx,1); 7358 cmov32(Assembler::equal,rax,rcx); 7359 testl(rdx,rcx); 7360 cmov32(Assembler::notEqual,rax,rcx); 7361 movl(Address(rsp,4),rax); 7362 movl(Address(rsp,0),0); 7363 fmul_d(Address(rsp,0)); // Stack: 2^X ... 7364 addptr(rsp,sizeof(jdouble)); 7365 } 7366 7367 void MacroAssembler::increase_precision() { 7368 subptr(rsp, BytesPerWord); 7369 fnstcw(Address(rsp, 0)); 7370 movl(rax, Address(rsp, 0)); 7371 orl(rax, 0x300); 7372 push(rax); 7373 fldcw(Address(rsp, 0)); 7374 pop(rax); 7375 } 7376 7377 void MacroAssembler::restore_precision() { 7378 fldcw(Address(rsp, 0)); 7379 addptr(rsp, BytesPerWord); 7380 } 7381 7382 void MacroAssembler::fast_pow() { 7383 // computes X^Y = 2^(Y * log2(X)) 7384 // if fast computation is not possible, result is NaN. Requires 7385 // fallback from user of this macro. 7386 // increase precision for intermediate steps of the computation 7387 increase_precision(); 7388 fyl2x(); // Stack: (Y*log2(X)) ... 7389 pow_exp_core_encoding(); // Stack: exp(X) ... 
7390 restore_precision(); 7391 } 7392 7393 void MacroAssembler::fast_exp() { 7394 // computes exp(X) = 2^(X * log2(e)) 7395 // if fast computation is not possible, result is NaN. Requires 7396 // fallback from user of this macro. 7397 // increase precision for intermediate steps of the computation 7398 increase_precision(); 7399 fldl2e(); // Stack: log2(e) X ... 7400 fmulp(1); // Stack: (X*log2(e)) ... 7401 pow_exp_core_encoding(); // Stack: exp(X) ... 7402 restore_precision(); 7403 } 7404 7405 void MacroAssembler::pow_or_exp(bool is_exp, int num_fpu_regs_in_use) { 7406 // kills rax, rcx, rdx 7407 // pow and exp needs 2 extra registers on the fpu stack. 7408 Label slow_case, done; 7409 Register tmp = noreg; 7410 if (!VM_Version::supports_cmov()) { 7411 // fcmp needs a temporary so preserve rdx, 7412 tmp = rdx; 7413 } 7414 Register tmp2 = rax; 7415 Register tmp3 = rcx; 7416 7417 if (is_exp) { 7418 // Stack: X 7419 fld_s(0); // duplicate argument for runtime call. Stack: X X 7420 fast_exp(); // Stack: exp(X) X 7421 fcmp(tmp, 0, false, false); // Stack: exp(X) X 7422 // exp(X) not equal to itself: exp(X) is NaN go to slow case. 7423 jcc(Assembler::parity, slow_case); 7424 // get rid of duplicate argument. Stack: exp(X) 7425 if (num_fpu_regs_in_use > 0) { 7426 fxch(); 7427 fpop(); 7428 } else { 7429 ffree(1); 7430 } 7431 jmp(done); 7432 } else { 7433 // Stack: X Y 7434 Label x_negative, y_odd; 7435 7436 fldz(); // Stack: 0 X Y 7437 fcmp(tmp, 1, true, false); // Stack: X Y 7438 jcc(Assembler::above, x_negative); 7439 7440 // X >= 0 7441 7442 fld_s(1); // duplicate arguments for runtime call. Stack: Y X Y 7443 fld_s(1); // Stack: X Y X Y 7444 fast_pow(); // Stack: X^Y X Y 7445 fcmp(tmp, 0, false, false); // Stack: X^Y X Y 7446 // X^Y not equal to itself: X^Y is NaN go to slow case. 7447 jcc(Assembler::parity, slow_case); 7448 // get rid of duplicate arguments. Stack: X^Y 7449 if (num_fpu_regs_in_use > 0) { 7450 fxch(); fpop(); 7451 fxch(); fpop(); 7452 } else { 7453 ffree(2); 7454 ffree(1); 7455 } 7456 jmp(done); 7457 7458 // X <= 0 7459 bind(x_negative); 7460 7461 fld_s(1); // Stack: Y X Y 7462 frndint(); // Stack: int(Y) X Y 7463 fcmp(tmp, 2, false, false); // Stack: int(Y) X Y 7464 jcc(Assembler::notEqual, slow_case); 7465 7466 subptr(rsp, 8); 7467 7468 // For X^Y, when X < 0, Y has to be an integer and the final 7469 // result depends on whether it's odd or even. We just checked 7470 // that int(Y) == Y. We move int(Y) to gp registers as a 64 bit 7471 // integer to test its parity. If int(Y) is huge and doesn't fit 7472 // in the 64 bit integer range, the integer indefinite value will 7473 // end up in the gp registers. Huge numbers are all even, the 7474 // integer indefinite number is even so it's fine. 7475 7476 #ifdef ASSERT 7477 // Let's check we don't end up with an integer indefinite number 7478 // when not expected. First test for huge numbers: check whether 7479 // int(Y)+1 == int(Y) which is true for very large numbers and 7480 // those are all even. A 64 bit integer is guaranteed to not 7481 // overflow for numbers where y+1 != y (when precision is set to 7482 // double precision). 
7483 Label y_not_huge; 7484 7485 fld1(); // Stack: 1 int(Y) X Y 7486 fadd(1); // Stack: 1+int(Y) int(Y) X Y 7487 7488 #ifdef _LP64 7489 // trip to memory to force the precision down from double extended 7490 // precision 7491 fstp_d(Address(rsp, 0)); 7492 fld_d(Address(rsp, 0)); 7493 #endif 7494 7495 fcmp(tmp, 1, true, false); // Stack: int(Y) X Y 7496 #endif 7497 7498 // move int(Y) as 64 bit integer to thread's stack 7499 fistp_d(Address(rsp,0)); // Stack: X Y 7500 7501 #ifdef ASSERT 7502 jcc(Assembler::notEqual, y_not_huge); 7503 7504 // Y is huge so we know it's even. It may not fit in a 64 bit 7505 // integer and we don't want the debug code below to see the 7506 // integer indefinite value so overwrite int(Y) on the thread's 7507 // stack with 0. 7508 movl(Address(rsp, 0), 0); 7509 movl(Address(rsp, 4), 0); 7510 7511 bind(y_not_huge); 7512 #endif 7513 7514 fld_s(1); // duplicate arguments for runtime call. Stack: Y X Y 7515 fld_s(1); // Stack: X Y X Y 7516 fabs(); // Stack: abs(X) Y X Y 7517 fast_pow(); // Stack: abs(X)^Y X Y 7518 fcmp(tmp, 0, false, false); // Stack: abs(X)^Y X Y 7519 // abs(X)^Y not equal to itself: abs(X)^Y is NaN go to slow case. 7520 7521 pop(tmp2); 7522 NOT_LP64(pop(tmp3)); 7523 jcc(Assembler::parity, slow_case); 7524 7525 #ifdef ASSERT 7526 // Check that int(Y) is not integer indefinite value (int 7527 // overflow). Shouldn't happen because for values that would 7528 // overflow, 1+int(Y)==Y which was tested earlier. 7529 #ifndef _LP64 7530 { 7531 Label integer; 7532 testl(tmp2, tmp2); 7533 jcc(Assembler::notZero, integer); 7534 cmpl(tmp3, 0x80000000); 7535 jcc(Assembler::notZero, integer); 7536 stop("integer indefinite value shouldn't be seen here"); 7537 bind(integer); 7538 } 7539 #else 7540 { 7541 Label integer; 7542 mov(tmp3, tmp2); // preserve tmp2 for parity check below 7543 shlq(tmp3, 1); 7544 jcc(Assembler::carryClear, integer); 7545 jcc(Assembler::notZero, integer); 7546 stop("integer indefinite value shouldn't be seen here"); 7547 bind(integer); 7548 } 7549 #endif 7550 #endif 7551 7552 // get rid of duplicate arguments. Stack: X^Y 7553 if (num_fpu_regs_in_use > 0) { 7554 fxch(); fpop(); 7555 fxch(); fpop(); 7556 } else { 7557 ffree(2); 7558 ffree(1); 7559 } 7560 7561 testl(tmp2, 1); 7562 jcc(Assembler::zero, done); // X <= 0, Y even: X^Y = abs(X)^Y 7563 // X <= 0, Y even: X^Y = -abs(X)^Y 7564 7565 fchs(); // Stack: -abs(X)^Y Y 7566 jmp(done); 7567 } 7568 7569 // slow case: runtime call 7570 bind(slow_case); 7571 7572 fpop(); // pop incorrect result or int(Y) 7573 7574 fp_runtime_fallback(is_exp ? CAST_FROM_FN_PTR(address, SharedRuntime::dexp) : CAST_FROM_FN_PTR(address, SharedRuntime::dpow), 7575 is_exp ? 1 : 2, num_fpu_regs_in_use); 7576 7577 // Come here with result in F-TOS 7578 bind(done); 7579 } 7580 7581 void MacroAssembler::fpop() { 7582 ffree(); 7583 fincstp(); 7584 } 7585 7586 void MacroAssembler::fremr(Register tmp) { 7587 save_rax(tmp); 7588 { Label L; 7589 bind(L); 7590 fprem(); 7591 fwait(); fnstsw_ax(); 7592 #ifdef _LP64 7593 testl(rax, 0x400); 7594 jcc(Assembler::notEqual, L); 7595 #else 7596 sahf(); 7597 jcc(Assembler::parity, L); 7598 #endif // _LP64 7599 } 7600 restore_rax(tmp); 7601 // Result is in ST0. 
7602 // Note: fxch & fpop to get rid of ST1 7603 // (otherwise FPU stack could overflow eventually) 7604 fxch(1); 7605 fpop(); 7606 } 7607 7608 7609 void MacroAssembler::incrementl(AddressLiteral dst) { 7610 if (reachable(dst)) { 7611 incrementl(as_Address(dst)); 7612 } else { 7613 lea(rscratch1, dst); 7614 incrementl(Address(rscratch1, 0)); 7615 } 7616 } 7617 7618 void MacroAssembler::incrementl(ArrayAddress dst) { 7619 incrementl(as_Address(dst)); 7620 } 7621 7622 void MacroAssembler::incrementl(Register reg, int value) { 7623 if (value == min_jint) {addl(reg, value) ; return; } 7624 if (value < 0) { decrementl(reg, -value); return; } 7625 if (value == 0) { ; return; } 7626 if (value == 1 && UseIncDec) { incl(reg) ; return; } 7627 /* else */ { addl(reg, value) ; return; } 7628 } 7629 7630 void MacroAssembler::incrementl(Address dst, int value) { 7631 if (value == min_jint) {addl(dst, value) ; return; } 7632 if (value < 0) { decrementl(dst, -value); return; } 7633 if (value == 0) { ; return; } 7634 if (value == 1 && UseIncDec) { incl(dst) ; return; } 7635 /* else */ { addl(dst, value) ; return; } 7636 } 7637 7638 void MacroAssembler::jump(AddressLiteral dst) { 7639 if (reachable(dst)) { 7640 jmp_literal(dst.target(), dst.rspec()); 7641 } else { 7642 lea(rscratch1, dst); 7643 jmp(rscratch1); 7644 } 7645 } 7646 7647 void MacroAssembler::jump_cc(Condition cc, AddressLiteral dst) { 7648 if (reachable(dst)) { 7649 InstructionMark im(this); 7650 relocate(dst.reloc()); 7651 const int short_size = 2; 7652 const int long_size = 6; 7653 int offs = (intptr_t)dst.target() - ((intptr_t)_code_pos); 7654 if (dst.reloc() == relocInfo::none && is8bit(offs - short_size)) { 7655 // 0111 tttn #8-bit disp 7656 emit_byte(0x70 | cc); 7657 emit_byte((offs - short_size) & 0xFF); 7658 } else { 7659 // 0000 1111 1000 tttn #32-bit disp 7660 emit_byte(0x0F); 7661 emit_byte(0x80 | cc); 7662 emit_long(offs - long_size); 7663 } 7664 } else { 7665 #ifdef ASSERT 7666 warning("reversing conditional branch"); 7667 #endif /* ASSERT */ 7668 Label skip; 7669 jccb(reverse[cc], skip); 7670 lea(rscratch1, dst); 7671 Assembler::jmp(rscratch1); 7672 bind(skip); 7673 } 7674 } 7675 7676 void MacroAssembler::ldmxcsr(AddressLiteral src) { 7677 if (reachable(src)) { 7678 Assembler::ldmxcsr(as_Address(src)); 7679 } else { 7680 lea(rscratch1, src); 7681 Assembler::ldmxcsr(Address(rscratch1, 0)); 7682 } 7683 } 7684 7685 int MacroAssembler::load_signed_byte(Register dst, Address src) { 7686 int off; 7687 if (LP64_ONLY(true ||) VM_Version::is_P6()) { 7688 off = offset(); 7689 movsbl(dst, src); // movsxb 7690 } else { 7691 off = load_unsigned_byte(dst, src); 7692 shll(dst, 24); 7693 sarl(dst, 24); 7694 } 7695 return off; 7696 } 7697 7698 // Note: load_signed_short used to be called load_signed_word. 7699 // Although the 'w' in x86 opcodes refers to the term "word" in the assembler 7700 // manual, which means 16 bits, that usage is found nowhere in HotSpot code. 7701 // The term "word" in HotSpot means a 32- or 64-bit machine word. 7702 int MacroAssembler::load_signed_short(Register dst, Address src) { 7703 int off; 7704 if (LP64_ONLY(true ||) VM_Version::is_P6()) { 7705 // This is dubious to me since it seems safe to do a signed 16 => 64 bit 7706 // version but this is what 64bit has always done. This seems to imply 7707 // that users are only using 32bits worth. 
7708 off = offset(); 7709 movswl(dst, src); // movsxw 7710 } else { 7711 off = load_unsigned_short(dst, src); 7712 shll(dst, 16); 7713 sarl(dst, 16); 7714 } 7715 return off; 7716 } 7717 7718 int MacroAssembler::load_unsigned_byte(Register dst, Address src) { 7719 // According to Intel Doc. AP-526, "Zero-Extension of Short", p.16, 7720 // and "3.9 Partial Register Penalties", p. 22). 7721 int off; 7722 if (LP64_ONLY(true || ) VM_Version::is_P6() || src.uses(dst)) { 7723 off = offset(); 7724 movzbl(dst, src); // movzxb 7725 } else { 7726 xorl(dst, dst); 7727 off = offset(); 7728 movb(dst, src); 7729 } 7730 return off; 7731 } 7732 7733 // Note: load_unsigned_short used to be called load_unsigned_word. 7734 int MacroAssembler::load_unsigned_short(Register dst, Address src) { 7735 // According to Intel Doc. AP-526, "Zero-Extension of Short", p.16, 7736 // and "3.9 Partial Register Penalties", p. 22). 7737 int off; 7738 if (LP64_ONLY(true ||) VM_Version::is_P6() || src.uses(dst)) { 7739 off = offset(); 7740 movzwl(dst, src); // movzxw 7741 } else { 7742 xorl(dst, dst); 7743 off = offset(); 7744 movw(dst, src); 7745 } 7746 return off; 7747 } 7748 7749 void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2) { 7750 switch (size_in_bytes) { 7751 #ifndef _LP64 7752 case 8: 7753 assert(dst2 != noreg, "second dest register required"); 7754 movl(dst, src); 7755 movl(dst2, src.plus_disp(BytesPerInt)); 7756 break; 7757 #else 7758 case 8: movq(dst, src); break; 7759 #endif 7760 case 4: movl(dst, src); break; 7761 case 2: is_signed ? load_signed_short(dst, src) : load_unsigned_short(dst, src); break; 7762 case 1: is_signed ? load_signed_byte( dst, src) : load_unsigned_byte( dst, src); break; 7763 default: ShouldNotReachHere(); 7764 } 7765 } 7766 7767 void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2) { 7768 switch (size_in_bytes) { 7769 #ifndef _LP64 7770 case 8: 7771 assert(src2 != noreg, "second source register required"); 7772 movl(dst, src); 7773 movl(dst.plus_disp(BytesPerInt), src2); 7774 break; 7775 #else 7776 case 8: movq(dst, src); break; 7777 #endif 7778 case 4: movl(dst, src); break; 7779 case 2: movw(dst, src); break; 7780 case 1: movb(dst, src); break; 7781 default: ShouldNotReachHere(); 7782 } 7783 } 7784 7785 void MacroAssembler::mov32(AddressLiteral dst, Register src) { 7786 if (reachable(dst)) { 7787 movl(as_Address(dst), src); 7788 } else { 7789 lea(rscratch1, dst); 7790 movl(Address(rscratch1, 0), src); 7791 } 7792 } 7793 7794 void MacroAssembler::mov32(Register dst, AddressLiteral src) { 7795 if (reachable(src)) { 7796 movl(dst, as_Address(src)); 7797 } else { 7798 lea(rscratch1, src); 7799 movl(dst, Address(rscratch1, 0)); 7800 } 7801 } 7802 7803 // C++ bool manipulation 7804 7805 void MacroAssembler::movbool(Register dst, Address src) { 7806 if(sizeof(bool) == 1) 7807 movb(dst, src); 7808 else if(sizeof(bool) == 2) 7809 movw(dst, src); 7810 else if(sizeof(bool) == 4) 7811 movl(dst, src); 7812 else 7813 // unsupported 7814 ShouldNotReachHere(); 7815 } 7816 7817 void MacroAssembler::movbool(Address dst, bool boolconst) { 7818 if(sizeof(bool) == 1) 7819 movb(dst, (int) boolconst); 7820 else if(sizeof(bool) == 2) 7821 movw(dst, (int) boolconst); 7822 else if(sizeof(bool) == 4) 7823 movl(dst, (int) boolconst); 7824 else 7825 // unsupported 7826 ShouldNotReachHere(); 7827 } 7828 7829 void MacroAssembler::movbool(Address dst, Register src) { 7830 if(sizeof(bool) == 1) 7831 
movb(dst, src); 7832 else if(sizeof(bool) == 2) 7833 movw(dst, src); 7834 else if(sizeof(bool) == 4) 7835 movl(dst, src); 7836 else 7837 // unsupported 7838 ShouldNotReachHere(); 7839 } 7840 7841 void MacroAssembler::movbyte(ArrayAddress dst, int src) { 7842 movb(as_Address(dst), src); 7843 } 7844 7845 void MacroAssembler::movdl(XMMRegister dst, AddressLiteral src) { 7846 if (reachable(src)) { 7847 movdl(dst, as_Address(src)); 7848 } else { 7849 lea(rscratch1, src); 7850 movdl(dst, Address(rscratch1, 0)); 7851 } 7852 } 7853 7854 void MacroAssembler::movq(XMMRegister dst, AddressLiteral src) { 7855 if (reachable(src)) { 7856 movq(dst, as_Address(src)); 7857 } else { 7858 lea(rscratch1, src); 7859 movq(dst, Address(rscratch1, 0)); 7860 } 7861 } 7862 7863 void MacroAssembler::movdbl(XMMRegister dst, AddressLiteral src) { 7864 if (reachable(src)) { 7865 if (UseXmmLoadAndClearUpper) { 7866 movsd (dst, as_Address(src)); 7867 } else { 7868 movlpd(dst, as_Address(src)); 7869 } 7870 } else { 7871 lea(rscratch1, src); 7872 if (UseXmmLoadAndClearUpper) { 7873 movsd (dst, Address(rscratch1, 0)); 7874 } else { 7875 movlpd(dst, Address(rscratch1, 0)); 7876 } 7877 } 7878 } 7879 7880 void MacroAssembler::movflt(XMMRegister dst, AddressLiteral src) { 7881 if (reachable(src)) { 7882 movss(dst, as_Address(src)); 7883 } else { 7884 lea(rscratch1, src); 7885 movss(dst, Address(rscratch1, 0)); 7886 } 7887 } 7888 7889 void MacroAssembler::movptr(Register dst, Register src) { 7890 LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src)); 7891 } 7892 7893 void MacroAssembler::movptr(Register dst, Address src) { 7894 LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src)); 7895 } 7896 7897 // src should NEVER be a real pointer. Use AddressLiteral for true pointers 7898 void MacroAssembler::movptr(Register dst, intptr_t src) { 7899 LP64_ONLY(mov64(dst, src)) NOT_LP64(movl(dst, src)); 7900 } 7901 7902 void MacroAssembler::movptr(Address dst, Register src) { 7903 LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src)); 7904 } 7905 7906 void MacroAssembler::movsd(XMMRegister dst, AddressLiteral src) { 7907 if (reachable(src)) { 7908 Assembler::movsd(dst, as_Address(src)); 7909 } else { 7910 lea(rscratch1, src); 7911 Assembler::movsd(dst, Address(rscratch1, 0)); 7912 } 7913 } 7914 7915 void MacroAssembler::movss(XMMRegister dst, AddressLiteral src) { 7916 if (reachable(src)) { 7917 Assembler::movss(dst, as_Address(src)); 7918 } else { 7919 lea(rscratch1, src); 7920 Assembler::movss(dst, Address(rscratch1, 0)); 7921 } 7922 } 7923 7924 void MacroAssembler::mulsd(XMMRegister dst, AddressLiteral src) { 7925 if (reachable(src)) { 7926 Assembler::mulsd(dst, as_Address(src)); 7927 } else { 7928 lea(rscratch1, src); 7929 Assembler::mulsd(dst, Address(rscratch1, 0)); 7930 } 7931 } 7932 7933 void MacroAssembler::mulss(XMMRegister dst, AddressLiteral src) { 7934 if (reachable(src)) { 7935 Assembler::mulss(dst, as_Address(src)); 7936 } else { 7937 lea(rscratch1, src); 7938 Assembler::mulss(dst, Address(rscratch1, 0)); 7939 } 7940 } 7941 7942 void MacroAssembler::null_check(Register reg, int offset) { 7943 if (needs_explicit_null_check(offset)) { 7944 // provoke OS NULL exception if reg = NULL by 7945 // accessing M[reg] w/o changing any (non-CC) registers 7946 // NOTE: cmpl is plenty here to provoke a segv 7947 cmpptr(rax, Address(reg, 0)); 7948 // Note: should probably use testl(rax, Address(reg, 0)); 7949 // may be shorter code (however, this version of 7950 // testl needs to be implemented first) 7951 } else { 7952 // nothing to do, (later) 
access of M[reg + offset] 7953 // will provoke OS NULL exception if reg = NULL 7954 } 7955 } 7956 7957 void MacroAssembler::os_breakpoint() { 7958 // instead of directly emitting a breakpoint, call os:breakpoint for better debugability 7959 // (e.g., MSVC can't call ps() otherwise) 7960 call(RuntimeAddress(CAST_FROM_FN_PTR(address, os::breakpoint))); 7961 } 7962 7963 void MacroAssembler::pop_CPU_state() { 7964 pop_FPU_state(); 7965 pop_IU_state(); 7966 } 7967 7968 void MacroAssembler::pop_FPU_state() { 7969 NOT_LP64(frstor(Address(rsp, 0));) 7970 LP64_ONLY(fxrstor(Address(rsp, 0));) 7971 addptr(rsp, FPUStateSizeInWords * wordSize); 7972 } 7973 7974 void MacroAssembler::pop_IU_state() { 7975 popa(); 7976 LP64_ONLY(addq(rsp, 8)); 7977 popf(); 7978 } 7979 7980 // Save Integer and Float state 7981 // Warning: Stack must be 16 byte aligned (64bit) 7982 void MacroAssembler::push_CPU_state() { 7983 push_IU_state(); 7984 push_FPU_state(); 7985 } 7986 7987 void MacroAssembler::push_FPU_state() { 7988 subptr(rsp, FPUStateSizeInWords * wordSize); 7989 #ifndef _LP64 7990 fnsave(Address(rsp, 0)); 7991 fwait(); 7992 #else 7993 fxsave(Address(rsp, 0)); 7994 #endif // LP64 7995 } 7996 7997 void MacroAssembler::push_IU_state() { 7998 // Push flags first because pusha kills them 7999 pushf(); 8000 // Make sure rsp stays 16-byte aligned 8001 LP64_ONLY(subq(rsp, 8)); 8002 pusha(); 8003 } 8004 8005 void MacroAssembler::reset_last_Java_frame(Register java_thread, bool clear_fp, bool clear_pc) { 8006 // determine java_thread register 8007 if (!java_thread->is_valid()) { 8008 java_thread = rdi; 8009 get_thread(java_thread); 8010 } 8011 // we must set sp to zero to clear frame 8012 movptr(Address(java_thread, JavaThread::last_Java_sp_offset()), NULL_WORD); 8013 if (clear_fp) { 8014 movptr(Address(java_thread, JavaThread::last_Java_fp_offset()), NULL_WORD); 8015 } 8016 8017 if (clear_pc) 8018 movptr(Address(java_thread, JavaThread::last_Java_pc_offset()), NULL_WORD); 8019 8020 } 8021 8022 void MacroAssembler::restore_rax(Register tmp) { 8023 if (tmp == noreg) pop(rax); 8024 else if (tmp != rax) mov(rax, tmp); 8025 } 8026 8027 void MacroAssembler::round_to(Register reg, int modulus) { 8028 addptr(reg, modulus - 1); 8029 andptr(reg, -modulus); 8030 } 8031 8032 void MacroAssembler::save_rax(Register tmp) { 8033 if (tmp == noreg) push(rax); 8034 else if (tmp != rax) mov(tmp, rax); 8035 } 8036 8037 // Write serialization page so VM thread can do a pseudo remote membar. 8038 // We use the current thread pointer to calculate a thread specific 8039 // offset to write to within the page. This minimizes bus traffic 8040 // due to cache line collision. 8041 void MacroAssembler::serialize_memory(Register thread, Register tmp) { 8042 movl(tmp, thread); 8043 shrl(tmp, os::get_serialize_page_shift_count()); 8044 andl(tmp, (os::vm_page_size() - sizeof(int))); 8045 8046 Address index(noreg, tmp, Address::times_1); 8047 ExternalAddress page(os::get_memory_serialize_page()); 8048 8049 // Size of store must match masking code above 8050 movl(as_Address(ArrayAddress(page, index)), tmp); 8051 } 8052 8053 // Calls to C land 8054 // 8055 // When entering C land, the rbp, & rsp of the last Java frame have to be recorded 8056 // in the (thread-local) JavaThread object. When leaving C land, the last Java fp 8057 // has to be reset to 0. This is required to allow proper stack traversal. 
8058 void MacroAssembler::set_last_Java_frame(Register java_thread, 8059 Register last_java_sp, 8060 Register last_java_fp, 8061 address last_java_pc) { 8062 // determine java_thread register 8063 if (!java_thread->is_valid()) { 8064 java_thread = rdi; 8065 get_thread(java_thread); 8066 } 8067 // determine last_java_sp register 8068 if (!last_java_sp->is_valid()) { 8069 last_java_sp = rsp; 8070 } 8071 8072 // last_java_fp is optional 8073 8074 if (last_java_fp->is_valid()) { 8075 movptr(Address(java_thread, JavaThread::last_Java_fp_offset()), last_java_fp); 8076 } 8077 8078 // last_java_pc is optional 8079 8080 if (last_java_pc != NULL) { 8081 lea(Address(java_thread, 8082 JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()), 8083 InternalAddress(last_java_pc)); 8084 8085 } 8086 movptr(Address(java_thread, JavaThread::last_Java_sp_offset()), last_java_sp); 8087 } 8088 8089 void MacroAssembler::shlptr(Register dst, int imm8) { 8090 LP64_ONLY(shlq(dst, imm8)) NOT_LP64(shll(dst, imm8)); 8091 } 8092 8093 void MacroAssembler::shrptr(Register dst, int imm8) { 8094 LP64_ONLY(shrq(dst, imm8)) NOT_LP64(shrl(dst, imm8)); 8095 } 8096 8097 void MacroAssembler::sign_extend_byte(Register reg) { 8098 if (LP64_ONLY(true ||) (VM_Version::is_P6() && reg->has_byte_register())) { 8099 movsbl(reg, reg); // movsxb 8100 } else { 8101 shll(reg, 24); 8102 sarl(reg, 24); 8103 } 8104 } 8105 8106 void MacroAssembler::sign_extend_short(Register reg) { 8107 if (LP64_ONLY(true ||) VM_Version::is_P6()) { 8108 movswl(reg, reg); // movsxw 8109 } else { 8110 shll(reg, 16); 8111 sarl(reg, 16); 8112 } 8113 } 8114 8115 void MacroAssembler::testl(Register dst, AddressLiteral src) { 8116 assert(reachable(src), "Address should be reachable"); 8117 testl(dst, as_Address(src)); 8118 } 8119 8120 void MacroAssembler::sqrtsd(XMMRegister dst, AddressLiteral src) { 8121 if (reachable(src)) { 8122 Assembler::sqrtsd(dst, as_Address(src)); 8123 } else { 8124 lea(rscratch1, src); 8125 Assembler::sqrtsd(dst, Address(rscratch1, 0)); 8126 } 8127 } 8128 8129 void MacroAssembler::sqrtss(XMMRegister dst, AddressLiteral src) { 8130 if (reachable(src)) { 8131 Assembler::sqrtss(dst, as_Address(src)); 8132 } else { 8133 lea(rscratch1, src); 8134 Assembler::sqrtss(dst, Address(rscratch1, 0)); 8135 } 8136 } 8137 8138 void MacroAssembler::subsd(XMMRegister dst, AddressLiteral src) { 8139 if (reachable(src)) { 8140 Assembler::subsd(dst, as_Address(src)); 8141 } else { 8142 lea(rscratch1, src); 8143 Assembler::subsd(dst, Address(rscratch1, 0)); 8144 } 8145 } 8146 8147 void MacroAssembler::subss(XMMRegister dst, AddressLiteral src) { 8148 if (reachable(src)) { 8149 Assembler::subss(dst, as_Address(src)); 8150 } else { 8151 lea(rscratch1, src); 8152 Assembler::subss(dst, Address(rscratch1, 0)); 8153 } 8154 } 8155 8156 void MacroAssembler::ucomisd(XMMRegister dst, AddressLiteral src) { 8157 if (reachable(src)) { 8158 Assembler::ucomisd(dst, as_Address(src)); 8159 } else { 8160 lea(rscratch1, src); 8161 Assembler::ucomisd(dst, Address(rscratch1, 0)); 8162 } 8163 } 8164 8165 void MacroAssembler::ucomiss(XMMRegister dst, AddressLiteral src) { 8166 if (reachable(src)) { 8167 Assembler::ucomiss(dst, as_Address(src)); 8168 } else { 8169 lea(rscratch1, src); 8170 Assembler::ucomiss(dst, Address(rscratch1, 0)); 8171 } 8172 } 8173 8174 void MacroAssembler::xorpd(XMMRegister dst, AddressLiteral src) { 8175 // Used in sign-bit flipping with aligned address. 
8176 assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes"); 8177 if (reachable(src)) { 8178 Assembler::xorpd(dst, as_Address(src)); 8179 } else { 8180 lea(rscratch1, src); 8181 Assembler::xorpd(dst, Address(rscratch1, 0)); 8182 } 8183 } 8184 8185 void MacroAssembler::xorps(XMMRegister dst, AddressLiteral src) { 8186 // Used in sign-bit flipping with aligned address. 8187 assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes"); 8188 if (reachable(src)) { 8189 Assembler::xorps(dst, as_Address(src)); 8190 } else { 8191 lea(rscratch1, src); 8192 Assembler::xorps(dst, Address(rscratch1, 0)); 8193 } 8194 } 8195 8196 // AVX 3-operands instructions 8197 8198 void MacroAssembler::vaddsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) { 8199 if (reachable(src)) { 8200 vaddsd(dst, nds, as_Address(src)); 8201 } else { 8202 lea(rscratch1, src); 8203 vaddsd(dst, nds, Address(rscratch1, 0)); 8204 } 8205 } 8206 8207 void MacroAssembler::vaddss(XMMRegister dst, XMMRegister nds, AddressLiteral src) { 8208 if (reachable(src)) { 8209 vaddss(dst, nds, as_Address(src)); 8210 } else { 8211 lea(rscratch1, src); 8212 vaddss(dst, nds, Address(rscratch1, 0)); 8213 } 8214 } 8215 8216 void MacroAssembler::vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256) { 8217 if (reachable(src)) { 8218 vandpd(dst, nds, as_Address(src), vector256); 8219 } else { 8220 lea(rscratch1, src); 8221 vandpd(dst, nds, Address(rscratch1, 0), vector256); 8222 } 8223 } 8224 8225 void MacroAssembler::vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256) { 8226 if (reachable(src)) { 8227 vandps(dst, nds, as_Address(src), vector256); 8228 } else { 8229 lea(rscratch1, src); 8230 vandps(dst, nds, Address(rscratch1, 0), vector256); 8231 } 8232 } 8233 8234 void MacroAssembler::vdivsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) { 8235 if (reachable(src)) { 8236 vdivsd(dst, nds, as_Address(src)); 8237 } else { 8238 lea(rscratch1, src); 8239 vdivsd(dst, nds, Address(rscratch1, 0)); 8240 } 8241 } 8242 8243 void MacroAssembler::vdivss(XMMRegister dst, XMMRegister nds, AddressLiteral src) { 8244 if (reachable(src)) { 8245 vdivss(dst, nds, as_Address(src)); 8246 } else { 8247 lea(rscratch1, src); 8248 vdivss(dst, nds, Address(rscratch1, 0)); 8249 } 8250 } 8251 8252 void MacroAssembler::vmulsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) { 8253 if (reachable(src)) { 8254 vmulsd(dst, nds, as_Address(src)); 8255 } else { 8256 lea(rscratch1, src); 8257 vmulsd(dst, nds, Address(rscratch1, 0)); 8258 } 8259 } 8260 8261 void MacroAssembler::vmulss(XMMRegister dst, XMMRegister nds, AddressLiteral src) { 8262 if (reachable(src)) { 8263 vmulss(dst, nds, as_Address(src)); 8264 } else { 8265 lea(rscratch1, src); 8266 vmulss(dst, nds, Address(rscratch1, 0)); 8267 } 8268 } 8269 8270 void MacroAssembler::vsubsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) { 8271 if (reachable(src)) { 8272 vsubsd(dst, nds, as_Address(src)); 8273 } else { 8274 lea(rscratch1, src); 8275 vsubsd(dst, nds, Address(rscratch1, 0)); 8276 } 8277 } 8278 8279 void MacroAssembler::vsubss(XMMRegister dst, XMMRegister nds, AddressLiteral src) { 8280 if (reachable(src)) { 8281 vsubss(dst, nds, as_Address(src)); 8282 } else { 8283 lea(rscratch1, src); 8284 vsubss(dst, nds, Address(rscratch1, 0)); 8285 } 8286 } 8287 8288 void MacroAssembler::vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256) { 
8289 if (reachable(src)) { 8290 vxorpd(dst, nds, as_Address(src), vector256); 8291 } else { 8292 lea(rscratch1, src); 8293 vxorpd(dst, nds, Address(rscratch1, 0), vector256); 8294 } 8295 } 8296 8297 void MacroAssembler::vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256) { 8298 if (reachable(src)) { 8299 vxorps(dst, nds, as_Address(src), vector256); 8300 } else { 8301 lea(rscratch1, src); 8302 vxorps(dst, nds, Address(rscratch1, 0), vector256); 8303 } 8304 } 8305 8306 8307 ////////////////////////////////////////////////////////////////////////////////// 8308 #ifndef SERIALGC 8309 8310 void MacroAssembler::g1_write_barrier_pre(Register obj, 8311 Register pre_val, 8312 Register thread, 8313 Register tmp, 8314 bool tosca_live, 8315 bool expand_call) { 8316 8317 // If expand_call is true then we expand the call_VM_leaf macro 8318 // directly to skip generating the check by 8319 // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp. 8320 8321 #ifdef _LP64 8322 assert(thread == r15_thread, "must be"); 8323 #endif // _LP64 8324 8325 Label done; 8326 Label runtime; 8327 8328 assert(pre_val != noreg, "check this code"); 8329 8330 if (obj != noreg) { 8331 assert_different_registers(obj, pre_val, tmp); 8332 assert(pre_val != rax, "check this code"); 8333 } 8334 8335 Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() + 8336 PtrQueue::byte_offset_of_active())); 8337 Address index(thread, in_bytes(JavaThread::satb_mark_queue_offset() + 8338 PtrQueue::byte_offset_of_index())); 8339 Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() + 8340 PtrQueue::byte_offset_of_buf())); 8341 8342 8343 // Is marking active? 8344 if (in_bytes(PtrQueue::byte_width_of_active()) == 4) { 8345 cmpl(in_progress, 0); 8346 } else { 8347 assert(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption"); 8348 cmpb(in_progress, 0); 8349 } 8350 jcc(Assembler::equal, done); 8351 8352 // Do we need to load the previous value? 8353 if (obj != noreg) { 8354 load_heap_oop(pre_val, Address(obj, 0)); 8355 } 8356 8357 // Is the previous value null? 8358 cmpptr(pre_val, (int32_t) NULL_WORD); 8359 jcc(Assembler::equal, done); 8360 8361 // Can we store original value in the thread's buffer? 8362 // Is index == 0? 8363 // (The index field is typed as size_t.) 8364 8365 movptr(tmp, index); // tmp := *index_adr 8366 cmpptr(tmp, 0); // tmp == 0? 8367 jcc(Assembler::equal, runtime); // If yes, goto runtime 8368 8369 subptr(tmp, wordSize); // tmp := tmp - wordSize 8370 movptr(index, tmp); // *index_adr := tmp 8371 addptr(tmp, buffer); // tmp := tmp + *buffer_adr 8372 8373 // Record the previous value 8374 movptr(Address(tmp, 0), pre_val); 8375 jmp(done); 8376 8377 bind(runtime); 8378 // save the live input values 8379 if(tosca_live) push(rax); 8380 8381 if (obj != noreg && obj != rax) 8382 push(obj); 8383 8384 if (pre_val != rax) 8385 push(pre_val); 8386 8387 // Calling the runtime using the regular call_VM_leaf mechanism generates 8388 // code (generated by InterpreterMacroAssember::call_VM_leaf_base) 8389 // that checks that the *(ebp+frame::interpreter_frame_last_sp) == NULL. 8390 // 8391 // If we care generating the pre-barrier without a frame (e.g. in the 8392 // intrinsified Reference.get() routine) then ebp might be pointing to 8393 // the caller frame and so this check will most likely fail at runtime. 8394 // 8395 // Expanding the call directly bypasses the generation of the check. 
8396 // So when we do not have have a full interpreter frame on the stack 8397 // expand_call should be passed true. 8398 8399 NOT_LP64( push(thread); ) 8400 8401 if (expand_call) { 8402 LP64_ONLY( assert(pre_val != c_rarg1, "smashed arg"); ) 8403 pass_arg1(this, thread); 8404 pass_arg0(this, pre_val); 8405 MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), 2); 8406 } else { 8407 call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), pre_val, thread); 8408 } 8409 8410 NOT_LP64( pop(thread); ) 8411 8412 // save the live input values 8413 if (pre_val != rax) 8414 pop(pre_val); 8415 8416 if (obj != noreg && obj != rax) 8417 pop(obj); 8418 8419 if(tosca_live) pop(rax); 8420 8421 bind(done); 8422 } 8423 8424 void MacroAssembler::g1_write_barrier_post(Register store_addr, 8425 Register new_val, 8426 Register thread, 8427 Register tmp, 8428 Register tmp2) { 8429 #ifdef _LP64 8430 assert(thread == r15_thread, "must be"); 8431 #endif // _LP64 8432 8433 Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() + 8434 PtrQueue::byte_offset_of_index())); 8435 Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() + 8436 PtrQueue::byte_offset_of_buf())); 8437 8438 BarrierSet* bs = Universe::heap()->barrier_set(); 8439 CardTableModRefBS* ct = (CardTableModRefBS*)bs; 8440 Label done; 8441 Label runtime; 8442 8443 // Does store cross heap regions? 8444 8445 movptr(tmp, store_addr); 8446 xorptr(tmp, new_val); 8447 shrptr(tmp, HeapRegion::LogOfHRGrainBytes); 8448 jcc(Assembler::equal, done); 8449 8450 // crosses regions, storing NULL? 8451 8452 cmpptr(new_val, (int32_t) NULL_WORD); 8453 jcc(Assembler::equal, done); 8454 8455 // storing region crossing non-NULL, is card already dirty? 8456 8457 ExternalAddress cardtable((address) ct->byte_map_base); 8458 assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code"); 8459 #ifdef _LP64 8460 const Register card_addr = tmp; 8461 8462 movq(card_addr, store_addr); 8463 shrq(card_addr, CardTableModRefBS::card_shift); 8464 8465 lea(tmp2, cardtable); 8466 8467 // get the address of the card 8468 addq(card_addr, tmp2); 8469 #else 8470 const Register card_index = tmp; 8471 8472 movl(card_index, store_addr); 8473 shrl(card_index, CardTableModRefBS::card_shift); 8474 8475 Address index(noreg, card_index, Address::times_1); 8476 const Register card_addr = tmp; 8477 lea(card_addr, as_Address(ArrayAddress(cardtable, index))); 8478 #endif 8479 cmpb(Address(card_addr, 0), 0); 8480 jcc(Assembler::equal, done); 8481 8482 // storing a region crossing, non-NULL oop, card is clean. 8483 // dirty card and log. 
8484 8485 movb(Address(card_addr, 0), 0); 8486 8487 cmpl(queue_index, 0); 8488 jcc(Assembler::equal, runtime); 8489 subl(queue_index, wordSize); 8490 movptr(tmp2, buffer); 8491 #ifdef _LP64 8492 movslq(rscratch1, queue_index); 8493 addq(tmp2, rscratch1); 8494 movq(Address(tmp2, 0), card_addr); 8495 #else 8496 addl(tmp2, queue_index); 8497 movl(Address(tmp2, 0), card_index); 8498 #endif 8499 jmp(done); 8500 8501 bind(runtime); 8502 // save the live input values 8503 push(store_addr); 8504 push(new_val); 8505 #ifdef _LP64 8506 call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, r15_thread); 8507 #else 8508 push(thread); 8509 call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, thread); 8510 pop(thread); 8511 #endif 8512 pop(new_val); 8513 pop(store_addr); 8514 8515 bind(done); 8516 } 8517 8518 #endif // SERIALGC 8519 ////////////////////////////////////////////////////////////////////////////////// 8520 8521 8522 void MacroAssembler::store_check(Register obj) { 8523 // Does a store check for the oop in register obj. The content of 8524 // register obj is destroyed afterwards. 8525 store_check_part_1(obj); 8526 store_check_part_2(obj); 8527 } 8528 8529 void MacroAssembler::store_check(Register obj, Address dst) { 8530 store_check(obj); 8531 } 8532 8533 8534 // split the store check operation so that other instructions can be scheduled inbetween 8535 void MacroAssembler::store_check_part_1(Register obj) { 8536 BarrierSet* bs = Universe::heap()->barrier_set(); 8537 assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind"); 8538 shrptr(obj, CardTableModRefBS::card_shift); 8539 } 8540 8541 void MacroAssembler::store_check_part_2(Register obj) { 8542 BarrierSet* bs = Universe::heap()->barrier_set(); 8543 assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind"); 8544 CardTableModRefBS* ct = (CardTableModRefBS*)bs; 8545 assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code"); 8546 8547 // The calculation for byte_map_base is as follows: 8548 // byte_map_base = _byte_map - (uintptr_t(low_bound) >> card_shift); 8549 // So this essentially converts an address to a displacement and 8550 // it will never need to be relocated. On 64bit however the value may be too 8551 // large for a 32bit displacement 8552 8553 intptr_t disp = (intptr_t) ct->byte_map_base; 8554 if (is_simm32(disp)) { 8555 Address cardtable(noreg, obj, Address::times_1, disp); 8556 movb(cardtable, 0); 8557 } else { 8558 // By doing it as an ExternalAddress disp could be converted to a rip-relative 8559 // displacement and done in a single instruction given favorable mapping and 8560 // a smarter version of as_Address. Worst case it is two instructions which 8561 // is no worse off then loading disp into a register and doing as a simple 8562 // Address() as above. 8563 // We can't do as ExternalAddress as the only style since if disp == 0 we'll 8564 // assert since NULL isn't acceptable in a reloci (see 6644928). In any case 8565 // in some cases we'll get a single instruction version. 
8566 8567 ExternalAddress cardtable((address)disp); 8568 Address index(noreg, obj, Address::times_1); 8569 movb(as_Address(ArrayAddress(cardtable, index)), 0); 8570 } 8571 } 8572 8573 void MacroAssembler::subptr(Register dst, int32_t imm32) { 8574 LP64_ONLY(subq(dst, imm32)) NOT_LP64(subl(dst, imm32)); 8575 } 8576 8577 // Force generation of a 4 byte immediate value even if it fits into 8bit 8578 void MacroAssembler::subptr_imm32(Register dst, int32_t imm32) { 8579 LP64_ONLY(subq_imm32(dst, imm32)) NOT_LP64(subl_imm32(dst, imm32)); 8580 } 8581 8582 void MacroAssembler::subptr(Register dst, Register src) { 8583 LP64_ONLY(subq(dst, src)) NOT_LP64(subl(dst, src)); 8584 } 8585 8586 // C++ bool manipulation 8587 void MacroAssembler::testbool(Register dst) { 8588 if(sizeof(bool) == 1) 8589 testb(dst, 0xff); 8590 else if(sizeof(bool) == 2) { 8591 // testw implementation needed for two byte bools 8592 ShouldNotReachHere(); 8593 } else if(sizeof(bool) == 4) 8594 testl(dst, dst); 8595 else 8596 // unsupported 8597 ShouldNotReachHere(); 8598 } 8599 8600 void MacroAssembler::testptr(Register dst, Register src) { 8601 LP64_ONLY(testq(dst, src)) NOT_LP64(testl(dst, src)); 8602 } 8603 8604 // Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes. 8605 void MacroAssembler::tlab_allocate(Register obj, 8606 Register var_size_in_bytes, 8607 int con_size_in_bytes, 8608 Register t1, 8609 Register t2, 8610 Label& slow_case) { 8611 assert_different_registers(obj, t1, t2); 8612 assert_different_registers(obj, var_size_in_bytes, t1); 8613 Register end = t2; 8614 Register thread = NOT_LP64(t1) LP64_ONLY(r15_thread); 8615 8616 verify_tlab(); 8617 8618 NOT_LP64(get_thread(thread)); 8619 8620 movptr(obj, Address(thread, JavaThread::tlab_top_offset())); 8621 if (var_size_in_bytes == noreg) { 8622 lea(end, Address(obj, con_size_in_bytes)); 8623 } else { 8624 lea(end, Address(obj, var_size_in_bytes, Address::times_1)); 8625 } 8626 cmpptr(end, Address(thread, JavaThread::tlab_end_offset())); 8627 jcc(Assembler::above, slow_case); 8628 8629 // update the tlab top pointer 8630 movptr(Address(thread, JavaThread::tlab_top_offset()), end); 8631 8632 // recover var_size_in_bytes if necessary 8633 if (var_size_in_bytes == end) { 8634 subptr(var_size_in_bytes, obj); 8635 } 8636 verify_tlab(); 8637 } 8638 8639 // Preserves rbx, and rdx. 8640 Register MacroAssembler::tlab_refill(Label& retry, 8641 Label& try_eden, 8642 Label& slow_case) { 8643 Register top = rax; 8644 Register t1 = rcx; 8645 Register t2 = rsi; 8646 Register thread_reg = NOT_LP64(rdi) LP64_ONLY(r15_thread); 8647 assert_different_registers(top, thread_reg, t1, t2, /* preserve: */ rbx, rdx); 8648 Label do_refill, discard_tlab; 8649 8650 if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) { 8651 // No allocation in the shared eden. 8652 jmp(slow_case); 8653 } 8654 8655 NOT_LP64(get_thread(thread_reg)); 8656 8657 movptr(top, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset()))); 8658 movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_end_offset()))); 8659 8660 // calculate amount of free space 8661 subptr(t1, top); 8662 shrptr(t1, LogHeapWordSize); 8663 8664 // Retain tlab and allocate object in shared space if 8665 // the amount free in the tlab is too large to discard. 8666 cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset()))); 8667 jcc(Assembler::lessEqual, discard_tlab); 8668 8669 // Retain 8670 // %%% yuck as movptr... 
8671 movptr(t2, (int32_t) ThreadLocalAllocBuffer::refill_waste_limit_increment()); 8672 addptr(Address(thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset())), t2); 8673 if (TLABStats) { 8674 // increment number of slow_allocations 8675 addl(Address(thread_reg, in_bytes(JavaThread::tlab_slow_allocations_offset())), 1); 8676 } 8677 jmp(try_eden); 8678 8679 bind(discard_tlab); 8680 if (TLABStats) { 8681 // increment number of refills 8682 addl(Address(thread_reg, in_bytes(JavaThread::tlab_number_of_refills_offset())), 1); 8683 // accumulate wastage -- t1 is amount free in tlab 8684 addl(Address(thread_reg, in_bytes(JavaThread::tlab_fast_refill_waste_offset())), t1); 8685 } 8686 8687 // if tlab is currently allocated (top or end != null) then 8688 // fill [top, end + alignment_reserve) with array object 8689 testptr(top, top); 8690 jcc(Assembler::zero, do_refill); 8691 8692 // set up the mark word 8693 movptr(Address(top, oopDesc::mark_offset_in_bytes()), (intptr_t)markOopDesc::prototype()->copy_set_hash(0x2)); 8694 // set the length to the remaining space 8695 subptr(t1, typeArrayOopDesc::header_size(T_INT)); 8696 addptr(t1, (int32_t)ThreadLocalAllocBuffer::alignment_reserve()); 8697 shlptr(t1, log2_intptr(HeapWordSize/sizeof(jint))); 8698 movl(Address(top, arrayOopDesc::length_offset_in_bytes()), t1); 8699 // set klass to intArrayKlass 8700 // dubious reloc why not an oop reloc? 8701 movptr(t1, ExternalAddress((address)Universe::intArrayKlassObj_addr())); 8702 // store klass last. concurrent gcs assumes klass length is valid if 8703 // klass field is not null. 8704 store_klass(top, t1); 8705 8706 movptr(t1, top); 8707 subptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_start_offset()))); 8708 incr_allocated_bytes(thread_reg, t1, 0); 8709 8710 // refill the tlab with an eden allocation 8711 bind(do_refill); 8712 movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_size_offset()))); 8713 shlptr(t1, LogHeapWordSize); 8714 // allocate new tlab, address returned in top 8715 eden_allocate(top, t1, 0, t2, slow_case); 8716 8717 // Check that t1 was preserved in eden_allocate. 
8718 #ifdef ASSERT 8719 if (UseTLAB) { 8720 Label ok; 8721 Register tsize = rsi; 8722 assert_different_registers(tsize, thread_reg, t1); 8723 push(tsize); 8724 movptr(tsize, Address(thread_reg, in_bytes(JavaThread::tlab_size_offset()))); 8725 shlptr(tsize, LogHeapWordSize); 8726 cmpptr(t1, tsize); 8727 jcc(Assembler::equal, ok); 8728 stop("assert(t1 != tlab size)"); 8729 should_not_reach_here(); 8730 8731 bind(ok); 8732 pop(tsize); 8733 } 8734 #endif 8735 movptr(Address(thread_reg, in_bytes(JavaThread::tlab_start_offset())), top); 8736 movptr(Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())), top); 8737 addptr(top, t1); 8738 subptr(top, (int32_t)ThreadLocalAllocBuffer::alignment_reserve_in_bytes()); 8739 movptr(Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())), top); 8740 verify_tlab(); 8741 jmp(retry); 8742 8743 return thread_reg; // for use by caller 8744 } 8745 8746 void MacroAssembler::incr_allocated_bytes(Register thread, 8747 Register var_size_in_bytes, 8748 int con_size_in_bytes, 8749 Register t1) { 8750 if (!thread->is_valid()) { 8751 #ifdef _LP64 8752 thread = r15_thread; 8753 #else 8754 assert(t1->is_valid(), "need temp reg"); 8755 thread = t1; 8756 get_thread(thread); 8757 #endif 8758 } 8759 8760 #ifdef _LP64 8761 if (var_size_in_bytes->is_valid()) { 8762 addq(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), var_size_in_bytes); 8763 } else { 8764 addq(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), con_size_in_bytes); 8765 } 8766 #else 8767 if (var_size_in_bytes->is_valid()) { 8768 addl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), var_size_in_bytes); 8769 } else { 8770 addl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), con_size_in_bytes); 8771 } 8772 adcl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())+4), 0); 8773 #endif 8774 } 8775 8776 void MacroAssembler::fp_runtime_fallback(address runtime_entry, int nb_args, int num_fpu_regs_in_use) { 8777 pusha(); 8778 8779 // if we are coming from c1, xmm registers may be live 8780 if (UseSSE >= 1) { 8781 subptr(rsp, sizeof(jdouble)* LP64_ONLY(16) NOT_LP64(8)); 8782 } 8783 int off = 0; 8784 if (UseSSE == 1) { 8785 movflt(Address(rsp,off++*sizeof(jdouble)),xmm0); 8786 movflt(Address(rsp,off++*sizeof(jdouble)),xmm1); 8787 movflt(Address(rsp,off++*sizeof(jdouble)),xmm2); 8788 movflt(Address(rsp,off++*sizeof(jdouble)),xmm3); 8789 movflt(Address(rsp,off++*sizeof(jdouble)),xmm4); 8790 movflt(Address(rsp,off++*sizeof(jdouble)),xmm5); 8791 movflt(Address(rsp,off++*sizeof(jdouble)),xmm6); 8792 movflt(Address(rsp,off++*sizeof(jdouble)),xmm7); 8793 } else if (UseSSE >= 2) { 8794 movdbl(Address(rsp,off++*sizeof(jdouble)),xmm0); 8795 movdbl(Address(rsp,off++*sizeof(jdouble)),xmm1); 8796 movdbl(Address(rsp,off++*sizeof(jdouble)),xmm2); 8797 movdbl(Address(rsp,off++*sizeof(jdouble)),xmm3); 8798 movdbl(Address(rsp,off++*sizeof(jdouble)),xmm4); 8799 movdbl(Address(rsp,off++*sizeof(jdouble)),xmm5); 8800 movdbl(Address(rsp,off++*sizeof(jdouble)),xmm6); 8801 movdbl(Address(rsp,off++*sizeof(jdouble)),xmm7); 8802 #ifdef _LP64 8803 movdbl(Address(rsp,off++*sizeof(jdouble)),xmm8); 8804 movdbl(Address(rsp,off++*sizeof(jdouble)),xmm9); 8805 movdbl(Address(rsp,off++*sizeof(jdouble)),xmm10); 8806 movdbl(Address(rsp,off++*sizeof(jdouble)),xmm11); 8807 movdbl(Address(rsp,off++*sizeof(jdouble)),xmm12); 8808 movdbl(Address(rsp,off++*sizeof(jdouble)),xmm13); 8809 movdbl(Address(rsp,off++*sizeof(jdouble)),xmm14); 8810 
movdbl(Address(rsp,off++*sizeof(jdouble)),xmm15); 8811 #endif 8812 } 8813 8814 // Preserve registers across runtime call 8815 int incoming_argument_and_return_value_offset = -1; 8816 if (num_fpu_regs_in_use > 1) { 8817 // Must preserve all other FPU regs (could alternatively convert 8818 // SharedRuntime::dsin, dcos etc. into assembly routines known not to trash 8819 // FPU state, but can not trust C compiler) 8820 NEEDS_CLEANUP; 8821 // NOTE that in this case we also push the incoming argument(s) to 8822 // the stack and restore it later; we also use this stack slot to 8823 // hold the return value from dsin, dcos etc. 8824 for (int i = 0; i < num_fpu_regs_in_use; i++) { 8825 subptr(rsp, sizeof(jdouble)); 8826 fstp_d(Address(rsp, 0)); 8827 } 8828 incoming_argument_and_return_value_offset = sizeof(jdouble)*(num_fpu_regs_in_use-1); 8829 for (int i = nb_args-1; i >= 0; i--) { 8830 fld_d(Address(rsp, incoming_argument_and_return_value_offset-i*sizeof(jdouble))); 8831 } 8832 } 8833 8834 subptr(rsp, nb_args*sizeof(jdouble)); 8835 for (int i = 0; i < nb_args; i++) { 8836 fstp_d(Address(rsp, i*sizeof(jdouble))); 8837 } 8838 8839 #ifdef _LP64 8840 if (nb_args > 0) { 8841 movdbl(xmm0, Address(rsp, 0)); 8842 } 8843 if (nb_args > 1) { 8844 movdbl(xmm1, Address(rsp, sizeof(jdouble))); 8845 } 8846 assert(nb_args <= 2, "unsupported number of args"); 8847 #endif // _LP64 8848 8849 // NOTE: we must not use call_VM_leaf here because that requires a 8850 // complete interpreter frame in debug mode -- same bug as 4387334 8851 // MacroAssembler::call_VM_leaf_base is perfectly safe and will 8852 // do proper 64bit abi 8853 8854 NEEDS_CLEANUP; 8855 // Need to add stack banging before this runtime call if it needs to 8856 // be taken; however, there is no generic stack banging routine at 8857 // the MacroAssembler level 8858 8859 MacroAssembler::call_VM_leaf_base(runtime_entry, 0); 8860 8861 #ifdef _LP64 8862 movsd(Address(rsp, 0), xmm0); 8863 fld_d(Address(rsp, 0)); 8864 #endif // _LP64 8865 addptr(rsp, sizeof(jdouble) * nb_args); 8866 if (num_fpu_regs_in_use > 1) { 8867 // Must save return value to stack and then restore entire FPU 8868 // stack except incoming arguments 8869 fstp_d(Address(rsp, incoming_argument_and_return_value_offset)); 8870 for (int i = 0; i < num_fpu_regs_in_use - nb_args; i++) { 8871 fld_d(Address(rsp, 0)); 8872 addptr(rsp, sizeof(jdouble)); 8873 } 8874 fld_d(Address(rsp, (nb_args-1)*sizeof(jdouble))); 8875 addptr(rsp, sizeof(jdouble) * nb_args); 8876 } 8877 8878 off = 0; 8879 if (UseSSE == 1) { 8880 movflt(xmm0, Address(rsp,off++*sizeof(jdouble))); 8881 movflt(xmm1, Address(rsp,off++*sizeof(jdouble))); 8882 movflt(xmm2, Address(rsp,off++*sizeof(jdouble))); 8883 movflt(xmm3, Address(rsp,off++*sizeof(jdouble))); 8884 movflt(xmm4, Address(rsp,off++*sizeof(jdouble))); 8885 movflt(xmm5, Address(rsp,off++*sizeof(jdouble))); 8886 movflt(xmm6, Address(rsp,off++*sizeof(jdouble))); 8887 movflt(xmm7, Address(rsp,off++*sizeof(jdouble))); 8888 } else if (UseSSE >= 2) { 8889 movdbl(xmm0, Address(rsp,off++*sizeof(jdouble))); 8890 movdbl(xmm1, Address(rsp,off++*sizeof(jdouble))); 8891 movdbl(xmm2, Address(rsp,off++*sizeof(jdouble))); 8892 movdbl(xmm3, Address(rsp,off++*sizeof(jdouble))); 8893 movdbl(xmm4, Address(rsp,off++*sizeof(jdouble))); 8894 movdbl(xmm5, Address(rsp,off++*sizeof(jdouble))); 8895 movdbl(xmm6, Address(rsp,off++*sizeof(jdouble))); 8896 movdbl(xmm7, Address(rsp,off++*sizeof(jdouble))); 8897 #ifdef _LP64 8898 movdbl(xmm8, Address(rsp,off++*sizeof(jdouble))); 8899 movdbl(xmm9, 
Address(rsp,off++*sizeof(jdouble))); 8900 movdbl(xmm10, Address(rsp,off++*sizeof(jdouble))); 8901 movdbl(xmm11, Address(rsp,off++*sizeof(jdouble))); 8902 movdbl(xmm12, Address(rsp,off++*sizeof(jdouble))); 8903 movdbl(xmm13, Address(rsp,off++*sizeof(jdouble))); 8904 movdbl(xmm14, Address(rsp,off++*sizeof(jdouble))); 8905 movdbl(xmm15, Address(rsp,off++*sizeof(jdouble))); 8906 #endif 8907 } 8908 if (UseSSE >= 1) { 8909 addptr(rsp, sizeof(jdouble)* LP64_ONLY(16) NOT_LP64(8)); 8910 } 8911 popa(); 8912 } 8913 8914 static const double pi_4 = 0.7853981633974483; 8915 8916 void MacroAssembler::trigfunc(char trig, int num_fpu_regs_in_use) { 8917 // A hand-coded argument reduction for values in fabs(pi/4, pi/2) 8918 // was attempted in this code; unfortunately it appears that the 8919 // switch to 80-bit precision and back causes this to be 8920 // unprofitable compared with simply performing a runtime call if 8921 // the argument is out of the (-pi/4, pi/4) range. 8922 8923 Register tmp = noreg; 8924 if (!VM_Version::supports_cmov()) { 8925 // fcmp needs a temporary so preserve rbx, 8926 tmp = rbx; 8927 push(tmp); 8928 } 8929 8930 Label slow_case, done; 8931 8932 ExternalAddress pi4_adr = (address)&pi_4; 8933 if (reachable(pi4_adr)) { 8934 // x ?<= pi/4 8935 fld_d(pi4_adr); 8936 fld_s(1); // Stack: X PI/4 X 8937 fabs(); // Stack: |X| PI/4 X 8938 fcmp(tmp); 8939 jcc(Assembler::above, slow_case); 8940 8941 // fastest case: -pi/4 <= x <= pi/4 8942 switch(trig) { 8943 case 's': 8944 fsin(); 8945 break; 8946 case 'c': 8947 fcos(); 8948 break; 8949 case 't': 8950 ftan(); 8951 break; 8952 default: 8953 assert(false, "bad intrinsic"); 8954 break; 8955 } 8956 jmp(done); 8957 } 8958 8959 // slow case: runtime call 8960 bind(slow_case); 8961 8962 switch(trig) { 8963 case 's': 8964 { 8965 fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dsin), 1, num_fpu_regs_in_use); 8966 } 8967 break; 8968 case 'c': 8969 { 8970 fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dcos), 1, num_fpu_regs_in_use); 8971 } 8972 break; 8973 case 't': 8974 { 8975 fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dtan), 1, num_fpu_regs_in_use); 8976 } 8977 break; 8978 default: 8979 assert(false, "bad intrinsic"); 8980 break; 8981 } 8982 8983 // Come here with result in F-TOS 8984 bind(done); 8985 8986 if (tmp != noreg) { 8987 pop(tmp); 8988 } 8989 } 8990 8991 8992 // Look up the method for a megamorphic invokeinterface call. 8993 // The target method is determined by <intf_klass, itable_index>. 8994 // The receiver klass is in recv_klass. 8995 // On success, the result will be in method_result, and execution falls through. 8996 // On failure, execution transfers to the given label. 
8997 void MacroAssembler::lookup_interface_method(Register recv_klass, 8998 Register intf_klass, 8999 RegisterOrConstant itable_index, 9000 Register method_result, 9001 Register scan_temp, 9002 Label& L_no_such_interface) { 9003 assert_different_registers(recv_klass, intf_klass, method_result, scan_temp); 9004 assert(itable_index.is_constant() || itable_index.as_register() == method_result, 9005 "caller must use same register for non-constant itable index as for method"); 9006 9007 // Compute start of first itableOffsetEntry (which is at the end of the vtable) 9008 int vtable_base = instanceKlass::vtable_start_offset() * wordSize; 9009 int itentry_off = itableMethodEntry::method_offset_in_bytes(); 9010 int scan_step = itableOffsetEntry::size() * wordSize; 9011 int vte_size = vtableEntry::size() * wordSize; 9012 Address::ScaleFactor times_vte_scale = Address::times_ptr; 9013 assert(vte_size == wordSize, "else adjust times_vte_scale"); 9014 9015 movl(scan_temp, Address(recv_klass, instanceKlass::vtable_length_offset() * wordSize)); 9016 9017 // %%% Could store the aligned, prescaled offset in the klassoop. 9018 lea(scan_temp, Address(recv_klass, scan_temp, times_vte_scale, vtable_base)); 9019 if (HeapWordsPerLong > 1) { 9020 // Round up to align_object_offset boundary 9021 // see code for instanceKlass::start_of_itable! 9022 round_to(scan_temp, BytesPerLong); 9023 } 9024 9025 // Adjust recv_klass by scaled itable_index, so we can free itable_index. 9026 assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below"); 9027 lea(recv_klass, Address(recv_klass, itable_index, Address::times_ptr, itentry_off)); 9028 9029 // for (scan = klass->itable(); scan->interface() != NULL; scan += scan_step) { 9030 // if (scan->interface() == intf) { 9031 // result = (klass + scan->offset() + itable_index); 9032 // } 9033 // } 9034 Label search, found_method; 9035 9036 for (int peel = 1; peel >= 0; peel--) { 9037 movptr(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes())); 9038 cmpptr(intf_klass, method_result); 9039 9040 if (peel) { 9041 jccb(Assembler::equal, found_method); 9042 } else { 9043 jccb(Assembler::notEqual, search); 9044 // (invert the test to fall through to found_method...) 9045 } 9046 9047 if (!peel) break; 9048 9049 bind(search); 9050 9051 // Check that the previous entry is non-null. A null entry means that 9052 // the receiver class doesn't implement the interface, and wasn't the 9053 // same as when the caller was compiled. 9054 testptr(method_result, method_result); 9055 jcc(Assembler::zero, L_no_such_interface); 9056 addptr(scan_temp, scan_step); 9057 } 9058 9059 bind(found_method); 9060 9061 // Got a hit. 
9062 movl(scan_temp, Address(scan_temp, itableOffsetEntry::offset_offset_in_bytes())); 9063 movptr(method_result, Address(recv_klass, scan_temp, Address::times_1)); 9064 } 9065 9066 9067 void MacroAssembler::check_klass_subtype(Register sub_klass, 9068 Register super_klass, 9069 Register temp_reg, 9070 Label& L_success) { 9071 Label L_failure; 9072 check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg, &L_success, &L_failure, NULL); 9073 check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, noreg, &L_success, NULL); 9074 bind(L_failure); 9075 } 9076 9077 9078 void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass, 9079 Register super_klass, 9080 Register temp_reg, 9081 Label* L_success, 9082 Label* L_failure, 9083 Label* L_slow_path, 9084 RegisterOrConstant super_check_offset) { 9085 assert_different_registers(sub_klass, super_klass, temp_reg); 9086 bool must_load_sco = (super_check_offset.constant_or_zero() == -1); 9087 if (super_check_offset.is_register()) { 9088 assert_different_registers(sub_klass, super_klass, 9089 super_check_offset.as_register()); 9090 } else if (must_load_sco) { 9091 assert(temp_reg != noreg, "supply either a temp or a register offset"); 9092 } 9093 9094 Label L_fallthrough; 9095 int label_nulls = 0; 9096 if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } 9097 if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } 9098 if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; } 9099 assert(label_nulls <= 1, "at most one NULL in the batch"); 9100 9101 int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); 9102 int sco_offset = in_bytes(Klass::super_check_offset_offset()); 9103 Address super_check_offset_addr(super_klass, sco_offset); 9104 9105 // Hacked jcc, which "knows" that L_fallthrough, at least, is in 9106 // range of a jccb. If this routine grows larger, reconsider at 9107 // least some of these. 9108 #define local_jcc(assembler_cond, label) \ 9109 if (&(label) == &L_fallthrough) jccb(assembler_cond, label); \ 9110 else jcc( assembler_cond, label) /*omit semi*/ 9111 9112 // Hacked jmp, which may only be used just before L_fallthrough. 9113 #define final_jmp(label) \ 9114 if (&(label) == &L_fallthrough) { /*do nothing*/ } \ 9115 else jmp(label) /*omit semi*/ 9116 9117 // If the pointers are equal, we are done (e.g., String[] elements). 9118 // This self-check enables sharing of secondary supertype arrays among 9119 // non-primary types such as array-of-interface. Otherwise, each such 9120 // type would need its own customized SSA. 9121 // We move this check to the front of the fast path because many 9122 // type checks are in fact trivially successful in this manner, 9123 // so we get a nicely predicted branch right at the start of the check. 9124 cmpptr(sub_klass, super_klass); 9125 local_jcc(Assembler::equal, *L_success); 9126 9127 // Check the supertype display: 9128 if (must_load_sco) { 9129 // Positive movl does right thing on LP64. 9130 movl(temp_reg, super_check_offset_addr); 9131 super_check_offset = RegisterOrConstant(temp_reg); 9132 } 9133 Address super_check_addr(sub_klass, super_check_offset, Address::times_1, 0); 9134 cmpptr(super_klass, super_check_addr); // load displayed supertype 9135 9136 // This check has worked decisively for primary supers. 9137 // Secondary supers are sought in the super_cache ('super_cache_addr'). 9138 // (Secondary supers are interfaces and very deeply nested subtypes.) 
9139 // This works in the same check above because of a tricky aliasing 9140 // between the super_cache and the primary super display elements. 9141 // (The 'super_check_addr' can address either, as the case requires.) 9142 // Note that the cache is updated below if it does not help us find 9143 // what we need immediately. 9144 // So if it was a primary super, we can just fail immediately. 9145 // Otherwise, it's the slow path for us (no success at this point). 9146 9147 if (super_check_offset.is_register()) { 9148 local_jcc(Assembler::equal, *L_success); 9149 cmpl(super_check_offset.as_register(), sc_offset); 9150 if (L_failure == &L_fallthrough) { 9151 local_jcc(Assembler::equal, *L_slow_path); 9152 } else { 9153 local_jcc(Assembler::notEqual, *L_failure); 9154 final_jmp(*L_slow_path); 9155 } 9156 } else if (super_check_offset.as_constant() == sc_offset) { 9157 // Need a slow path; fast failure is impossible. 9158 if (L_slow_path == &L_fallthrough) { 9159 local_jcc(Assembler::equal, *L_success); 9160 } else { 9161 local_jcc(Assembler::notEqual, *L_slow_path); 9162 final_jmp(*L_success); 9163 } 9164 } else { 9165 // No slow path; it's a fast decision. 9166 if (L_failure == &L_fallthrough) { 9167 local_jcc(Assembler::equal, *L_success); 9168 } else { 9169 local_jcc(Assembler::notEqual, *L_failure); 9170 final_jmp(*L_success); 9171 } 9172 } 9173 9174 bind(L_fallthrough); 9175 9176 #undef local_jcc 9177 #undef final_jmp 9178 } 9179 9180 9181 void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass, 9182 Register super_klass, 9183 Register temp_reg, 9184 Register temp2_reg, 9185 Label* L_success, 9186 Label* L_failure, 9187 bool set_cond_codes) { 9188 assert_different_registers(sub_klass, super_klass, temp_reg); 9189 if (temp2_reg != noreg) 9190 assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg); 9191 #define IS_A_TEMP(reg) ((reg) == temp_reg || (reg) == temp2_reg) 9192 9193 Label L_fallthrough; 9194 int label_nulls = 0; 9195 if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } 9196 if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } 9197 assert(label_nulls <= 1, "at most one NULL in the batch"); 9198 9199 // a couple of useful fields in sub_klass: 9200 int ss_offset = in_bytes(Klass::secondary_supers_offset()); 9201 int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); 9202 Address secondary_supers_addr(sub_klass, ss_offset); 9203 Address super_cache_addr( sub_klass, sc_offset); 9204 9205 // Do a linear scan of the secondary super-klass chain. 9206 // This code is rarely used, so simplicity is a virtue here. 9207 // The repne_scan instruction uses fixed registers, which we must spill. 9208 // Don't worry too much about pre-existing connections with the input regs. 9209 9210 assert(sub_klass != rax, "killed reg"); // killed by mov(rax, super) 9211 assert(sub_klass != rcx, "killed reg"); // killed by lea(rcx, &pst_counter) 9212 9213 // Get super_klass value into rax (even if it was in rdi or rcx). 
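  // repne scas compares rax against successive words at [rdi] while counting rcx
  // down, so rax/rcx/rdi must hold the super klass, the array length and the
  // array data pointer; spill whichever of them is not already a temp.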
9214 bool pushed_rax = false, pushed_rcx = false, pushed_rdi = false; 9215 if (super_klass != rax || UseCompressedOops) { 9216 if (!IS_A_TEMP(rax)) { push(rax); pushed_rax = true; } 9217 mov(rax, super_klass); 9218 } 9219 if (!IS_A_TEMP(rcx)) { push(rcx); pushed_rcx = true; } 9220 if (!IS_A_TEMP(rdi)) { push(rdi); pushed_rdi = true; } 9221 9222 #ifndef PRODUCT 9223 int* pst_counter = &SharedRuntime::_partial_subtype_ctr; 9224 ExternalAddress pst_counter_addr((address) pst_counter); 9225 NOT_LP64( incrementl(pst_counter_addr) ); 9226 LP64_ONLY( lea(rcx, pst_counter_addr) ); 9227 LP64_ONLY( incrementl(Address(rcx, 0)) ); 9228 #endif //PRODUCT 9229 9230 // We will consult the secondary-super array. 9231 movptr(rdi, secondary_supers_addr); 9232 // Load the array length. (Positive movl does right thing on LP64.) 9233 movl(rcx, Address(rdi, arrayOopDesc::length_offset_in_bytes())); 9234 // Skip to start of data. 9235 addptr(rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); 9236 9237 // Scan RCX words at [RDI] for an occurrence of RAX. 9238 // Set NZ/Z based on last compare. 9239 // Z flag value will not be set by 'repne' if RCX == 0 since 'repne' does 9240 // not change flags (only scas instruction which is repeated sets flags). 9241 // Set Z = 0 (not equal) before 'repne' to indicate that class was not found. 9242 #ifdef _LP64 9243 // This part is tricky, as values in supers array could be 32 or 64 bit wide 9244 // and we store values in objArrays always encoded, thus we need to encode 9245 // the value of rax before repne. Note that rax is dead after the repne. 9246 if (UseCompressedOops) { 9247 encode_heap_oop_not_null(rax); // Changes flags. 9248 // The superclass is never null; it would be a basic system error if a null 9249 // pointer were to sneak in here. Note that we have already loaded the 9250 // Klass::super_check_offset from the super_klass in the fast path, 9251 // so if there is a null in that register, we are already in the afterlife. 9252 testl(rax,rax); // Set Z = 0 9253 repne_scanl(); 9254 } else 9255 #endif // _LP64 9256 { 9257 testptr(rax,rax); // Set Z = 0 9258 repne_scan(); 9259 } 9260 // Unspill the temp. registers: 9261 if (pushed_rdi) pop(rdi); 9262 if (pushed_rcx) pop(rcx); 9263 if (pushed_rax) pop(rax); 9264 9265 if (set_cond_codes) { 9266 // Special hack for the AD files: rdi is guaranteed non-zero. 9267 assert(!pushed_rdi, "rdi must be left non-NULL"); 9268 // Also, the condition codes are properly set Z/NZ on succeed/failure. 9269 } 9270 9271 if (L_failure == &L_fallthrough) 9272 jccb(Assembler::notEqual, *L_failure); 9273 else jcc(Assembler::notEqual, *L_failure); 9274 9275 // Success. Cache the super we found and proceed in triumph. 
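  // Record the hit in sub_klass's secondary_super_cache so that a later fast-path
  // check against the same super klass succeeds without rescanning the array.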
9276 movptr(super_cache_addr, super_klass); 9277 9278 if (L_success != &L_fallthrough) { 9279 jmp(*L_success); 9280 } 9281 9282 #undef IS_A_TEMP 9283 9284 bind(L_fallthrough); 9285 } 9286 9287 9288 void MacroAssembler::cmov32(Condition cc, Register dst, Address src) { 9289 if (VM_Version::supports_cmov()) { 9290 cmovl(cc, dst, src); 9291 } else { 9292 Label L; 9293 jccb(negate_condition(cc), L); 9294 movl(dst, src); 9295 bind(L); 9296 } 9297 } 9298 9299 void MacroAssembler::cmov32(Condition cc, Register dst, Register src) { 9300 if (VM_Version::supports_cmov()) { 9301 cmovl(cc, dst, src); 9302 } else { 9303 Label L; 9304 jccb(negate_condition(cc), L); 9305 movl(dst, src); 9306 bind(L); 9307 } 9308 } 9309 9310 void MacroAssembler::verify_oop(Register reg, const char* s) { 9311 if (!VerifyOops) return; 9312 9313 // Pass register number to verify_oop_subroutine 9314 char* b = new char[strlen(s) + 50]; 9315 sprintf(b, "verify_oop: %s: %s", reg->name(), s); 9316 #ifdef _LP64 9317 push(rscratch1); // save r10, trashed by movptr() 9318 #endif 9319 push(rax); // save rax, 9320 push(reg); // pass register argument 9321 ExternalAddress buffer((address) b); 9322 // avoid using pushptr, as it modifies scratch registers 9323 // and our contract is not to modify anything 9324 movptr(rax, buffer.addr()); 9325 push(rax); 9326 // call indirectly to solve generation ordering problem 9327 movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address())); 9328 call(rax); 9329 // Caller pops the arguments (oop, message) and restores rax, r10 9330 } 9331 9332 9333 RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr, 9334 Register tmp, 9335 int offset) { 9336 intptr_t value = *delayed_value_addr; 9337 if (value != 0) 9338 return RegisterOrConstant(value + offset); 9339 9340 // load indirectly to solve generation ordering problem 9341 movptr(tmp, ExternalAddress((address) delayed_value_addr)); 9342 9343 #ifdef ASSERT 9344 { Label L; 9345 testptr(tmp, tmp); 9346 if (WizardMode) { 9347 jcc(Assembler::notZero, L); 9348 char* buf = new char[40]; 9349 sprintf(buf, "DelayedValue="INTPTR_FORMAT, delayed_value_addr[1]); 9350 stop(buf); 9351 } else { 9352 jccb(Assembler::notZero, L); 9353 hlt(); 9354 } 9355 bind(L); 9356 } 9357 #endif 9358 9359 if (offset != 0) 9360 addptr(tmp, offset); 9361 9362 return RegisterOrConstant(tmp); 9363 } 9364 9365 9366 // registers on entry: 9367 // - rax ('check' register): required MethodType 9368 // - rcx: method handle 9369 // - rdx, rsi, or ?: killable temp 9370 void MacroAssembler::check_method_handle_type(Register mtype_reg, Register mh_reg, 9371 Register temp_reg, 9372 Label& wrong_method_type) { 9373 Address type_addr(mh_reg, delayed_value(java_lang_invoke_MethodHandle::type_offset_in_bytes, temp_reg)); 9374 // compare method type against that of the receiver 9375 if (UseCompressedOops) { 9376 load_heap_oop(temp_reg, type_addr); 9377 cmpptr(mtype_reg, temp_reg); 9378 } else { 9379 cmpptr(mtype_reg, type_addr); 9380 } 9381 jcc(Assembler::notEqual, wrong_method_type); 9382 } 9383 9384 9385 // A method handle has a "vmslots" field which gives the size of its 9386 // argument list in JVM stack slots. This field is either located directly 9387 // in every method handle, or else is indirectly accessed through the 9388 // method handle's MethodType. This macro hides the distinction. 
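// In Java terms the loads below amount to mh.type().form().vmslots(); the field
// offsets are resolved lazily through delayed_value().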
9389 void MacroAssembler::load_method_handle_vmslots(Register vmslots_reg, Register mh_reg, 9390 Register temp_reg) { 9391 assert_different_registers(vmslots_reg, mh_reg, temp_reg); 9392 // load mh.type.form.vmslots 9393 Register temp2_reg = vmslots_reg; 9394 load_heap_oop(temp2_reg, Address(mh_reg, delayed_value(java_lang_invoke_MethodHandle::type_offset_in_bytes, temp_reg))); 9395 load_heap_oop(temp2_reg, Address(temp2_reg, delayed_value(java_lang_invoke_MethodType::form_offset_in_bytes, temp_reg))); 9396 movl(vmslots_reg, Address(temp2_reg, delayed_value(java_lang_invoke_MethodTypeForm::vmslots_offset_in_bytes, temp_reg))); 9397 } 9398 9399 9400 // registers on entry: 9401 // - rcx: method handle 9402 // - rdx: killable temp (interpreted only) 9403 // - rax: killable temp (compiled only) 9404 void MacroAssembler::jump_to_method_handle_entry(Register mh_reg, Register temp_reg) { 9405 assert(mh_reg == rcx, "caller must put MH object in rcx"); 9406 assert_different_registers(mh_reg, temp_reg); 9407 9408 // pick out the interpreted side of the handler 9409 // NOTE: vmentry is not an oop! 9410 movptr(temp_reg, Address(mh_reg, delayed_value(java_lang_invoke_MethodHandle::vmentry_offset_in_bytes, temp_reg))); 9411 9412 // off we go... 9413 jmp(Address(temp_reg, MethodHandleEntry::from_interpreted_entry_offset_in_bytes())); 9414 9415 // for the various stubs which take control at this point, 9416 // see MethodHandles::generate_method_handle_stub 9417 } 9418 9419 9420 Address MacroAssembler::argument_address(RegisterOrConstant arg_slot, 9421 int extra_slot_offset) { 9422 // cf. TemplateTable::prepare_invoke(), if (load_receiver). 9423 int stackElementSize = Interpreter::stackElementSize; 9424 int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0); 9425 #ifdef ASSERT 9426 int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1); 9427 assert(offset1 - offset == stackElementSize, "correct arithmetic"); 9428 #endif 9429 Register scale_reg = noreg; 9430 Address::ScaleFactor scale_factor = Address::no_scale; 9431 if (arg_slot.is_constant()) { 9432 offset += arg_slot.as_constant() * stackElementSize; 9433 } else { 9434 scale_reg = arg_slot.as_register(); 9435 scale_factor = Address::times(stackElementSize); 9436 } 9437 offset += wordSize; // return PC is on stack 9438 return Address(rsp, scale_reg, scale_factor, offset); 9439 } 9440 9441 9442 void MacroAssembler::verify_oop_addr(Address addr, const char* s) { 9443 if (!VerifyOops) return; 9444 9445 // Address adjust(addr.base(), addr.index(), addr.scale(), addr.disp() + BytesPerWord); 9446 // Pass register number to verify_oop_subroutine 9447 char* b = new char[strlen(s) + 50]; 9448 sprintf(b, "verify_oop_addr: %s", s); 9449 9450 #ifdef _LP64 9451 push(rscratch1); // save r10, trashed by movptr() 9452 #endif 9453 push(rax); // save rax, 9454 // addr may contain rsp so we will have to adjust it based on the push 9455 // we just did (and on 64 bit we do two pushes) 9456 // NOTE: 64bit seemed to have had a bug in that it did movq(addr, rax); which 9457 // stores rax into addr which is backwards of what was intended. 
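  // If addr is rsp-relative, the pushes above have moved rsp, so the oop's true
  // location is the lea'd address plus one word (two words on LP64).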
9458 if (addr.uses(rsp)) { 9459 lea(rax, addr); 9460 pushptr(Address(rax, LP64_ONLY(2 *) BytesPerWord)); 9461 } else { 9462 pushptr(addr); 9463 } 9464 9465 ExternalAddress buffer((address) b); 9466 // pass msg argument 9467 // avoid using pushptr, as it modifies scratch registers 9468 // and our contract is not to modify anything 9469 movptr(rax, buffer.addr()); 9470 push(rax); 9471 9472 // call indirectly to solve generation ordering problem 9473 movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address())); 9474 call(rax); 9475 // Caller pops the arguments (addr, message) and restores rax, r10. 9476 } 9477 9478 void MacroAssembler::verify_tlab() { 9479 #ifdef ASSERT 9480 if (UseTLAB && VerifyOops) { 9481 Label next, ok; 9482 Register t1 = rsi; 9483 Register thread_reg = NOT_LP64(rbx) LP64_ONLY(r15_thread); 9484 9485 push(t1); 9486 NOT_LP64(push(thread_reg)); 9487 NOT_LP64(get_thread(thread_reg)); 9488 9489 movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset()))); 9490 cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_start_offset()))); 9491 jcc(Assembler::aboveEqual, next); 9492 stop("assert(top >= start)"); 9493 should_not_reach_here(); 9494 9495 bind(next); 9496 movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_end_offset()))); 9497 cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset()))); 9498 jcc(Assembler::aboveEqual, ok); 9499 stop("assert(top <= end)"); 9500 should_not_reach_here(); 9501 9502 bind(ok); 9503 NOT_LP64(pop(thread_reg)); 9504 pop(t1); 9505 } 9506 #endif 9507 } 9508 9509 class ControlWord { 9510 public: 9511 int32_t _value; 9512 9513 int rounding_control() const { return (_value >> 10) & 3 ; } 9514 int precision_control() const { return (_value >> 8) & 3 ; } 9515 bool precision() const { return ((_value >> 5) & 1) != 0; } 9516 bool underflow() const { return ((_value >> 4) & 1) != 0; } 9517 bool overflow() const { return ((_value >> 3) & 1) != 0; } 9518 bool zero_divide() const { return ((_value >> 2) & 1) != 0; } 9519 bool denormalized() const { return ((_value >> 1) & 1) != 0; } 9520 bool invalid() const { return ((_value >> 0) & 1) != 0; } 9521 9522 void print() const { 9523 // rounding control 9524 const char* rc; 9525 switch (rounding_control()) { 9526 case 0: rc = "round near"; break; 9527 case 1: rc = "round down"; break; 9528 case 2: rc = "round up "; break; 9529 case 3: rc = "chop "; break; 9530 }; 9531 // precision control 9532 const char* pc; 9533 switch (precision_control()) { 9534 case 0: pc = "24 bits "; break; 9535 case 1: pc = "reserved"; break; 9536 case 2: pc = "53 bits "; break; 9537 case 3: pc = "64 bits "; break; 9538 }; 9539 // flags 9540 char f[9]; 9541 f[0] = ' '; 9542 f[1] = ' '; 9543 f[2] = (precision ()) ? 'P' : 'p'; 9544 f[3] = (underflow ()) ? 'U' : 'u'; 9545 f[4] = (overflow ()) ? 'O' : 'o'; 9546 f[5] = (zero_divide ()) ? 'Z' : 'z'; 9547 f[6] = (denormalized()) ? 'D' : 'd'; 9548 f[7] = (invalid ()) ? 
'I' : 'i'; 9549 f[8] = '\x0'; 9550 // output 9551 printf("%04x masks = %s, %s, %s", _value & 0xFFFF, f, rc, pc); 9552 } 9553 9554 }; 9555 9556 class StatusWord { 9557 public: 9558 int32_t _value; 9559 9560 bool busy() const { return ((_value >> 15) & 1) != 0; } 9561 bool C3() const { return ((_value >> 14) & 1) != 0; } 9562 bool C2() const { return ((_value >> 10) & 1) != 0; } 9563 bool C1() const { return ((_value >> 9) & 1) != 0; } 9564 bool C0() const { return ((_value >> 8) & 1) != 0; } 9565 int top() const { return (_value >> 11) & 7 ; } 9566 bool error_status() const { return ((_value >> 7) & 1) != 0; } 9567 bool stack_fault() const { return ((_value >> 6) & 1) != 0; } 9568 bool precision() const { return ((_value >> 5) & 1) != 0; } 9569 bool underflow() const { return ((_value >> 4) & 1) != 0; } 9570 bool overflow() const { return ((_value >> 3) & 1) != 0; } 9571 bool zero_divide() const { return ((_value >> 2) & 1) != 0; } 9572 bool denormalized() const { return ((_value >> 1) & 1) != 0; } 9573 bool invalid() const { return ((_value >> 0) & 1) != 0; } 9574 9575 void print() const { 9576 // condition codes 9577 char c[5]; 9578 c[0] = (C3()) ? '3' : '-'; 9579 c[1] = (C2()) ? '2' : '-'; 9580 c[2] = (C1()) ? '1' : '-'; 9581 c[3] = (C0()) ? '0' : '-'; 9582 c[4] = '\x0'; 9583 // flags 9584 char f[9]; 9585 f[0] = (error_status()) ? 'E' : '-'; 9586 f[1] = (stack_fault ()) ? 'S' : '-'; 9587 f[2] = (precision ()) ? 'P' : '-'; 9588 f[3] = (underflow ()) ? 'U' : '-'; 9589 f[4] = (overflow ()) ? 'O' : '-'; 9590 f[5] = (zero_divide ()) ? 'Z' : '-'; 9591 f[6] = (denormalized()) ? 'D' : '-'; 9592 f[7] = (invalid ()) ? 'I' : '-'; 9593 f[8] = '\x0'; 9594 // output 9595 printf("%04x flags = %s, cc = %s, top = %d", _value & 0xFFFF, f, c, top()); 9596 } 9597 9598 }; 9599 9600 class TagWord { 9601 public: 9602 int32_t _value; 9603 9604 int tag_at(int i) const { return (_value >> (i*2)) & 3; } 9605 9606 void print() const { 9607 printf("%04x", _value & 0xFFFF); 9608 } 9609 9610 }; 9611 9612 class FPU_Register { 9613 public: 9614 int32_t _m0; 9615 int32_t _m1; 9616 int16_t _ex; 9617 9618 bool is_indefinite() const { 9619 return _ex == -1 && _m1 == (int32_t)0xC0000000 && _m0 == 0; 9620 } 9621 9622 void print() const { 9623 char sign = (_ex < 0) ? '-' : '+'; 9624 const char* kind = (_ex == 0x7FFF || _ex == (int16_t)-1) ? "NaN" : " "; 9625 printf("%c%04hx.%08x%08x %s", sign, _ex, _m1, _m0, kind); 9626 }; 9627 9628 }; 9629 9630 class FPU_State { 9631 public: 9632 enum { 9633 register_size = 10, 9634 number_of_registers = 8, 9635 register_mask = 7 9636 }; 9637 9638 ControlWord _control_word; 9639 StatusWord _status_word; 9640 TagWord _tag_word; 9641 int32_t _error_offset; 9642 int32_t _error_selector; 9643 int32_t _data_offset; 9644 int32_t _data_selector; 9645 int8_t _register[register_size * number_of_registers]; 9646 9647 int tag_for_st(int i) const { return _tag_word.tag_at((_status_word.top() + i) & register_mask); } 9648 FPU_Register* st(int i) const { return (FPU_Register*)&_register[register_size * i]; } 9649 9650 const char* tag_as_string(int tag) const { 9651 switch (tag) { 9652 case 0: return "valid"; 9653 case 1: return "zero"; 9654 case 2: return "special"; 9655 case 3: return "empty"; 9656 } 9657 ShouldNotReachHere(); 9658 return NULL; 9659 } 9660 9661 void print() const { 9662 // print computation registers 9663 { int t = _status_word.top(); 9664 for (int i = 0; i < number_of_registers; i++) { 9665 int j = (i - t) & register_mask; 9666 printf("%c r%d = ST%d = ", (j == 0 ? 
'*' : ' '), i, j); 9667 st(j)->print(); 9668 printf(" %s\n", tag_as_string(_tag_word.tag_at(i))); 9669 } 9670 } 9671 printf("\n"); 9672 // print control registers 9673 printf("ctrl = "); _control_word.print(); printf("\n"); 9674 printf("stat = "); _status_word .print(); printf("\n"); 9675 printf("tags = "); _tag_word .print(); printf("\n"); 9676 } 9677 9678 }; 9679 9680 class Flag_Register { 9681 public: 9682 int32_t _value; 9683 9684 bool overflow() const { return ((_value >> 11) & 1) != 0; } 9685 bool direction() const { return ((_value >> 10) & 1) != 0; } 9686 bool sign() const { return ((_value >> 7) & 1) != 0; } 9687 bool zero() const { return ((_value >> 6) & 1) != 0; } 9688 bool auxiliary_carry() const { return ((_value >> 4) & 1) != 0; } 9689 bool parity() const { return ((_value >> 2) & 1) != 0; } 9690 bool carry() const { return ((_value >> 0) & 1) != 0; } 9691 9692 void print() const { 9693 // flags 9694 char f[8]; 9695 f[0] = (overflow ()) ? 'O' : '-'; 9696 f[1] = (direction ()) ? 'D' : '-'; 9697 f[2] = (sign ()) ? 'S' : '-'; 9698 f[3] = (zero ()) ? 'Z' : '-'; 9699 f[4] = (auxiliary_carry()) ? 'A' : '-'; 9700 f[5] = (parity ()) ? 'P' : '-'; 9701 f[6] = (carry ()) ? 'C' : '-'; 9702 f[7] = '\x0'; 9703 // output 9704 printf("%08x flags = %s", _value, f); 9705 } 9706 9707 }; 9708 9709 class IU_Register { 9710 public: 9711 int32_t _value; 9712 9713 void print() const { 9714 printf("%08x %11d", _value, _value); 9715 } 9716 9717 }; 9718 9719 class IU_State { 9720 public: 9721 Flag_Register _eflags; 9722 IU_Register _rdi; 9723 IU_Register _rsi; 9724 IU_Register _rbp; 9725 IU_Register _rsp; 9726 IU_Register _rbx; 9727 IU_Register _rdx; 9728 IU_Register _rcx; 9729 IU_Register _rax; 9730 9731 void print() const { 9732 // computation registers 9733 printf("rax, = "); _rax.print(); printf("\n"); 9734 printf("rbx, = "); _rbx.print(); printf("\n"); 9735 printf("rcx = "); _rcx.print(); printf("\n"); 9736 printf("rdx = "); _rdx.print(); printf("\n"); 9737 printf("rdi = "); _rdi.print(); printf("\n"); 9738 printf("rsi = "); _rsi.print(); printf("\n"); 9739 printf("rbp, = "); _rbp.print(); printf("\n"); 9740 printf("rsp = "); _rsp.print(); printf("\n"); 9741 printf("\n"); 9742 // control registers 9743 printf("flgs = "); _eflags.print(); printf("\n"); 9744 } 9745 }; 9746 9747 9748 class CPU_State { 9749 public: 9750 FPU_State _fpu_state; 9751 IU_State _iu_state; 9752 9753 void print() const { 9754 printf("--------------------------------------------------\n"); 9755 _iu_state .print(); 9756 printf("\n"); 9757 _fpu_state.print(); 9758 printf("--------------------------------------------------\n"); 9759 } 9760 9761 }; 9762 9763 9764 static void _print_CPU_state(CPU_State* state) { 9765 state->print(); 9766 }; 9767 9768 9769 void MacroAssembler::print_CPU_state() { 9770 push_CPU_state(); 9771 push(rsp); // pass CPU state 9772 call(RuntimeAddress(CAST_FROM_FN_PTR(address, _print_CPU_state))); 9773 addptr(rsp, wordSize); // discard argument 9774 pop_CPU_state(); 9775 } 9776 9777 9778 static bool _verify_FPU(int stack_depth, char* s, CPU_State* state) { 9779 static int counter = 0; 9780 FPU_State* fs = &state->_fpu_state; 9781 counter++; 9782 // For leaf calls, only verify that the top few elements remain empty. 9783 // We only need 1 empty at the top for C2 code. 
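  // Convention: a negative stack_depth (leaf call) only requires that ST7 be
  // empty, i.e. at least one free FPU slot; a non-negative value must match the
  // computed stack depth exactly.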
9784 if( stack_depth < 0 ) { 9785 if( fs->tag_for_st(7) != 3 ) { 9786 printf("FPR7 not empty\n"); 9787 state->print(); 9788 assert(false, "error"); 9789 return false; 9790 } 9791 return true; // All other stack states do not matter 9792 } 9793 9794 assert((fs->_control_word._value & 0xffff) == StubRoutines::_fpu_cntrl_wrd_std, 9795 "bad FPU control word"); 9796 9797 // compute stack depth 9798 int i = 0; 9799 while (i < FPU_State::number_of_registers && fs->tag_for_st(i) < 3) i++; 9800 int d = i; 9801 while (i < FPU_State::number_of_registers && fs->tag_for_st(i) == 3) i++; 9802 // verify findings 9803 if (i != FPU_State::number_of_registers) { 9804 // stack not contiguous 9805 printf("%s: stack not contiguous at ST%d\n", s, i); 9806 state->print(); 9807 assert(false, "error"); 9808 return false; 9809 } 9810 // check if computed stack depth corresponds to expected stack depth 9811 if (stack_depth < 0) { 9812 // expected stack depth is -stack_depth or less 9813 if (d > -stack_depth) { 9814 // too many elements on the stack 9815 printf("%s: <= %d stack elements expected but found %d\n", s, -stack_depth, d); 9816 state->print(); 9817 assert(false, "error"); 9818 return false; 9819 } 9820 } else { 9821 // expected stack depth is stack_depth 9822 if (d != stack_depth) { 9823 // wrong stack depth 9824 printf("%s: %d stack elements expected but found %d\n", s, stack_depth, d); 9825 state->print(); 9826 assert(false, "error"); 9827 return false; 9828 } 9829 } 9830 // everything is cool 9831 return true; 9832 } 9833 9834 9835 void MacroAssembler::verify_FPU(int stack_depth, const char* s) { 9836 if (!VerifyFPU) return; 9837 push_CPU_state(); 9838 push(rsp); // pass CPU state 9839 ExternalAddress msg((address) s); 9840 // pass message string s 9841 pushptr(msg.addr()); 9842 push(stack_depth); // pass stack depth 9843 call(RuntimeAddress(CAST_FROM_FN_PTR(address, _verify_FPU))); 9844 addptr(rsp, 3 * wordSize); // discard arguments 9845 // check for error 9846 { Label L; 9847 testl(rax, rax); 9848 jcc(Assembler::notZero, L); 9849 int3(); // break if error condition 9850 bind(L); 9851 } 9852 pop_CPU_state(); 9853 } 9854 9855 void MacroAssembler::load_klass(Register dst, Register src) { 9856 #ifdef _LP64 9857 if (UseCompressedOops) { 9858 movl(dst, Address(src, oopDesc::klass_offset_in_bytes())); 9859 decode_heap_oop_not_null(dst); 9860 } else 9861 #endif 9862 movptr(dst, Address(src, oopDesc::klass_offset_in_bytes())); 9863 } 9864 9865 void MacroAssembler::load_prototype_header(Register dst, Register src) { 9866 #ifdef _LP64 9867 if (UseCompressedOops) { 9868 assert (Universe::heap() != NULL, "java heap should be initialized"); 9869 movl(dst, Address(src, oopDesc::klass_offset_in_bytes())); 9870 if (Universe::narrow_oop_shift() != 0) { 9871 assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); 9872 if (LogMinObjAlignmentInBytes == Address::times_8) { 9873 movq(dst, Address(r12_heapbase, dst, Address::times_8, Klass::prototype_header_offset())); 9874 } else { 9875 // OK to use shift since we don't need to preserve flags. 
9876 shlq(dst, LogMinObjAlignmentInBytes); 9877 movq(dst, Address(r12_heapbase, dst, Address::times_1, Klass::prototype_header_offset())); 9878 } 9879 } else { 9880 movq(dst, Address(dst, Klass::prototype_header_offset())); 9881 } 9882 } else 9883 #endif 9884 { 9885 movptr(dst, Address(src, oopDesc::klass_offset_in_bytes())); 9886 movptr(dst, Address(dst, Klass::prototype_header_offset())); 9887 } 9888 } 9889 9890 void MacroAssembler::store_klass(Register dst, Register src) { 9891 #ifdef _LP64 9892 if (UseCompressedOops) { 9893 encode_heap_oop_not_null(src); 9894 movl(Address(dst, oopDesc::klass_offset_in_bytes()), src); 9895 } else 9896 #endif 9897 movptr(Address(dst, oopDesc::klass_offset_in_bytes()), src); 9898 } 9899 9900 void MacroAssembler::load_heap_oop(Register dst, Address src) { 9901 #ifdef _LP64 9902 if (UseCompressedOops) { 9903 movl(dst, src); 9904 decode_heap_oop(dst); 9905 } else 9906 #endif 9907 movptr(dst, src); 9908 } 9909 9910 // Doesn't do verfication, generates fixed size code 9911 void MacroAssembler::load_heap_oop_not_null(Register dst, Address src) { 9912 #ifdef _LP64 9913 if (UseCompressedOops) { 9914 movl(dst, src); 9915 decode_heap_oop_not_null(dst); 9916 } else 9917 #endif 9918 movptr(dst, src); 9919 } 9920 9921 void MacroAssembler::store_heap_oop(Address dst, Register src) { 9922 #ifdef _LP64 9923 if (UseCompressedOops) { 9924 assert(!dst.uses(src), "not enough registers"); 9925 encode_heap_oop(src); 9926 movl(dst, src); 9927 } else 9928 #endif 9929 movptr(dst, src); 9930 } 9931 9932 // Used for storing NULLs. 9933 void MacroAssembler::store_heap_oop_null(Address dst) { 9934 #ifdef _LP64 9935 if (UseCompressedOops) { 9936 movl(dst, (int32_t)NULL_WORD); 9937 } else { 9938 movslq(dst, (int32_t)NULL_WORD); 9939 } 9940 #else 9941 movl(dst, (int32_t)NULL_WORD); 9942 #endif 9943 } 9944 9945 #ifdef _LP64 9946 void MacroAssembler::store_klass_gap(Register dst, Register src) { 9947 if (UseCompressedOops) { 9948 // Store to klass gap in destination 9949 movl(Address(dst, oopDesc::klass_gap_offset_in_bytes()), src); 9950 } 9951 } 9952 9953 #ifdef ASSERT 9954 void MacroAssembler::verify_heapbase(const char* msg) { 9955 assert (UseCompressedOops, "should be compressed"); 9956 assert (Universe::heap() != NULL, "java heap should be initialized"); 9957 if (CheckCompressedOops) { 9958 Label ok; 9959 push(rscratch1); // cmpptr trashes rscratch1 9960 cmpptr(r12_heapbase, ExternalAddress((address)Universe::narrow_oop_base_addr())); 9961 jcc(Assembler::equal, ok); 9962 stop(msg); 9963 bind(ok); 9964 pop(rscratch1); 9965 } 9966 } 9967 #endif 9968 9969 // Algorithm must match oop.inline.hpp encode_heap_oop. 
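// In effect (a sketch, not the emitted instruction sequence):
//   narrow = (oop == NULL) ? 0 : (uint32_t)((oop - narrow_oop_base) >> narrow_oop_shift)
// The cmovq below handles the NULL case branchlessly by substituting the heap
// base so that the subtraction yields zero.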
9970 void MacroAssembler::encode_heap_oop(Register r) { 9971 #ifdef ASSERT 9972 verify_heapbase("MacroAssembler::encode_heap_oop: heap base corrupted?"); 9973 #endif 9974 verify_oop(r, "broken oop in encode_heap_oop"); 9975 if (Universe::narrow_oop_base() == NULL) { 9976 if (Universe::narrow_oop_shift() != 0) { 9977 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); 9978 shrq(r, LogMinObjAlignmentInBytes); 9979 } 9980 return; 9981 } 9982 testq(r, r); 9983 cmovq(Assembler::equal, r, r12_heapbase); 9984 subq(r, r12_heapbase); 9985 shrq(r, LogMinObjAlignmentInBytes); 9986 } 9987 9988 void MacroAssembler::encode_heap_oop_not_null(Register r) { 9989 #ifdef ASSERT 9990 verify_heapbase("MacroAssembler::encode_heap_oop_not_null: heap base corrupted?"); 9991 if (CheckCompressedOops) { 9992 Label ok; 9993 testq(r, r); 9994 jcc(Assembler::notEqual, ok); 9995 stop("null oop passed to encode_heap_oop_not_null"); 9996 bind(ok); 9997 } 9998 #endif 9999 verify_oop(r, "broken oop in encode_heap_oop_not_null"); 10000 if (Universe::narrow_oop_base() != NULL) { 10001 subq(r, r12_heapbase); 10002 } 10003 if (Universe::narrow_oop_shift() != 0) { 10004 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); 10005 shrq(r, LogMinObjAlignmentInBytes); 10006 } 10007 } 10008 10009 void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) { 10010 #ifdef ASSERT 10011 verify_heapbase("MacroAssembler::encode_heap_oop_not_null2: heap base corrupted?"); 10012 if (CheckCompressedOops) { 10013 Label ok; 10014 testq(src, src); 10015 jcc(Assembler::notEqual, ok); 10016 stop("null oop passed to encode_heap_oop_not_null2"); 10017 bind(ok); 10018 } 10019 #endif 10020 verify_oop(src, "broken oop in encode_heap_oop_not_null2"); 10021 if (dst != src) { 10022 movq(dst, src); 10023 } 10024 if (Universe::narrow_oop_base() != NULL) { 10025 subq(dst, r12_heapbase); 10026 } 10027 if (Universe::narrow_oop_shift() != 0) { 10028 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); 10029 shrq(dst, LogMinObjAlignmentInBytes); 10030 } 10031 } 10032 10033 void MacroAssembler::decode_heap_oop(Register r) { 10034 #ifdef ASSERT 10035 verify_heapbase("MacroAssembler::decode_heap_oop: heap base corrupted?"); 10036 #endif 10037 if (Universe::narrow_oop_base() == NULL) { 10038 if (Universe::narrow_oop_shift() != 0) { 10039 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); 10040 shlq(r, LogMinObjAlignmentInBytes); 10041 } 10042 } else { 10043 Label done; 10044 shlq(r, LogMinObjAlignmentInBytes); 10045 jccb(Assembler::equal, done); 10046 addq(r, r12_heapbase); 10047 bind(done); 10048 } 10049 verify_oop(r, "broken oop in decode_heap_oop"); 10050 } 10051 10052 void MacroAssembler::decode_heap_oop_not_null(Register r) { 10053 // Note: it will change flags 10054 assert (UseCompressedOops, "should only be used for compressed headers"); 10055 assert (Universe::heap() != NULL, "java heap should be initialized"); 10056 // Cannot assert, unverified entry point counts instructions (see .ad file) 10057 // vtableStubs also counts instructions in pd_code_size_limit. 10058 // Also do not verify_oop as this is called by verify_oop. 
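  // In effect: oop = (narrow << narrow_oop_shift) + narrow_oop_base (the base
  // term is omitted when it is NULL). No null check is emitted here because the
  // caller guarantees a non-null narrow oop.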
10059 if (Universe::narrow_oop_shift() != 0) { 10060 assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); 10061 shlq(r, LogMinObjAlignmentInBytes); 10062 if (Universe::narrow_oop_base() != NULL) { 10063 addq(r, r12_heapbase); 10064 } 10065 } else { 10066 assert (Universe::narrow_oop_base() == NULL, "sanity"); 10067 } 10068 } 10069 10070 void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) { 10071 // Note: it will change flags 10072 assert (UseCompressedOops, "should only be used for compressed headers"); 10073 assert (Universe::heap() != NULL, "java heap should be initialized"); 10074 // Cannot assert, unverified entry point counts instructions (see .ad file) 10075 // vtableStubs also counts instructions in pd_code_size_limit. 10076 // Also do not verify_oop as this is called by verify_oop. 10077 if (Universe::narrow_oop_shift() != 0) { 10078 assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); 10079 if (LogMinObjAlignmentInBytes == Address::times_8) { 10080 leaq(dst, Address(r12_heapbase, src, Address::times_8, 0)); 10081 } else { 10082 if (dst != src) { 10083 movq(dst, src); 10084 } 10085 shlq(dst, LogMinObjAlignmentInBytes); 10086 if (Universe::narrow_oop_base() != NULL) { 10087 addq(dst, r12_heapbase); 10088 } 10089 } 10090 } else { 10091 assert (Universe::narrow_oop_base() == NULL, "sanity"); 10092 if (dst != src) { 10093 movq(dst, src); 10094 } 10095 } 10096 } 10097 10098 void MacroAssembler::set_narrow_oop(Register dst, jobject obj) { 10099 assert (UseCompressedOops, "should only be used for compressed headers"); 10100 assert (Universe::heap() != NULL, "java heap should be initialized"); 10101 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); 10102 int oop_index = oop_recorder()->find_index(obj); 10103 RelocationHolder rspec = oop_Relocation::spec(oop_index); 10104 mov_narrow_oop(dst, oop_index, rspec); 10105 } 10106 10107 void MacroAssembler::set_narrow_oop(Address dst, jobject obj) { 10108 assert (UseCompressedOops, "should only be used for compressed headers"); 10109 assert (Universe::heap() != NULL, "java heap should be initialized"); 10110 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); 10111 int oop_index = oop_recorder()->find_index(obj); 10112 RelocationHolder rspec = oop_Relocation::spec(oop_index); 10113 mov_narrow_oop(dst, oop_index, rspec); 10114 } 10115 10116 void MacroAssembler::cmp_narrow_oop(Register dst, jobject obj) { 10117 assert (UseCompressedOops, "should only be used for compressed headers"); 10118 assert (Universe::heap() != NULL, "java heap should be initialized"); 10119 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); 10120 int oop_index = oop_recorder()->find_index(obj); 10121 RelocationHolder rspec = oop_Relocation::spec(oop_index); 10122 Assembler::cmp_narrow_oop(dst, oop_index, rspec); 10123 } 10124 10125 void MacroAssembler::cmp_narrow_oop(Address dst, jobject obj) { 10126 assert (UseCompressedOops, "should only be used for compressed headers"); 10127 assert (Universe::heap() != NULL, "java heap should be initialized"); 10128 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); 10129 int oop_index = oop_recorder()->find_index(obj); 10130 RelocationHolder rspec = oop_Relocation::spec(oop_index); 10131 Assembler::cmp_narrow_oop(dst, oop_index, rspec); 10132 } 10133 10134 void MacroAssembler::reinit_heapbase() { 10135 if (UseCompressedOops) { 10136 movptr(r12_heapbase, 
ExternalAddress((address)Universe::narrow_oop_base_addr())); 10137 } 10138 } 10139 #endif // _LP64 10140 10141 10142 // C2 compiled method's prolog code. 10143 void MacroAssembler::verified_entry(int framesize, bool stack_bang, bool fp_mode_24b) { 10144 10145 // WARNING: Initial instruction MUST be 5 bytes or longer so that 10146 // NativeJump::patch_verified_entry will be able to patch out the entry 10147 // code safely. The push to verify stack depth is ok at 5 bytes, 10148 // the frame allocation can be either 3 or 6 bytes. So if we don't do 10149 // stack bang then we must use the 6 byte frame allocation even if 10150 // we have no frame. :-( 10151 10152 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); 10153 // Remove word for return addr 10154 framesize -= wordSize; 10155 10156 // Calls to C2R adapters often do not accept exceptional returns. 10157 // We require that their callers must bang for them. But be careful, because 10158 // some VM calls (such as call site linkage) can use several kilobytes of 10159 // stack. But the stack safety zone should account for that. 10160 // See bugs 4446381, 4468289, 4497237. 10161 if (stack_bang) { 10162 generate_stack_overflow_check(framesize); 10163 10164 // We always push rbp, so that on return to interpreter rbp, will be 10165 // restored correctly and we can correct the stack. 10166 push(rbp); 10167 // Remove word for ebp 10168 framesize -= wordSize; 10169 10170 // Create frame 10171 if (framesize) { 10172 subptr(rsp, framesize); 10173 } 10174 } else { 10175 // Create frame (force generation of a 4 byte immediate value) 10176 subptr_imm32(rsp, framesize); 10177 10178 // Save RBP register now. 10179 framesize -= wordSize; 10180 movptr(Address(rsp, framesize), rbp); 10181 } 10182 10183 if (VerifyStackAtCalls) { // Majik cookie to verify stack depth 10184 framesize -= wordSize; 10185 movptr(Address(rsp, framesize), (int32_t)0xbadb100d); 10186 } 10187 10188 #ifndef _LP64 10189 // If method sets FPU control word do it now 10190 if (fp_mode_24b) { 10191 fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24())); 10192 } 10193 if (UseSSE >= 2 && VerifyFPU) { 10194 verify_FPU(0, "FPU stack must be clean on entry"); 10195 } 10196 #endif 10197 10198 #ifdef ASSERT 10199 if (VerifyStackAtCalls) { 10200 Label L; 10201 push(rax); 10202 mov(rax, rsp); 10203 andptr(rax, StackAlignmentInBytes-1); 10204 cmpptr(rax, StackAlignmentInBytes-wordSize); 10205 pop(rax); 10206 jcc(Assembler::equal, L); 10207 stop("Stack is not properly aligned!"); 10208 bind(L); 10209 } 10210 #endif 10211 10212 } 10213 10214 10215 // IndexOf for constant substrings with size >= 8 chars 10216 // which don't need to be loaded through stack. 
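// Roughly the contract being accelerated: return the index (in chars) of the
// first occurrence of str2[0..int_cnt2) within str1[0..cnt1), or -1 if there is
// none. Because the constant substring is at least 8 chars, a full 16-byte
// vector of it can be loaded up front without any page-boundary concerns.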
10217 void MacroAssembler::string_indexofC8(Register str1, Register str2, 10218 Register cnt1, Register cnt2, 10219 int int_cnt2, Register result, 10220 XMMRegister vec, Register tmp) { 10221 ShortBranchVerifier sbv(this); 10222 assert(UseSSE42Intrinsics, "SSE4.2 is required"); 10223 10224 // This method uses pcmpestri inxtruction with bound registers 10225 // inputs: 10226 // xmm - substring 10227 // rax - substring length (elements count) 10228 // mem - scanned string 10229 // rdx - string length (elements count) 10230 // 0xd - mode: 1100 (substring search) + 01 (unsigned shorts) 10231 // outputs: 10232 // rcx - matched index in string 10233 assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri"); 10234 10235 Label RELOAD_SUBSTR, SCAN_TO_SUBSTR, SCAN_SUBSTR, 10236 RET_FOUND, RET_NOT_FOUND, EXIT, FOUND_SUBSTR, 10237 MATCH_SUBSTR_HEAD, RELOAD_STR, FOUND_CANDIDATE; 10238 10239 // Note, inline_string_indexOf() generates checks: 10240 // if (substr.count > string.count) return -1; 10241 // if (substr.count == 0) return 0; 10242 assert(int_cnt2 >= 8, "this code isused only for cnt2 >= 8 chars"); 10243 10244 // Load substring. 10245 movdqu(vec, Address(str2, 0)); 10246 movl(cnt2, int_cnt2); 10247 movptr(result, str1); // string addr 10248 10249 if (int_cnt2 > 8) { 10250 jmpb(SCAN_TO_SUBSTR); 10251 10252 // Reload substr for rescan, this code 10253 // is executed only for large substrings (> 8 chars) 10254 bind(RELOAD_SUBSTR); 10255 movdqu(vec, Address(str2, 0)); 10256 negptr(cnt2); // Jumped here with negative cnt2, convert to positive 10257 10258 bind(RELOAD_STR); 10259 // We came here after the beginning of the substring was 10260 // matched but the rest of it was not so we need to search 10261 // again. Start from the next element after the previous match. 10262 10263 // cnt2 is number of substring reminding elements and 10264 // cnt1 is number of string reminding elements when cmp failed. 10265 // Restored cnt1 = cnt1 - cnt2 + int_cnt2 10266 subl(cnt1, cnt2); 10267 addl(cnt1, int_cnt2); 10268 movl(cnt2, int_cnt2); // Now restore cnt2 10269 10270 decrementl(cnt1); // Shift to next element 10271 cmpl(cnt1, cnt2); 10272 jccb(Assembler::negative, RET_NOT_FOUND); // Left less then substring 10273 10274 addptr(result, 2); 10275 10276 } // (int_cnt2 > 8) 10277 10278 // Scan string for start of substr in 16-byte vectors 10279 bind(SCAN_TO_SUBSTR); 10280 pcmpestri(vec, Address(result, 0), 0x0d); 10281 jccb(Assembler::below, FOUND_CANDIDATE); // CF == 1 10282 subl(cnt1, 8); 10283 jccb(Assembler::lessEqual, RET_NOT_FOUND); // Scanned full string 10284 cmpl(cnt1, cnt2); 10285 jccb(Assembler::negative, RET_NOT_FOUND); // Left less then substring 10286 addptr(result, 16); 10287 jmpb(SCAN_TO_SUBSTR); 10288 10289 // Found a potential substr 10290 bind(FOUND_CANDIDATE); 10291 // Matched whole vector if first element matched (tmp(rcx) == 0). 10292 if (int_cnt2 == 8) { 10293 jccb(Assembler::overflow, RET_FOUND); // OF == 1 10294 } else { // int_cnt2 > 8 10295 jccb(Assembler::overflow, FOUND_SUBSTR); 10296 } 10297 // After pcmpestri tmp(rcx) contains matched element index 10298 // Compute start addr of substr 10299 lea(result, Address(result, tmp, Address::times_2)); 10300 10301 // Make sure string is still long enough 10302 subl(cnt1, tmp); 10303 cmpl(cnt1, cnt2); 10304 if (int_cnt2 == 8) { 10305 jccb(Assembler::greaterEqual, SCAN_TO_SUBSTR); 10306 } else { // int_cnt2 > 8 10307 jccb(Assembler::greaterEqual, MATCH_SUBSTR_HEAD); 10308 } 10309 // Left less then substring. 
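  // Fewer chars remain in the string than in the substring, so no further match
  // is possible.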
10310 10311 bind(RET_NOT_FOUND); 10312 movl(result, -1); 10313 jmpb(EXIT); 10314 10315 if (int_cnt2 > 8) { 10316 // This code is optimized for the case when whole substring 10317 // is matched if its head is matched. 10318 bind(MATCH_SUBSTR_HEAD); 10319 pcmpestri(vec, Address(result, 0), 0x0d); 10320 // Reload only string if does not match 10321 jccb(Assembler::noOverflow, RELOAD_STR); // OF == 0 10322 10323 Label CONT_SCAN_SUBSTR; 10324 // Compare the rest of substring (> 8 chars). 10325 bind(FOUND_SUBSTR); 10326 // First 8 chars are already matched. 10327 negptr(cnt2); 10328 addptr(cnt2, 8); 10329 10330 bind(SCAN_SUBSTR); 10331 subl(cnt1, 8); 10332 cmpl(cnt2, -8); // Do not read beyond substring 10333 jccb(Assembler::lessEqual, CONT_SCAN_SUBSTR); 10334 // Back-up strings to avoid reading beyond substring: 10335 // cnt1 = cnt1 - cnt2 + 8 10336 addl(cnt1, cnt2); // cnt2 is negative 10337 addl(cnt1, 8); 10338 movl(cnt2, 8); negptr(cnt2); 10339 bind(CONT_SCAN_SUBSTR); 10340 if (int_cnt2 < (int)G) { 10341 movdqu(vec, Address(str2, cnt2, Address::times_2, int_cnt2*2)); 10342 pcmpestri(vec, Address(result, cnt2, Address::times_2, int_cnt2*2), 0x0d); 10343 } else { 10344 // calculate index in register to avoid integer overflow (int_cnt2*2) 10345 movl(tmp, int_cnt2); 10346 addptr(tmp, cnt2); 10347 movdqu(vec, Address(str2, tmp, Address::times_2, 0)); 10348 pcmpestri(vec, Address(result, tmp, Address::times_2, 0), 0x0d); 10349 } 10350 // Need to reload strings pointers if not matched whole vector 10351 jcc(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0 10352 addptr(cnt2, 8); 10353 jcc(Assembler::negative, SCAN_SUBSTR); 10354 // Fall through if found full substring 10355 10356 } // (int_cnt2 > 8) 10357 10358 bind(RET_FOUND); 10359 // Found result if we matched full small substring. 10360 // Compute substr offset 10361 subptr(result, str1); 10362 shrl(result, 1); // index 10363 bind(EXIT); 10364 10365 } // string_indexofC8 10366 10367 // Small strings are loaded through stack if they cross page boundary. 10368 void MacroAssembler::string_indexof(Register str1, Register str2, 10369 Register cnt1, Register cnt2, 10370 int int_cnt2, Register result, 10371 XMMRegister vec, Register tmp) { 10372 ShortBranchVerifier sbv(this); 10373 assert(UseSSE42Intrinsics, "SSE4.2 is required"); 10374 // 10375 // int_cnt2 is length of small (< 8 chars) constant substring 10376 // or (-1) for non constant substring in which case its length 10377 // is in cnt2 register. 10378 // 10379 // Note, inline_string_indexOf() generates checks: 10380 // if (substr.count > string.count) return -1; 10381 // if (substr.count == 0) return 0; 10382 // 10383 assert(int_cnt2 == -1 || (0 < int_cnt2 && int_cnt2 < 8), "should be != 0"); 10384 10385 // This method uses pcmpestri inxtruction with bound registers 10386 // inputs: 10387 // xmm - substring 10388 // rax - substring length (elements count) 10389 // mem - scanned string 10390 // rdx - string length (elements count) 10391 // 0xd - mode: 1100 (substring search) + 01 (unsigned shorts) 10392 // outputs: 10393 // rcx - matched index in string 10394 assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri"); 10395 10396 Label RELOAD_SUBSTR, SCAN_TO_SUBSTR, SCAN_SUBSTR, ADJUST_STR, 10397 RET_FOUND, RET_NOT_FOUND, CLEANUP, FOUND_SUBSTR, 10398 FOUND_CANDIDATE; 10399 10400 { //======================================================== 10401 // We don't know where these strings are located 10402 // and we can't read beyond them. Load them through stack. 
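  // Rationale: the 16-byte vector loads below may read past the logical end of a
  // short string, which is only safe if the read cannot cross a page boundary;
  // short strings whose start lies in the last 16 bytes of a page are copied to
  // the stack first.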
10403 Label BIG_STRINGS, CHECK_STR, COPY_SUBSTR, COPY_STR; 10404 10405 movptr(tmp, rsp); // save old SP 10406 10407 if (int_cnt2 > 0) { // small (< 8 chars) constant substring 10408 if (int_cnt2 == 1) { // One char 10409 load_unsigned_short(result, Address(str2, 0)); 10410 movdl(vec, result); // move 32 bits 10411 } else if (int_cnt2 == 2) { // Two chars 10412 movdl(vec, Address(str2, 0)); // move 32 bits 10413 } else if (int_cnt2 == 4) { // Four chars 10414 movq(vec, Address(str2, 0)); // move 64 bits 10415 } else { // cnt2 = { 3, 5, 6, 7 } 10416 // Array header size is 12 bytes in 32-bit VM 10417 // + 6 bytes for 3 chars == 18 bytes, 10418 // enough space to load vec and shift. 10419 assert(HeapWordSize*typeArrayKlass::header_size() >= 12,"sanity"); 10420 movdqu(vec, Address(str2, (int_cnt2*2)-16)); 10421 psrldq(vec, 16-(int_cnt2*2)); 10422 } 10423 } else { // not constant substring 10424 cmpl(cnt2, 8); 10425 jccb(Assembler::aboveEqual, BIG_STRINGS); // Both strings are big enough 10426 10427 // We can read beyond string if srt+16 does not cross page boundary 10428 // since heaps are aligned and mapped by pages. 10429 assert(os::vm_page_size() < (int)G, "default page should be small"); 10430 movl(result, str2); // We need only low 32 bits 10431 andl(result, (os::vm_page_size()-1)); 10432 cmpl(result, (os::vm_page_size()-16)); 10433 jccb(Assembler::belowEqual, CHECK_STR); 10434 10435 // Move small strings to stack to allow load 16 bytes into vec. 10436 subptr(rsp, 16); 10437 int stk_offset = wordSize-2; 10438 push(cnt2); 10439 10440 bind(COPY_SUBSTR); 10441 load_unsigned_short(result, Address(str2, cnt2, Address::times_2, -2)); 10442 movw(Address(rsp, cnt2, Address::times_2, stk_offset), result); 10443 decrement(cnt2); 10444 jccb(Assembler::notZero, COPY_SUBSTR); 10445 10446 pop(cnt2); 10447 movptr(str2, rsp); // New substring address 10448 } // non constant 10449 10450 bind(CHECK_STR); 10451 cmpl(cnt1, 8); 10452 jccb(Assembler::aboveEqual, BIG_STRINGS); 10453 10454 // Check cross page boundary. 10455 movl(result, str1); // We need only low 32 bits 10456 andl(result, (os::vm_page_size()-1)); 10457 cmpl(result, (os::vm_page_size()-16)); 10458 jccb(Assembler::belowEqual, BIG_STRINGS); 10459 10460 subptr(rsp, 16); 10461 int stk_offset = -2; 10462 if (int_cnt2 < 0) { // not constant 10463 push(cnt2); 10464 stk_offset += wordSize; 10465 } 10466 movl(cnt2, cnt1); 10467 10468 bind(COPY_STR); 10469 load_unsigned_short(result, Address(str1, cnt2, Address::times_2, -2)); 10470 movw(Address(rsp, cnt2, Address::times_2, stk_offset), result); 10471 decrement(cnt2); 10472 jccb(Assembler::notZero, COPY_STR); 10473 10474 if (int_cnt2 < 0) { // not constant 10475 pop(cnt2); 10476 } 10477 movptr(str1, rsp); // New string address 10478 10479 bind(BIG_STRINGS); 10480 // Load substring. 10481 if (int_cnt2 < 0) { // -1 10482 movdqu(vec, Address(str2, 0)); 10483 push(cnt2); // substr count 10484 push(str2); // substr addr 10485 push(str1); // string addr 10486 } else { 10487 // Small (< 8 chars) constant substrings are loaded already. 
10488 movl(cnt2, int_cnt2); 10489 } 10490 push(tmp); // original SP 10491 10492 } // Finished loading 10493 10494 //======================================================== 10495 // Start search 10496 // 10497 10498 movptr(result, str1); // string addr 10499 10500 if (int_cnt2 < 0) { // Only for non constant substring 10501 jmpb(SCAN_TO_SUBSTR); 10502 10503 // SP saved at sp+0 10504 // String saved at sp+1*wordSize 10505 // Substr saved at sp+2*wordSize 10506 // Substr count saved at sp+3*wordSize 10507 10508 // Reload substr for rescan, this code 10509 // is executed only for large substrings (> 8 chars) 10510 bind(RELOAD_SUBSTR); 10511 movptr(str2, Address(rsp, 2*wordSize)); 10512 movl(cnt2, Address(rsp, 3*wordSize)); 10513 movdqu(vec, Address(str2, 0)); 10514 // We came here after the beginning of the substring was 10515 // matched but the rest of it was not so we need to search 10516 // again. Start from the next element after the previous match. 10517 subptr(str1, result); // Restore counter 10518 shrl(str1, 1); 10519 addl(cnt1, str1); 10520 decrementl(cnt1); // Shift to next element 10521 cmpl(cnt1, cnt2); 10522 jccb(Assembler::negative, RET_NOT_FOUND); // Left less then substring 10523 10524 addptr(result, 2); 10525 } // non constant 10526 10527 // Scan string for start of substr in 16-byte vectors 10528 bind(SCAN_TO_SUBSTR); 10529 assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri"); 10530 pcmpestri(vec, Address(result, 0), 0x0d); 10531 jccb(Assembler::below, FOUND_CANDIDATE); // CF == 1 10532 subl(cnt1, 8); 10533 jccb(Assembler::lessEqual, RET_NOT_FOUND); // Scanned full string 10534 cmpl(cnt1, cnt2); 10535 jccb(Assembler::negative, RET_NOT_FOUND); // Left less then substring 10536 addptr(result, 16); 10537 10538 bind(ADJUST_STR); 10539 cmpl(cnt1, 8); // Do not read beyond string 10540 jccb(Assembler::greaterEqual, SCAN_TO_SUBSTR); 10541 // Back-up string to avoid reading beyond string. 10542 lea(result, Address(result, cnt1, Address::times_2, -16)); 10543 movl(cnt1, 8); 10544 jmpb(SCAN_TO_SUBSTR); 10545 10546 // Found a potential substr 10547 bind(FOUND_CANDIDATE); 10548 // After pcmpestri tmp(rcx) contains matched element index 10549 10550 // Make sure string is still long enough 10551 subl(cnt1, tmp); 10552 cmpl(cnt1, cnt2); 10553 jccb(Assembler::greaterEqual, FOUND_SUBSTR); 10554 // Left less then substring. 10555 10556 bind(RET_NOT_FOUND); 10557 movl(result, -1); 10558 jmpb(CLEANUP); 10559 10560 bind(FOUND_SUBSTR); 10561 // Compute start addr of substr 10562 lea(result, Address(result, tmp, Address::times_2)); 10563 10564 if (int_cnt2 > 0) { // Constant substring 10565 // Repeat search for small substring (< 8 chars) 10566 // from new point without reloading substring. 10567 // Have to check that we don't read beyond string. 10568 cmpl(tmp, 8-int_cnt2); 10569 jccb(Assembler::greater, ADJUST_STR); 10570 // Fall through if matched whole substring. 10571 } else { // non constant 10572 assert(int_cnt2 == -1, "should be != 0"); 10573 10574 addl(tmp, cnt2); 10575 // Found result if we matched whole substring. 10576 cmpl(tmp, 8); 10577 jccb(Assembler::lessEqual, RET_FOUND); 10578 10579 // Repeat search for small substring (<= 8 chars) 10580 // from new point 'str1' without reloading substring. 10581 cmpl(cnt2, 8); 10582 // Have to check that we don't read beyond string. 10583 jccb(Assembler::lessEqual, ADJUST_STR); 10584 10585 Label CHECK_NEXT, CONT_SCAN_SUBSTR, RET_FOUND_LONG; 10586 // Compare the rest of substring (> 8 chars). 
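  // The loop below walks both strings in 8-char chunks; near the tail it backs
  // both pointers up so that the final 16-byte loads stay inside the arrays.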
10587 movptr(str1, result); 10588 10589 cmpl(tmp, cnt2); 10590 // First 8 chars are already matched. 10591 jccb(Assembler::equal, CHECK_NEXT); 10592 10593 bind(SCAN_SUBSTR); 10594 pcmpestri(vec, Address(str1, 0), 0x0d); 10595 // Need to reload strings pointers if not matched whole vector 10596 jcc(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0 10597 10598 bind(CHECK_NEXT); 10599 subl(cnt2, 8); 10600 jccb(Assembler::lessEqual, RET_FOUND_LONG); // Found full substring 10601 addptr(str1, 16); 10602 addptr(str2, 16); 10603 subl(cnt1, 8); 10604 cmpl(cnt2, 8); // Do not read beyond substring 10605 jccb(Assembler::greaterEqual, CONT_SCAN_SUBSTR); 10606 // Back-up strings to avoid reading beyond substring. 10607 lea(str2, Address(str2, cnt2, Address::times_2, -16)); 10608 lea(str1, Address(str1, cnt2, Address::times_2, -16)); 10609 subl(cnt1, cnt2); 10610 movl(cnt2, 8); 10611 addl(cnt1, 8); 10612 bind(CONT_SCAN_SUBSTR); 10613 movdqu(vec, Address(str2, 0)); 10614 jmpb(SCAN_SUBSTR); 10615 10616 bind(RET_FOUND_LONG); 10617 movptr(str1, Address(rsp, wordSize)); 10618 } // non constant 10619 10620 bind(RET_FOUND); 10621 // Compute substr offset 10622 subptr(result, str1); 10623 shrl(result, 1); // index 10624 10625 bind(CLEANUP); 10626 pop(rsp); // restore SP 10627 10628 } // string_indexof 10629 10630 // Compare strings. 10631 void MacroAssembler::string_compare(Register str1, Register str2, 10632 Register cnt1, Register cnt2, Register result, 10633 XMMRegister vec1) { 10634 ShortBranchVerifier sbv(this); 10635 Label LENGTH_DIFF_LABEL, POP_LABEL, DONE_LABEL, WHILE_HEAD_LABEL; 10636 10637 // Compute the minimum of the string lengths and the 10638 // difference of the string lengths (stack). 10639 // Do the conditional move stuff 10640 movl(result, cnt1); 10641 subl(cnt1, cnt2); 10642 push(cnt1); 10643 cmov32(Assembler::lessEqual, cnt2, result); 10644 10645 // Is the minimum length zero? 
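  // If the shorter string is empty, the result is simply the length difference
  // that was pushed above.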
10646 testl(cnt2, cnt2); 10647 jcc(Assembler::zero, LENGTH_DIFF_LABEL); 10648 10649 // Load first characters 10650 load_unsigned_short(result, Address(str1, 0)); 10651 load_unsigned_short(cnt1, Address(str2, 0)); 10652 10653 // Compare first characters 10654 subl(result, cnt1); 10655 jcc(Assembler::notZero, POP_LABEL); 10656 decrementl(cnt2); 10657 jcc(Assembler::zero, LENGTH_DIFF_LABEL); 10658 10659 { 10660 // Check after comparing first character to see if strings are equivalent 10661 Label LSkip2; 10662 // Check if the strings start at same location 10663 cmpptr(str1, str2); 10664 jccb(Assembler::notEqual, LSkip2); 10665 10666 // Check if the length difference is zero (from stack) 10667 cmpl(Address(rsp, 0), 0x0); 10668 jcc(Assembler::equal, LENGTH_DIFF_LABEL); 10669 10670 // Strings might not be equivalent 10671 bind(LSkip2); 10672 } 10673 10674 Address::ScaleFactor scale = Address::times_2; 10675 int stride = 8; 10676 10677 // Advance to next element 10678 addptr(str1, 16/stride); 10679 addptr(str2, 16/stride); 10680 10681 if (UseSSE42Intrinsics) { 10682 Label COMPARE_WIDE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_TAIL; 10683 int pcmpmask = 0x19; 10684 // Setup to compare 16-byte vectors 10685 movl(result, cnt2); 10686 andl(cnt2, ~(stride - 1)); // cnt2 holds the vector count 10687 jccb(Assembler::zero, COMPARE_TAIL); 10688 10689 lea(str1, Address(str1, result, scale)); 10690 lea(str2, Address(str2, result, scale)); 10691 negptr(result); 10692 10693 // pcmpestri 10694 // inputs: 10695 // vec1- substring 10696 // rax - negative string length (elements count) 10697 // mem - scaned string 10698 // rdx - string length (elements count) 10699 // pcmpmask - cmp mode: 11000 (string compare with negated result) 10700 // + 00 (unsigned bytes) or + 01 (unsigned shorts) 10701 // outputs: 10702 // rcx - first mismatched element index 10703 assert(result == rax && cnt2 == rdx && cnt1 == rcx, "pcmpestri"); 10704 10705 bind(COMPARE_WIDE_VECTORS); 10706 movdqu(vec1, Address(str1, result, scale)); 10707 pcmpestri(vec1, Address(str2, result, scale), pcmpmask); 10708 // After pcmpestri cnt1(rcx) contains mismatched element index 10709 10710 jccb(Assembler::below, VECTOR_NOT_EQUAL); // CF==1 10711 addptr(result, stride); 10712 subptr(cnt2, stride); 10713 jccb(Assembler::notZero, COMPARE_WIDE_VECTORS); 10714 10715 // compare wide vectors tail 10716 testl(result, result); 10717 jccb(Assembler::zero, LENGTH_DIFF_LABEL); 10718 10719 movl(cnt2, stride); 10720 movl(result, stride); 10721 negptr(result); 10722 movdqu(vec1, Address(str1, result, scale)); 10723 pcmpestri(vec1, Address(str2, result, scale), pcmpmask); 10724 jccb(Assembler::aboveEqual, LENGTH_DIFF_LABEL); 10725 10726 // Mismatched characters in the vectors 10727 bind(VECTOR_NOT_EQUAL); 10728 addptr(result, cnt1); 10729 movptr(cnt2, result); 10730 load_unsigned_short(result, Address(str1, cnt2, scale)); 10731 load_unsigned_short(cnt1, Address(str2, cnt2, scale)); 10732 subl(result, cnt1); 10733 jmpb(POP_LABEL); 10734 10735 bind(COMPARE_TAIL); // limit is zero 10736 movl(cnt2, result); 10737 // Fallthru to tail compare 10738 } 10739 10740 // Shift str2 and str1 to the end of the arrays, negate min 10741 lea(str1, Address(str1, cnt2, scale, 0)); 10742 lea(str2, Address(str2, cnt2, scale, 0)); 10743 negptr(cnt2); 10744 10745 // Compare the rest of the elements 10746 bind(WHILE_HEAD_LABEL); 10747 load_unsigned_short(result, Address(str1, cnt2, scale, 0)); 10748 load_unsigned_short(cnt1, Address(str2, cnt2, scale, 0)); 10749 subl(result, cnt1); 10750 
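  // result now holds the signed difference of the current char pair; any
  // non-zero value is the final answer.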
  // Compare the rest of the elements
  bind(WHILE_HEAD_LABEL);
  load_unsigned_short(result, Address(str1, cnt2, scale, 0));
  load_unsigned_short(cnt1, Address(str2, cnt2, scale, 0));
  subl(result, cnt1);
  jccb(Assembler::notZero, POP_LABEL);
  increment(cnt2);
  jccb(Assembler::notZero, WHILE_HEAD_LABEL);

  // Strings are equal up to min length. Return the length difference.
  bind(LENGTH_DIFF_LABEL);
  pop(result);
  jmpb(DONE_LABEL);

  // Discard the stored length difference
  bind(POP_LABEL);
  pop(cnt1);

  // That's it
  bind(DONE_LABEL);
}

// Compare char[] arrays aligned to 4 bytes or substrings.
void MacroAssembler::char_arrays_equals(bool is_array_equ, Register ary1, Register ary2,
                                        Register limit, Register result, Register chr,
                                        XMMRegister vec1, XMMRegister vec2) {
  ShortBranchVerifier sbv(this);
  Label TRUE_LABEL, FALSE_LABEL, DONE, COMPARE_VECTORS, COMPARE_CHAR;

  int length_offset = arrayOopDesc::length_offset_in_bytes();
  int base_offset   = arrayOopDesc::base_offset_in_bytes(T_CHAR);

  // Check the input args
  cmpptr(ary1, ary2);
  jcc(Assembler::equal, TRUE_LABEL);

  if (is_array_equ) {
    // Need additional checks for arrays_equals.
    testptr(ary1, ary1);
    jcc(Assembler::zero, FALSE_LABEL);
    testptr(ary2, ary2);
    jcc(Assembler::zero, FALSE_LABEL);

    // Check the lengths
    movl(limit, Address(ary1, length_offset));
    cmpl(limit, Address(ary2, length_offset));
    jcc(Assembler::notEqual, FALSE_LABEL);
  }

  // count == 0
  testl(limit, limit);
  jcc(Assembler::zero, TRUE_LABEL);

  if (is_array_equ) {
    // Load array address
    lea(ary1, Address(ary1, base_offset));
    lea(ary2, Address(ary2, base_offset));
  }

  shll(limit, 1);      // byte count != 0
  movl(result, limit); // copy

  if (UseSSE42Intrinsics) {
    // With SSE4.2, use double quad vector compare
    Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;

    // Compare 16-byte vectors
    andl(result, 0x0000000e); // tail count (in bytes)
    andl(limit, 0xfffffff0);  // vector count (in bytes)
    jccb(Assembler::zero, COMPARE_TAIL);

    lea(ary1, Address(ary1, limit, Address::times_1));
    lea(ary2, Address(ary2, limit, Address::times_1));
    negptr(limit);

    bind(COMPARE_WIDE_VECTORS);
    movdqu(vec1, Address(ary1, limit, Address::times_1));
    movdqu(vec2, Address(ary2, limit, Address::times_1));
    pxor(vec1, vec2);

    ptest(vec1, vec1);
    jccb(Assembler::notZero, FALSE_LABEL);
    addptr(limit, 16);
    jcc(Assembler::notZero, COMPARE_WIDE_VECTORS);

    testl(result, result);
    jccb(Assembler::zero, TRUE_LABEL);

    movdqu(vec1, Address(ary1, result, Address::times_1, -16));
    movdqu(vec2, Address(ary2, result, Address::times_1, -16));
    pxor(vec1, vec2);

    ptest(vec1, vec1);
    jccb(Assembler::notZero, FALSE_LABEL);
    jmpb(TRUE_LABEL);

    bind(COMPARE_TAIL); // limit is zero
    movl(limit, result);
    // Fallthru to tail compare
  }
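
  // Without SSE4.2, or for whatever the vector loop left over, the code below
  // compares two chars per iteration by walking a negative byte index up to zero.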
  // Compare 4-byte vectors
  andl(limit, 0xfffffffc); // vector count (in bytes)
  jccb(Assembler::zero, COMPARE_CHAR);

  lea(ary1, Address(ary1, limit, Address::times_1));
  lea(ary2, Address(ary2, limit, Address::times_1));
  negptr(limit);

  bind(COMPARE_VECTORS);
  movl(chr, Address(ary1, limit, Address::times_1));
  cmpl(chr, Address(ary2, limit, Address::times_1));
  jccb(Assembler::notEqual, FALSE_LABEL);
  addptr(limit, 4);
  jcc(Assembler::notZero, COMPARE_VECTORS);

  // Compare trailing char (final 2 bytes), if any
  bind(COMPARE_CHAR);
  testl(result, 0x2); // tail char
  jccb(Assembler::zero, TRUE_LABEL);
  load_unsigned_short(chr, Address(ary1, 0));
  load_unsigned_short(limit, Address(ary2, 0));
  cmpl(chr, limit);
  jccb(Assembler::notEqual, FALSE_LABEL);

  bind(TRUE_LABEL);
  movl(result, 1); // return true
  jmpb(DONE);

  bind(FALSE_LABEL);
  xorl(result, result); // return false

  // That's it
  bind(DONE);
}

#ifdef PRODUCT
#define BLOCK_COMMENT(str) /* nothing */
#else
#define BLOCK_COMMENT(str) block_comment(str)
#endif

#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
void MacroAssembler::generate_fill(BasicType t, bool aligned,
                                   Register to, Register value, Register count,
                                   Register rtmp, XMMRegister xtmp) {
  ShortBranchVerifier sbv(this);
  assert_different_registers(to, value, count, rtmp);
  Label L_exit, L_skip_align1, L_skip_align2, L_fill_byte;
  Label L_fill_2_bytes, L_fill_4_bytes;

  int shift = -1;
  switch (t) {
    case T_BYTE:
      shift = 2;
      break;
    case T_SHORT:
      shift = 1;
      break;
    case T_INT:
      shift = 0;
      break;
    default: ShouldNotReachHere();
  }

  if (t == T_BYTE) {
    andl(value, 0xff);
    movl(rtmp, value);
    shll(rtmp, 8);
    orl(value, rtmp);
  }
  if (t == T_SHORT) {
    andl(value, 0xffff);
  }
  if (t == T_BYTE || t == T_SHORT) {
    movl(rtmp, value);
    shll(rtmp, 16);
    orl(value, rtmp);
  }

  cmpl(count, 2<<shift); // Short arrays (< 8 bytes) fill by element
  jcc(Assembler::below, L_fill_4_bytes); // use unsigned cmp
  if (!UseUnalignedLoadStores && !aligned && (t == T_BYTE || t == T_SHORT)) {
    // Align the fill destination on a 4-byte boundary.
    if (t == T_BYTE) {
      // One-byte misalignment happens only for byte arrays.
      testptr(to, 1);
      jccb(Assembler::zero, L_skip_align1);
      movb(Address(to, 0), value);
      increment(to);
      decrement(count);
      BIND(L_skip_align1);
    }
    // Two-byte misalignment happens only for byte and short (char) arrays.
    testptr(to, 2);
    jccb(Assembler::zero, L_skip_align2);
    movw(Address(to, 0), value);
    addptr(to, 2);
    subl(count, 1<<(shift-1));
    BIND(L_skip_align2);
  }
  if (UseSSE < 2) {
    Label L_fill_32_bytes_loop, L_check_fill_8_bytes, L_fill_8_bytes_loop, L_fill_8_bytes;
    // Fill 32-byte chunks
    subl(count, 8 << shift);
    jcc(Assembler::less, L_check_fill_8_bytes);
    align(16);

    BIND(L_fill_32_bytes_loop);

    for (int i = 0; i < 32; i += 4) {
      movl(Address(to, i), value);
    }

    addptr(to, 32);
    subl(count, 8 << shift);
    jcc(Assembler::greaterEqual, L_fill_32_bytes_loop);
    BIND(L_check_fill_8_bytes);
    addl(count, 8 << shift);
    jccb(Assembler::zero, L_exit);
    jmpb(L_fill_8_bytes);

    //
    // length is too short, just fill qwords
    //
    BIND(L_fill_8_bytes_loop);
    movl(Address(to, 0), value);
    movl(Address(to, 4), value);
    addptr(to, 8);
    BIND(L_fill_8_bytes);
    subl(count, 1 << (shift + 1));
    jcc(Assembler::greaterEqual, L_fill_8_bytes_loop);
    // fall through to fill 4 bytes
  } else {
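    // SSE2+ path: broadcast the 32-bit fill pattern into an XMM register and
    // store 32 bytes per iteration (unaligned 16-byte stores when
    // UseUnalignedLoadStores is set, otherwise four 8-byte stores).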
    Label L_fill_32_bytes;
    if (!UseUnalignedLoadStores) {
      // align to 8 bytes, we know we are 4-byte aligned to start
      testptr(to, 4);
      jccb(Assembler::zero, L_fill_32_bytes);
      movl(Address(to, 0), value);
      addptr(to, 4);
      subl(count, 1<<shift);
    }
    BIND(L_fill_32_bytes);
    {
      assert(UseSSE >= 2, "supported cpu only");
      Label L_fill_32_bytes_loop, L_check_fill_8_bytes, L_fill_8_bytes_loop, L_fill_8_bytes;
      // Fill 32-byte chunks
      movdl(xtmp, value);
      pshufd(xtmp, xtmp, 0);

      subl(count, 8 << shift);
      jcc(Assembler::less, L_check_fill_8_bytes);
      align(16);

      BIND(L_fill_32_bytes_loop);

      if (UseUnalignedLoadStores) {
        movdqu(Address(to, 0), xtmp);
        movdqu(Address(to, 16), xtmp);
      } else {
        movq(Address(to, 0), xtmp);
        movq(Address(to, 8), xtmp);
        movq(Address(to, 16), xtmp);
        movq(Address(to, 24), xtmp);
      }

      addptr(to, 32);
      subl(count, 8 << shift);
      jcc(Assembler::greaterEqual, L_fill_32_bytes_loop);
      BIND(L_check_fill_8_bytes);
      addl(count, 8 << shift);
      jccb(Assembler::zero, L_exit);
      jmpb(L_fill_8_bytes);

      //
      // length is too short, just fill qwords
      //
      BIND(L_fill_8_bytes_loop);
      movq(Address(to, 0), xtmp);
      addptr(to, 8);
      BIND(L_fill_8_bytes);
      subl(count, 1 << (shift + 1));
      jcc(Assembler::greaterEqual, L_fill_8_bytes_loop);
    }
  }
  // fill trailing 4 bytes
  BIND(L_fill_4_bytes);
  testl(count, 1<<shift);
  jccb(Assembler::zero, L_fill_2_bytes);
  movl(Address(to, 0), value);
  if (t == T_BYTE || t == T_SHORT) {
    addptr(to, 4);
    BIND(L_fill_2_bytes);
    // fill trailing 2 bytes
    testl(count, 1<<(shift-1));
    jccb(Assembler::zero, L_fill_byte);
    movw(Address(to, 0), value);
    if (t == T_BYTE) {
      addptr(to, 2);
      BIND(L_fill_byte);
      // fill trailing byte
      testl(count, 1);
      jccb(Assembler::zero, L_exit);
      movb(Address(to, 0), value);
    } else {
      BIND(L_fill_byte);
    }
  } else {
    BIND(L_fill_2_bytes);
  }
  BIND(L_exit);
}
#undef BIND
#undef BLOCK_COMMENT


Assembler::Condition MacroAssembler::negate_condition(Assembler::Condition cond) {
  switch (cond) {
    // Note some conditions are synonyms for others
    case Assembler::zero:         return Assembler::notZero;
    case Assembler::notZero:      return Assembler::zero;
    case Assembler::less:         return Assembler::greaterEqual;
    case Assembler::lessEqual:    return Assembler::greater;
    case Assembler::greater:      return Assembler::lessEqual;
    case Assembler::greaterEqual: return Assembler::less;
    case Assembler::below:        return Assembler::aboveEqual;
    case Assembler::belowEqual:   return Assembler::above;
    case Assembler::above:        return Assembler::belowEqual;
    case Assembler::aboveEqual:   return Assembler::below;
    case Assembler::overflow:     return Assembler::noOverflow;
    case Assembler::noOverflow:   return Assembler::overflow;
    case Assembler::negative:     return Assembler::positive;
    case Assembler::positive:     return Assembler::negative;
    case Assembler::parity:       return Assembler::noParity;
    case Assembler::noParity:     return Assembler::parity;
  }
  ShouldNotReachHere(); return Assembler::overflow;
}
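
// SkipIfEqual is a scoped helper: the constructor emits a compare of the byte
// flag at flag_addr against the given value and a conditional jump over the
// code emitted while the object is live; the destructor binds the jump target.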
SkipIfEqual::SkipIfEqual(
    MacroAssembler* masm, const bool* flag_addr, bool value) {
  _masm = masm;
  _masm->cmp8(ExternalAddress((address)flag_addr), value);
  _masm->jcc(Assembler::equal, _label);
}

SkipIfEqual::~SkipIfEqual() {
  _masm->bind(_label);
}
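
// Illustrative use (a sketch only; the flag name here is hypothetical):
//   {
//     SkipIfEqual skip(masm, &SomeDiagnosticFlag, false);
//     // instructions emitted here are skipped at run time
//     // whenever SomeDiagnosticFlag is false
//   } // destructor binds the skip target here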