1 /* 2 * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 
 *
 */

#include "precompiled.hpp"
#include "assembler_x86.inline.hpp"
#include "gc_interface/collectedHeap.inline.hpp"
#include "interpreter/interpreter.hpp"
#include "memory/cardTableModRefBS.hpp"
#include "memory/resourceArea.hpp"
#include "prims/methodHandles.hpp"
#include "runtime/biasedLocking.hpp"
#include "runtime/interfaceSupport.hpp"
#include "runtime/objectMonitor.hpp"
#include "runtime/os.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/stubRoutines.hpp"
#ifndef SERIALGC
#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
#include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"
#include "gc_implementation/g1/heapRegion.hpp"
#endif

// In PRODUCT builds block comments compile away entirely; in debug builds
// they are recorded in the code buffer so disassembly output is annotated.
#ifdef PRODUCT
#define BLOCK_COMMENT(str) /* nothing */
#define STOP(error) stop(error)
#else
#define BLOCK_COMMENT(str) block_comment(str)
#define STOP(error) block_comment(error); stop(error)
#endif

// Bind a label and leave a named marker in the (debug) disassembly.
#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")

// Implementation of AddressLiteral

// Build a literal address together with the relocation spec appropriate
// for the given relocation type; _rspec stays relocInfo::none for types
// that need no relocation record here.
AddressLiteral::AddressLiteral(address target, relocInfo::relocType rtype) {
  _is_lval = false;
  _target = target;
  switch (rtype) {
  case relocInfo::oop_type:
  case relocInfo::metadata_type:
    // Oops are a special case. Normally they would be their own section
    // but in cases like icBuffer they are literals in the code stream that
    // we don't have a section for. We use none so that we get a literal address
    // which is always patchable.
    break;
  case relocInfo::external_word_type:
    _rspec = external_word_Relocation::spec(target);
    break;
  case relocInfo::internal_word_type:
    _rspec = internal_word_Relocation::spec(target);
    break;
  case relocInfo::opt_virtual_call_type:
    _rspec = opt_virtual_call_Relocation::spec();
    break;
  case relocInfo::static_call_type:
    _rspec = static_call_Relocation::spec();
    break;
  case relocInfo::runtime_call_type:
    _rspec = runtime_call_Relocation::spec();
    break;
  case relocInfo::poll_type:
  case relocInfo::poll_return_type:
    _rspec = Relocation::spec_simple(rtype);
    break;
  case relocInfo::none:
    break;
  default:
    ShouldNotReachHere();
    break;
  }
}

// Implementation of Address

#ifdef _LP64

Address Address::make_array(ArrayAddress adr) {
  // Not implementable on 64bit machines
  // Should have been handled higher up the call chain.
  ShouldNotReachHere();
  return Address();
}

// exceedingly dangerous constructor
Address::Address(int disp, address loc, relocInfo::relocType rtype) {
  _base  = noreg;
  _index = noreg;
  _scale = no_scale;
  _disp  = disp;
  switch (rtype) {
    case relocInfo::external_word_type:
      _rspec = external_word_Relocation::spec(loc);
      break;
    case relocInfo::internal_word_type:
      _rspec = internal_word_Relocation::spec(loc);
      break;
    case relocInfo::runtime_call_type:
      // HMM
      _rspec = runtime_call_Relocation::spec();
      break;
    case relocInfo::poll_type:
    case relocInfo::poll_return_type:
      _rspec = Relocation::spec_simple(rtype);
      break;
    case relocInfo::none:
      break;
    default:
      ShouldNotReachHere();
  }
}
#else // LP64

// 32-bit only: fold an ArrayAddress (literal base + scaled index) into a
// plain Address whose disp is the literal base address.
Address Address::make_array(ArrayAddress adr) {
  AddressLiteral base = adr.base();
  Address index = adr.index();
  assert(index._disp == 0, "must not have disp"); // maybe it can?
  Address array(index._base, index._index, index._scale, (intptr_t) base.target());
  array._rspec = base._rspec;
  return array;
}

// exceedingly dangerous constructor
Address::Address(address loc, RelocationHolder spec) {
  _base  = noreg;
  _index = noreg;
  _scale = no_scale;
  _disp  = (intptr_t) loc;
  _rspec = spec;
}

#endif // _LP64



// Convert the raw encoding form into the form expected by the constructor for
// Address.  An index of 4 (rsp) corresponds to having no index, so convert
// that to noreg for the Address constructor.
Address Address::make_raw(int base, int index, int scale, int disp, relocInfo::relocType disp_reloc) {
  RelocationHolder rspec;
  if (disp_reloc != relocInfo::none) {
    rspec = Relocation::spec_simple(disp_reloc);
  }
  bool valid_index = index != rsp->encoding();
  if (valid_index) {
    Address madr(as_Register(base), as_Register(index), (Address::ScaleFactor)scale, in_ByteSize(disp));
    madr._rspec = rspec;
    return madr;
  } else {
    Address madr(as_Register(base), noreg, Address::no_scale, in_ByteSize(disp));
    madr._rspec = rspec;
    return madr;
  }
}

// Implementation of Assembler

// Padding byte used to fill unused code-buffer space: hlt traps if executed.
int AbstractAssembler::code_fill_byte() {
  return (u_char)'\xF4'; // hlt
}

// make this go away someday
void Assembler::emit_data(jint data, relocInfo::relocType rtype, int format) {
  if (rtype == relocInfo::none)
    emit_long(data);
  else
    emit_data(data, Relocation::spec_simple(rtype), format);
}

// Emit a 32-bit word and, if a relocation is requested, record it against
// the enclosing instruction (inst_mark), not the embedded word itself.
void Assembler::emit_data(jint data, RelocationHolder const& rspec, int format) {
  assert(imm_operand == 0, "default format must be immediate in this file");
  assert(inst_mark() != NULL, "must be inside InstructionMark");
  if (rspec.type() != relocInfo::none) {
#ifdef ASSERT
    check_relocation(rspec, format);
#endif
    // Do not use AbstractAssembler::relocate, which is not intended for
    // embedded words.  Instead, relocate to the enclosing instruction.

    // hack. call32 is too wide for mask so use disp32
    if (format == call32_operand)
      code_section()->relocate(inst_mark(), rspec, disp32_operand);
    else
      code_section()->relocate(inst_mark(), rspec, format);
  }
  emit_long(data);
}

// Low 3 bits of a register encoding; the high (REX-extension) bit, if any,
// is emitted separately by the prefix logic.
static int encode(Register r) {
  int enc = r->encoding();
  if (enc >= 8) {
    enc -= 8;
  }
  return enc;
}

static int encode(XMMRegister r) {
  int enc = r->encoding();
  if (enc >= 8) {
    enc -= 8;
  }
  return enc;
}

// 8-bit register/immediate arithmetic: opcode, ModRM, imm8.
void Assembler::emit_arith_b(int op1, int op2, Register dst, int imm8) {
  assert(dst->has_byte_register(), "must have byte register");
  assert(isByte(op1) && isByte(op2), "wrong opcode");
  assert(isByte(imm8), "not a byte");
  assert((op1 & 0x01) == 0, "should be 8bit operation");
  emit_byte(op1);
  emit_byte(op2 | encode(dst));
  emit_byte(imm8);
}


// 32-bit register/immediate arithmetic; uses the sign-extended imm8
// encoding when the immediate fits in a byte.
void Assembler::emit_arith(int op1, int op2, Register dst, int32_t imm32) {
  assert(isByte(op1) && isByte(op2), "wrong opcode");
  assert((op1 & 0x01) == 1, "should be 32bit operation");
  assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
  if (is8bit(imm32)) {
    emit_byte(op1 | 0x02); // set sign bit
    emit_byte(op2 | encode(dst));
    emit_byte(imm32 & 0xFF);
  } else {
    emit_byte(op1);
    emit_byte(op2 | encode(dst));
    emit_long(imm32);
  }
}

// Force generation of a 4 byte immediate value even if it fits into 8bit
void Assembler::emit_arith_imm32(int op1, int op2, Register dst, int32_t imm32) {
  assert(isByte(op1) && isByte(op2), "wrong opcode");
  assert((op1 & 0x01) == 1, "should be 32bit operation");
  assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
  emit_byte(op1);
  emit_byte(op2 | encode(dst));
  emit_long(imm32);
}

// immediate-to-memory forms
void Assembler::emit_arith_operand(int op1, Register rm, Address adr, int32_t imm32) {
  assert((op1 & 0x01) == 1, "should be 32bit operation");
  assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
  if (is8bit(imm32)) {
    emit_byte(op1 | 0x02); // set sign bit
    emit_operand(rm, adr, 1);
    emit_byte(imm32 & 0xFF);
  } else {
    emit_byte(op1);
    emit_operand(rm, adr, 4);
    emit_long(imm32);
  }
}


// register-to-register arithmetic: opcode then ModRM (reg, r/m).
void Assembler::emit_arith(int op1, int op2, Register dst, Register src) {
  assert(isByte(op1) && isByte(op2), "wrong opcode");
  emit_byte(op1);
  emit_byte(op2 | encode(dst) << 3 | encode(src));
}


// Emit the ModRM byte, optional SIB byte and displacement encoding the
// memory operand [base + index*scale + disp] for instruction register
// field 'reg'.  The bit patterns in the comments below spell out each
// emitted ModRM/SIB combination.
void Assembler::emit_operand(Register reg, Register base, Register index,
                             Address::ScaleFactor scale, int disp,
                             RelocationHolder const& rspec,
                             int rip_relative_correction) {
  relocInfo::relocType rtype = (relocInfo::relocType) rspec.type();

  // Encode the registers as needed in the fields they are used in

  int regenc = encode(reg) << 3;
  int indexenc = index->is_valid() ? encode(index) << 3 : 0;
  int baseenc = base->is_valid() ? encode(base) : 0;

  if (base->is_valid()) {
    if (index->is_valid()) {
      assert(scale != Address::no_scale, "inconsistent address");
      // [base + index*scale + disp]
      if (disp == 0 && rtype == relocInfo::none &&
          base != rbp LP64_ONLY(&& base != r13)) {
        // [base + index*scale]
        // [00 reg 100][ss index base]
        assert(index != rsp, "illegal addressing mode");
        emit_byte(0x04 | regenc);
        emit_byte(scale << 6 | indexenc | baseenc);
      } else if (is8bit(disp) && rtype == relocInfo::none) {
        // [base + index*scale + imm8]
        // [01 reg 100][ss index base] imm8
        assert(index != rsp, "illegal addressing mode");
        emit_byte(0x44 | regenc);
        emit_byte(scale << 6 | indexenc | baseenc);
        emit_byte(disp & 0xFF);
      } else {
        // [base + index*scale + disp32]
        // [10 reg 100][ss index base] disp32
        assert(index != rsp, "illegal addressing mode");
        emit_byte(0x84 | regenc);
        emit_byte(scale << 6 | indexenc | baseenc);
        emit_data(disp, rspec, disp32_operand);
      }
    } else if (base == rsp LP64_ONLY(|| base == r12)) {
      // [rsp + disp]
      if (disp == 0 && rtype == relocInfo::none) {
        // [rsp]
        // [00 reg 100][00 100 100]
        emit_byte(0x04 | regenc);
        emit_byte(0x24);
      } else if (is8bit(disp) && rtype == relocInfo::none) {
        // [rsp + imm8]
        // [01 reg 100][00 100 100] disp8
        emit_byte(0x44 | regenc);
        emit_byte(0x24);
        emit_byte(disp & 0xFF);
      } else {
        // [rsp + imm32]
        // [10 reg 100][00 100 100] disp32
        emit_byte(0x84 | regenc);
        emit_byte(0x24);
        emit_data(disp, rspec, disp32_operand);
      }
    } else {
      // [base + disp]
      assert(base != rsp LP64_ONLY(&& base != r12), "illegal addressing mode");
      if (disp == 0 && rtype == relocInfo::none &&
          base != rbp LP64_ONLY(&& base != r13)) {
        // [base]
        // [00 reg base]
        emit_byte(0x00 | regenc | baseenc);
      } else if (is8bit(disp) && rtype == relocInfo::none) {
        // [base + disp8]
        // [01 reg base] disp8
        emit_byte(0x40 | regenc | baseenc);
        emit_byte(disp & 0xFF);
      } else {
        // [base + disp32]
        // [10 reg base] disp32
        emit_byte(0x80 | regenc | baseenc);
        emit_data(disp, rspec, disp32_operand);
      }
    }
  } else {
    if (index->is_valid()) {
      assert(scale != Address::no_scale, "inconsistent address");
      // [index*scale + disp]
      // [00 reg 100][ss index 101] disp32
      assert(index != rsp, "illegal addressing mode");
      emit_byte(0x04 | regenc);
      emit_byte(scale << 6 | indexenc | 0x05);
      emit_data(disp, rspec, disp32_operand);
    } else if (rtype != relocInfo::none ) {
      // [disp] (64bit) RIP-RELATIVE (32bit) abs
      // [00 000 101] disp32

      emit_byte(0x05 | regenc);
      // Note that the RIP-rel. correction applies to the generated
      // disp field, but _not_ to the target address in the rspec.

      // disp was created by converting the target address minus the pc
      // at the start of the instruction. That needs more correction here.
      // intptr_t disp = target - next_ip;
      assert(inst_mark() != NULL, "must be inside InstructionMark");
      address next_ip = pc() + sizeof(int32_t) + rip_relative_correction;
      int64_t adjusted = disp;
      // Do rip-rel adjustment for 64bit
      LP64_ONLY(adjusted -=  (next_ip - inst_mark()));
      assert(is_simm32(adjusted),
             "must be 32bit offset (RIP relative address)");
      emit_data((int32_t) adjusted, rspec, disp32_operand);

    } else {
      // 32bit never did this, did everything as the rip-rel/disp code above
      // [disp] ABSOLUTE
      // [00 reg 100][00 100 101] disp32
      emit_byte(0x04 | regenc);
      emit_byte(0x25);
      emit_data(disp, rspec, disp32_operand);
    }
  }
}

void Assembler::emit_operand(XMMRegister reg, Register base, Register index,
                             Address::ScaleFactor scale, int disp,
                             RelocationHolder const& rspec) {
  emit_operand((Register)reg, base, index, scale, disp, rspec);
}

// Secret local extension to Assembler::WhichOperand:
#define end_pc_operand (_WhichOperand_limit)

address Assembler::locate_operand(address inst, WhichOperand which) {
  // Decode the given instruction, and return the address of
  // an embedded 32-bit operand word.

  // If "which" is disp32_operand, selects the displacement portion
  // of an effective address specifier.
  // If "which" is imm64_operand, selects the trailing immediate constant.
  // If "which" is call32_operand, selects the displacement of a call or jump.
  // Caller is responsible for ensuring that there is such an operand,
  // and that it is 32/64 bits wide.

  // If "which" is end_pc_operand, find the end of the instruction.

  address ip = inst;
  bool is_64bit = false;

  debug_only(bool has_disp32 = false);
  int tail_size = 0; // other random bytes (#32, #16, etc.) at end of insn

  again_after_prefix:
  switch (0xFF & *ip++) {

  // These convenience macros generate groups of "case" labels for the switch.
#define REP4(x) (x)+0: case (x)+1: case (x)+2: case (x)+3
#define REP8(x) (x)+0: case (x)+1: case (x)+2: case (x)+3: \
             case (x)+4: case (x)+5: case (x)+6: case (x)+7
#define REP16(x) REP8((x)+0): \
              case REP8((x)+8)

  case CS_segment:
  case SS_segment:
  case DS_segment:
  case ES_segment:
  case FS_segment:
  case GS_segment:
    // Seems dubious
    LP64_ONLY(assert(false, "shouldn't have that prefix"));
    assert(ip == inst+1, "only one prefix allowed");
    goto again_after_prefix;

  case 0x67:
  case REX:
  case REX_B:
  case REX_X:
  case REX_XB:
  case REX_R:
  case REX_RB:
  case REX_RX:
  case REX_RXB:
    NOT_LP64(assert(false, "64bit prefixes"));
    goto again_after_prefix;

  case REX_W:
  case REX_WB:
  case REX_WX:
  case REX_WXB:
  case REX_WR:
  case REX_WRB:
  case REX_WRX:
  case REX_WRXB:
    NOT_LP64(assert(false, "64bit prefixes"));
    is_64bit = true;
    goto again_after_prefix;

  case 0xFF: // pushq a; decl a; incl a; call a; jmp a
  case 0x88: // movb a, r
  case 0x89: // movl a, r
  case 0x8A: // movb r, a
  case 0x8B: // movl r, a
  case 0x8F: // popl a
    debug_only(has_disp32 = true);
    break;

  case 0x68: // pushq #32
    if (which == end_pc_operand) {
      return ip + 4;
    }
    assert(which == imm_operand && !is_64bit, "pushl has no disp32 or 64bit immediate");
    return ip; // not produced by emit_operand

  case 0x66: // movw ... (size prefix)
    again_after_size_prefix2:
    switch (0xFF & *ip++) {
    case REX:
    case REX_B:
    case REX_X:
    case REX_XB:
    case REX_R:
    case REX_RB:
    case REX_RX:
    case REX_RXB:
    case REX_W:
    case REX_WB:
    case REX_WX:
    case REX_WXB:
    case REX_WR:
    case REX_WRB:
    case REX_WRX:
    case REX_WRXB:
      NOT_LP64(assert(false, "64bit prefix found"));
      goto again_after_size_prefix2;
    case 0x8B: // movw r, a
    case 0x89: // movw a, r
      debug_only(has_disp32 = true);
      break;
    case 0xC7: // movw a, #16
      debug_only(has_disp32 = true);
      tail_size = 2;  // the imm16
      break;
    case 0x0F: // several SSE/SSE2 variants
      ip--;    // reparse the 0x0F
      goto again_after_prefix;
    default:
      ShouldNotReachHere();
    }
    break;

  case REP8(0xB8): // movl/q r, #32/#64(oop?)
    if (which == end_pc_operand)  return ip + (is_64bit ? 8 : 4);
    // these asserts are somewhat nonsensical
#ifndef _LP64
    assert(which == imm_operand || which == disp32_operand,
           err_msg("which %d is_64_bit %d ip " INTPTR_FORMAT, which, is_64bit, ip));
#else
    assert((which == call32_operand || which == imm_operand) && is_64bit ||
           which == narrow_oop_operand && !is_64bit,
           err_msg("which %d is_64_bit %d ip " INTPTR_FORMAT, which, is_64bit, ip));
#endif // _LP64
    return ip;

  case 0x69: // imul r, a, #32
  case 0xC7: // movl a, #32(oop?)
    tail_size = 4;
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0x0F: // movx..., etc.
    switch (0xFF & *ip++) {
    case 0x3A: // pcmpestri
      tail_size = 1;
    case 0x38: // ptest, pmovzxbw
      ip++; // skip opcode
      debug_only(has_disp32 = true); // has both kinds of operands!
      break;

    case 0x70: // pshufd r, r/a, #8
      debug_only(has_disp32 = true); // has both kinds of operands!
    case 0x73: // psrldq r, #8
      tail_size = 1;
      break;

    case 0x12: // movlps
    case 0x28: // movaps
    case 0x2E: // ucomiss
    case 0x2F: // comiss
    case 0x54: // andps
    case 0x55: // andnps
    case 0x56: // orps
    case 0x57: // xorps
    case 0x6E: // movd
    case 0x7E: // movd
    case 0xAE: // ldmxcsr, stmxcsr, fxrstor, fxsave, clflush
      debug_only(has_disp32 = true);
      break;

    case 0xAD: // shrd r, a, %cl
    case 0xAF: // imul r, a
    case 0xBE: // movsbl r, a (movsxb)
    case 0xBF: // movswl r, a (movsxw)
    case 0xB6: // movzbl r, a (movzxb)
    case 0xB7: // movzwl r, a (movzxw)
    case REP16(0x40): // cmovl cc, r, a
    case 0xB0: // cmpxchgb
    case 0xB1: // cmpxchg
    case 0xC1: // xaddl
    case 0xC7: // cmpxchg8
    case REP16(0x90): // setcc a
      debug_only(has_disp32 = true);
      // fall out of the switch to decode the address
      break;

    case 0xC4: // pinsrw r, a, #8
      debug_only(has_disp32 = true);
    case 0xC5: // pextrw r, r, #8
      tail_size = 1;  // the imm8
      break;

    case 0xAC: // shrd r, a, #8
      debug_only(has_disp32 = true);
      tail_size = 1;  // the imm8
      break;

    case REP16(0x80): // jcc rdisp32
      if (which == end_pc_operand)  return ip + 4;
      assert(which == call32_operand, "jcc has no disp32 or imm");
      return ip;
    default:
      ShouldNotReachHere();
    }
    break;

  case 0x81: // addl a, #32; addl r, #32
    // also: orl, adcl, sbbl, andl, subl, xorl, cmpl
    // on 32bit in the case of cmpl, the imm might be an oop
    tail_size = 4;
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0x83: // addl a, #8; addl r, #8
    // also: orl, adcl, sbbl, andl, subl, xorl, cmpl
    debug_only(has_disp32 = true); // has both kinds of operands!
    tail_size = 1;
    break;

  case 0x9B:
    switch (0xFF & *ip++) {
    case 0xD9: // fnstcw a
      debug_only(has_disp32 = true);
      break;
    default:
      ShouldNotReachHere();
    }
    break;

  case REP4(0x00): // addb a, r; addl a, r; addb r, a; addl r, a
  case REP4(0x10): // adc...
  case REP4(0x20): // and...
  case REP4(0x30): // xor...
  case REP4(0x08): // or...
  case REP4(0x18): // sbb...
  case REP4(0x28): // sub...
  case 0xF7: // mull a
  case 0x8D: // lea r, a
  case 0x87: // xchg r, a
  case REP4(0x38): // cmp...
  case 0x85: // test r, a
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0xC1: // sal a, #8; sar a, #8; shl a, #8; shr a, #8
  case 0xC6: // movb a, #8
  case 0x80: // cmpb a, #8
  case 0x6B: // imul r, a, #8
    debug_only(has_disp32 = true); // has both kinds of operands!
    tail_size = 1; // the imm8
    break;

  case 0xC4: // VEX_3bytes
  case 0xC5: // VEX_2bytes
    assert((UseAVX > 0), "shouldn't have VEX prefix");
    assert(ip == inst+1, "no prefixes allowed");
    // C4 and C5 are also used as opcodes for PINSRW and PEXTRW instructions
    // but they have prefix 0x0F and processed when 0x0F processed above.
    //
    // In 32-bit mode the VEX first byte C4 and C5 alias onto LDS and LES
    // instructions (these instructions are not supported in 64-bit mode).
    // To distinguish them bits [7:6] are set in the VEX second byte since
    // ModRM byte can not be of the form 11xxxxxx in 32-bit mode. To set
    // those VEX bits REX and vvvv bits are inverted.
    //
    // Fortunately C2 doesn't generate these instructions so we don't need
    // to check for them in product version.

    // Check second byte
    NOT_LP64(assert((0xC0 & *ip) == 0xC0, "shouldn't have LDS and LES instructions"));

    // First byte
    if ((0xFF & *inst) == VEX_3bytes) {
      ip++; // third byte
      is_64bit = ((VEX_W & *ip) == VEX_W);
    }
    ip++; // opcode
    // To find the end of instruction (which == end_pc_operand).
    switch (0xFF & *ip) {
    case 0x61: // pcmpestri r, r/a, #8
    case 0x70: // pshufd r, r/a, #8
    case 0x73: // psrldq r, #8
      tail_size = 1;  // the imm8
      break;
    default:
      break;
    }
    ip++; // skip opcode
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0xD1: // sal a, 1; sar a, 1; shl a, 1; shr a, 1
  case 0xD3: // sal a, %cl; sar a, %cl; shl a, %cl; shr a, %cl
  case 0xD9: // fld_s a; fst_s a; fstp_s a; fldcw a
  case 0xDD: // fld_d a; fst_d a; fstp_d a
  case 0xDB: // fild_s a; fistp_s a; fld_x a; fstp_x a
  case 0xDF: // fild_d a; fistp_d a
  case 0xD8: // fadd_s a; fsubr_s a; fmul_s a; fdivr_s a; fcomp_s a
  case 0xDC: // fadd_d a; fsubr_d a; fmul_d a; fdivr_d a; fcomp_d a
  case 0xDE: // faddp_d a; fsubrp_d a; fmulp_d a; fdivrp_d a; fcompp_d a
    debug_only(has_disp32 = true);
    break;

  case 0xE8: // call rdisp32
  case 0xE9: // jmp  rdisp32
    if (which == end_pc_operand)  return ip + 4;
    assert(which == call32_operand, "call has no disp32 or imm");
    return ip;

  case 0xF0: // Lock
    assert(os::is_MP(), "only on MP");
    goto again_after_prefix;

  case 0xF3: // For SSE
  case 0xF2: // For SSE2
    switch (0xFF & *ip++) {
    case REX:
    case REX_B:
    case REX_X:
    case REX_XB:
    case REX_R:
    case REX_RB:
    case REX_RX:
    case REX_RXB:
    case REX_W:
    case REX_WB:
    case REX_WX:
    case REX_WXB:
    case REX_WR:
    case REX_WRB:
    case REX_WRX:
    case REX_WRXB:
      NOT_LP64(assert(false, "found 64bit prefix"));
      ip++;
    default:
      ip++;
    }
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  default:
    ShouldNotReachHere();

#undef REP8
#undef REP16
  }

  assert(which != call32_operand, "instruction is not a call, jmp, or jcc");
#ifdef _LP64
  assert(which != imm_operand, "instruction is not a movq reg, imm64");
#else
  // assert(which != imm_operand || has_imm32, "instruction has no imm32 field");
  assert(which != imm_operand || has_disp32, "instruction has no imm32 field");
#endif // LP64
  assert(which != disp32_operand || has_disp32, "instruction has no disp32 field");

  // parse the output of emit_operand
  int op2 = 0xFF & *ip++;
  int base = op2 & 0x07;
  int op3 = -1;
  const int b100 = 4;
  const int b101 = 5;
  if (base == b100 && (op2 >> 6) != 3) {
    op3 = 0xFF & *ip++;
    base = op3 & 0x07;   // refetch the base
  }
  // now ip points at the disp (if any)

  switch (op2 >> 6) {
  case 0:
    // [00 reg  100][ss index base]
    // [00 reg  100][00   100  esp]
    // [00 reg base]
    // [00 reg  100][ss index  101][disp32]
    // [00 reg  101]               [disp32]

    if (base == b101) {
      if (which == disp32_operand)
        return ip;              // caller wants the disp32
      ip += 4;                  // skip the disp32
    }
    break;

  case 1:
    // [01 reg  100][ss index base][disp8]
    // [01 reg  100][00   100  esp][disp8]
    // [01 reg base]               [disp8]
    ip += 1;                    // skip the disp8
    break;

  case 2:
    // [10 reg  100][ss index base][disp32]
    // [10 reg  100][00   100  esp][disp32]
    // [10 reg base]               [disp32]
    if (which == disp32_operand)
      return ip;                // caller wants the disp32
    ip += 4;                    // skip the disp32
    break;

  case 3:
    // [11 reg base]  (not a memory addressing mode)
    break;
  }

  if (which == end_pc_operand) {
    return ip + tail_size;
  }

#ifdef _LP64
  assert(which == narrow_oop_operand && !is_64bit, "instruction is not a movl adr, imm32");
#else
  assert(which == imm_operand, "instruction has only an imm field");
#endif // LP64
  return ip;
}

address Assembler::locate_next_instruction(address inst) {
  // Secretly share code with locate_operand:
  return locate_operand(inst, end_pc_operand);
}


#ifdef ASSERT
// Debug-only sanity check: verify that the relocation being recorded really
// points at the operand just located inside the current instruction.
void Assembler::check_relocation(RelocationHolder const& rspec, int format) {
  address inst = inst_mark();
  assert(inst != NULL && inst < pc(), "must point to beginning of instruction");
  address opnd;

  Relocation* r = rspec.reloc();
  if (r->type() == relocInfo::none) {
    return;
  } else if (r->is_call() || format == call32_operand) {
    // assert(format == imm32_operand, "cannot specify a nonzero format");
    opnd = locate_operand(inst, call32_operand);
  } else if (r->is_data()) {
    assert(format == imm_operand || format == disp32_operand
           LP64_ONLY(|| format == narrow_oop_operand), "format ok");
    opnd = locate_operand(inst, (WhichOperand)format);
  } else {
    assert(format == imm_operand, "cannot specify a format");
    return;
  }
  assert(opnd == pc(), "must put operand where relocs can find it");
}
#endif // ASSERT

void Assembler::emit_operand32(Register reg, Address adr) {
  assert(reg->encoding() < 8, "no extended registers");
  assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
  emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
               adr._rspec);
}

void Assembler::emit_operand(Register reg, Address adr,
                             int rip_relative_correction) {
  emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
               adr._rspec,
               rip_relative_correction);
}

void Assembler::emit_operand(XMMRegister reg, Address adr) {
  emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
               adr._rspec);
}

// MMX operations
void Assembler::emit_operand(MMXRegister reg, Address adr) {
  assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
  emit_operand((Register)reg, adr._base, adr._index, adr._scale, adr._disp, adr._rspec);
}

// work around gcc (3.2.1-7a) bug
void Assembler::emit_operand(Address adr, MMXRegister reg) {
  assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
  emit_operand((Register)reg, adr._base, adr._index, adr._scale, adr._disp, adr._rspec);
}


// x87 arithmetic: two opcode bytes, with the FP stack slot index folded
// into the second byte.
void Assembler::emit_farith(int b1, int b2, int i) {
  assert(isByte(b1) && isByte(b2), "wrong opcode");
  assert(0 <= i &&  i < 8, "illegal stack offset");
  emit_byte(b1);
  emit_byte(b2 + i);
}


// Now the Assembler instructions (identical for 32/64 bits)

void Assembler::adcl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_arith_operand(0x81, rdx, dst, imm32);
}

void Assembler::adcl(Address dst, Register src) {
  InstructionMark im(this);
  prefix(dst, src);
  emit_byte(0x11);
  emit_operand(src, dst);
}

void Assembler::adcl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xD0, dst, imm32);
}

void Assembler::adcl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x13);
  emit_operand(dst, src);
}

void Assembler::adcl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x13, 0xC0, dst, src);
}

void Assembler::addl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_arith_operand(0x81, rax, dst, imm32);
}

void Assembler::addl(Address dst, Register src) {
  InstructionMark im(this);
  prefix(dst, src);
  emit_byte(0x01);
  emit_operand(src, dst);
}

void Assembler::addl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xC0, dst, imm32);
}

void Assembler::addl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x03);
  emit_operand(dst, src);
}

void Assembler::addl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x03, 0xC0, dst, src);
}

void Assembler::addr_nop_4() {
  assert(UseAddressNop, "no CPU support");
  // 4 bytes: NOP DWORD PTR [EAX+0]
  emit_byte(0x0F);
  emit_byte(0x1F);
  emit_byte(0x40); // emit_rm(cbuf, 0x1, EAX_enc, EAX_enc);
  emit_byte(0);    // 8-bits offset (1 byte)
}

void Assembler::addr_nop_5() {
  assert(UseAddressNop, "no CPU support");
  // 5 bytes: NOP DWORD PTR [EAX+EAX*0+0] 8-bits offset
  emit_byte(0x0F);
  emit_byte(0x1F);
  emit_byte(0x44); // emit_rm(cbuf, 0x1, EAX_enc, 0x4);
  emit_byte(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
  emit_byte(0);    // 8-bits offset (1 byte)
}

void Assembler::addr_nop_7() {
  assert(UseAddressNop, "no CPU support");
  // 7 bytes: NOP DWORD PTR [EAX+0] 32-bits offset
  emit_byte(0x0F);
  emit_byte(0x1F);
  emit_byte(0x80); // emit_rm(cbuf, 0x2, EAX_enc, EAX_enc);
  emit_long(0);    // 32-bits offset (4 bytes)
}

void Assembler::addr_nop_8() {
  assert(UseAddressNop, "no CPU support");
  // 8 bytes: NOP DWORD PTR [EAX+EAX*0+0] 32-bits offset
  emit_byte(0x0F);
  emit_byte(0x1F);
  emit_byte(0x84); // emit_rm(cbuf, 0x2, EAX_enc, 0x4);
  emit_byte(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
  emit_long(0);    // 32-bits offset (4 bytes)
}

void Assembler::addsd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x58, dst, src, VEX_SIMD_F2);
}

void Assembler::addsd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x58, dst, src, VEX_SIMD_F2);
}

void Assembler::addss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_simd_arith(0x58, dst, src, VEX_SIMD_F3);
}

void Assembler::addss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_simd_arith(0x58, dst, src, VEX_SIMD_F3);
}

void Assembler::andl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0x81);
  emit_operand(rsp, dst, 4);
  emit_long(imm32);
}

void Assembler::andl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xE0, dst, imm32);
}

void Assembler::andl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x23);
  emit_operand(dst, src);
}

void Assembler::andl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x23, 0xC0, dst, src);
}

void Assembler::bsfl(Register dst, Register src) {
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xBC);
  emit_byte(0xC0 | encode);
}

void Assembler::bsrl(Register dst, Register src) {
  assert(!VM_Version::supports_lzcnt(), "encoding is treated as LZCNT");
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xBD);
  emit_byte(0xC0 | encode);
}

void Assembler::bswapl(Register reg) { // bswap
  int encode = prefix_and_encode(reg->encoding());
  emit_byte(0x0F);
  emit_byte(0xC8 | encode);
}

// Near call to a label: bound labels get the real (negative) displacement
// emitted immediately; unbound labels record a patch site and emit 0.
void Assembler::call(Label& L, relocInfo::relocType rtype) {
  // suspect disp32 is always good
  int operand = LP64_ONLY(disp32_operand) NOT_LP64(imm_operand);

  if (L.is_bound()) {
    const int long_size = 5;
    int offs = (int)( target(L) - pc() );
    assert(offs <= 0, "assembler error");
    InstructionMark im(this);
    // 1110 1000 #32-bit disp
    emit_byte(0xE8);
    emit_data(offs - long_size, rtype, operand);
  } else {
    InstructionMark im(this);
    // 1110 1000 #32-bit disp
    L.add_patch_at(code(), locator());

    emit_byte(0xE8);
    emit_data(int(0), rtype, operand);
  }
}

void Assembler::call(Register dst) {
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xFF);
  emit_byte(0xD0 | encode);
}


void Assembler::call(Address adr) {
  InstructionMark im(this);
  prefix(adr);
  emit_byte(0xFF);
  emit_operand(rdx, adr);
}

void Assembler::call_literal(address entry, RelocationHolder const& rspec) {
  assert(entry != NULL, "call most probably wrong");
  InstructionMark im(this);
  emit_byte(0xE8);
  intptr_t disp = entry - (_code_pos + sizeof(int32_t));
  assert(is_simm32(disp), "must be 32bit offset (call2)");
  // Technically, should use call32_operand, but this format is
  // implied by the fact that we're emitting a call instruction.

  int operand = LP64_ONLY(disp32_operand) NOT_LP64(call32_operand);
  emit_data((int) disp, rspec, operand);
}

void Assembler::cdql() {
  // 0x99 is cdq: sign-extends EAX into EDX:EAX.
  emit_byte(0x99);
}

void Assembler::cmovl(Condition cc, Register dst, Register src) {
  NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction"));
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x40 | cc);
  emit_byte(0xC0 | encode);
}


void Assembler::cmovl(Condition cc, Register dst, Address src) {
  NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction"));
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x40 | cc);
  emit_operand(dst, src);
}

void Assembler::cmpb(Address dst, int imm8) {
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0x80);
  emit_operand(rdi, dst, 1);
  emit_byte(imm8);
}

void Assembler::cmpl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0x81);
  emit_operand(rdi, dst, 4);
  emit_long(imm32);
}

void Assembler::cmpl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xF8, dst, imm32);
}

void Assembler::cmpl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x3B, 0xC0, dst, src);
}


void Assembler::cmpl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x3B);
  emit_operand(dst, src);
}

void Assembler::cmpw(Address dst, int imm16) {
  InstructionMark im(this);
  assert(!dst.base_needs_rex() && !dst.index_needs_rex(), "no extended registers");
  emit_byte(0x66);
  emit_byte(0x81);
  emit_operand(rdi, dst, 2);
  emit_word(imm16);
}

// The 32-bit cmpxchg compares the value at adr with the contents of rax,
// and stores reg into adr if so; otherwise, the value at adr is loaded into rax,. 1171 // The ZF is set if the compared values were equal, and cleared otherwise. 1172 void Assembler::cmpxchgl(Register reg, Address adr) { // cmpxchg 1173 if (Atomics & 2) { 1174 // caveat: no instructionmark, so this isn't relocatable. 1175 // Emit a synthetic, non-atomic, CAS equivalent. 1176 // Beware. The synthetic form sets all ICCs, not just ZF. 1177 // cmpxchg r,[m] is equivalent to rax, = CAS (m, rax, r) 1178 cmpl(rax, adr); 1179 movl(rax, adr); 1180 if (reg != rax) { 1181 Label L ; 1182 jcc(Assembler::notEqual, L); 1183 movl(adr, reg); 1184 bind(L); 1185 } 1186 } else { 1187 InstructionMark im(this); 1188 prefix(adr, reg); 1189 emit_byte(0x0F); 1190 emit_byte(0xB1); 1191 emit_operand(reg, adr); 1192 } 1193 } 1194 1195 void Assembler::comisd(XMMRegister dst, Address src) { 1196 // NOTE: dbx seems to decode this as comiss even though the 1197 // 0x66 is there. Strangly ucomisd comes out correct 1198 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1199 emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_66); 1200 } 1201 1202 void Assembler::comisd(XMMRegister dst, XMMRegister src) { 1203 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1204 emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_66); 1205 } 1206 1207 void Assembler::comiss(XMMRegister dst, Address src) { 1208 NOT_LP64(assert(VM_Version::supports_sse(), "")); 1209 emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_NONE); 1210 } 1211 1212 void Assembler::comiss(XMMRegister dst, XMMRegister src) { 1213 NOT_LP64(assert(VM_Version::supports_sse(), "")); 1214 emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_NONE); 1215 } 1216 1217 void Assembler::cvtdq2pd(XMMRegister dst, XMMRegister src) { 1218 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1219 emit_simd_arith_nonds(0xE6, dst, src, VEX_SIMD_F3); 1220 } 1221 1222 void Assembler::cvtdq2ps(XMMRegister dst, XMMRegister src) { 1223 NOT_LP64(assert(VM_Version::supports_sse2(), 
"")); 1224 emit_simd_arith_nonds(0x5B, dst, src, VEX_SIMD_NONE); 1225 } 1226 1227 void Assembler::cvtsd2ss(XMMRegister dst, XMMRegister src) { 1228 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1229 emit_simd_arith(0x5A, dst, src, VEX_SIMD_F2); 1230 } 1231 1232 void Assembler::cvtsd2ss(XMMRegister dst, Address src) { 1233 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1234 emit_simd_arith(0x5A, dst, src, VEX_SIMD_F2); 1235 } 1236 1237 void Assembler::cvtsi2sdl(XMMRegister dst, Register src) { 1238 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1239 int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2); 1240 emit_byte(0x2A); 1241 emit_byte(0xC0 | encode); 1242 } 1243 1244 void Assembler::cvtsi2sdl(XMMRegister dst, Address src) { 1245 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1246 emit_simd_arith(0x2A, dst, src, VEX_SIMD_F2); 1247 } 1248 1249 void Assembler::cvtsi2ssl(XMMRegister dst, Register src) { 1250 NOT_LP64(assert(VM_Version::supports_sse(), "")); 1251 int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3); 1252 emit_byte(0x2A); 1253 emit_byte(0xC0 | encode); 1254 } 1255 1256 void Assembler::cvtsi2ssl(XMMRegister dst, Address src) { 1257 NOT_LP64(assert(VM_Version::supports_sse(), "")); 1258 emit_simd_arith(0x2A, dst, src, VEX_SIMD_F3); 1259 } 1260 1261 void Assembler::cvtss2sd(XMMRegister dst, XMMRegister src) { 1262 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1263 emit_simd_arith(0x5A, dst, src, VEX_SIMD_F3); 1264 } 1265 1266 void Assembler::cvtss2sd(XMMRegister dst, Address src) { 1267 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1268 emit_simd_arith(0x5A, dst, src, VEX_SIMD_F3); 1269 } 1270 1271 1272 void Assembler::cvttsd2sil(Register dst, XMMRegister src) { 1273 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1274 int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F2); 1275 emit_byte(0x2C); 1276 emit_byte(0xC0 | encode); 1277 } 1278 1279 void Assembler::cvttss2sil(Register dst, XMMRegister src) 
{ 1280 NOT_LP64(assert(VM_Version::supports_sse(), "")); 1281 int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F3); 1282 emit_byte(0x2C); 1283 emit_byte(0xC0 | encode); 1284 } 1285 1286 void Assembler::decl(Address dst) { 1287 // Don't use it directly. Use MacroAssembler::decrement() instead. 1288 InstructionMark im(this); 1289 prefix(dst); 1290 emit_byte(0xFF); 1291 emit_operand(rcx, dst); 1292 } 1293 1294 void Assembler::divsd(XMMRegister dst, Address src) { 1295 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1296 emit_simd_arith(0x5E, dst, src, VEX_SIMD_F2); 1297 } 1298 1299 void Assembler::divsd(XMMRegister dst, XMMRegister src) { 1300 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1301 emit_simd_arith(0x5E, dst, src, VEX_SIMD_F2); 1302 } 1303 1304 void Assembler::divss(XMMRegister dst, Address src) { 1305 NOT_LP64(assert(VM_Version::supports_sse(), "")); 1306 emit_simd_arith(0x5E, dst, src, VEX_SIMD_F3); 1307 } 1308 1309 void Assembler::divss(XMMRegister dst, XMMRegister src) { 1310 NOT_LP64(assert(VM_Version::supports_sse(), "")); 1311 emit_simd_arith(0x5E, dst, src, VEX_SIMD_F3); 1312 } 1313 1314 void Assembler::emms() { 1315 NOT_LP64(assert(VM_Version::supports_mmx(), "")); 1316 emit_byte(0x0F); 1317 emit_byte(0x77); 1318 } 1319 1320 void Assembler::hlt() { 1321 emit_byte(0xF4); 1322 } 1323 1324 void Assembler::idivl(Register src) { 1325 int encode = prefix_and_encode(src->encoding()); 1326 emit_byte(0xF7); 1327 emit_byte(0xF8 | encode); 1328 } 1329 1330 void Assembler::divl(Register src) { // Unsigned 1331 int encode = prefix_and_encode(src->encoding()); 1332 emit_byte(0xF7); 1333 emit_byte(0xF0 | encode); 1334 } 1335 1336 void Assembler::imull(Register dst, Register src) { 1337 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 1338 emit_byte(0x0F); 1339 emit_byte(0xAF); 1340 emit_byte(0xC0 | encode); 1341 } 1342 1343 1344 void Assembler::imull(Register dst, Register src, int value) { 1345 int encode = 
prefix_and_encode(dst->encoding(), src->encoding()); 1346 if (is8bit(value)) { 1347 emit_byte(0x6B); 1348 emit_byte(0xC0 | encode); 1349 emit_byte(value & 0xFF); 1350 } else { 1351 emit_byte(0x69); 1352 emit_byte(0xC0 | encode); 1353 emit_long(value); 1354 } 1355 } 1356 1357 void Assembler::incl(Address dst) { 1358 // Don't use it directly. Use MacroAssembler::increment() instead. 1359 InstructionMark im(this); 1360 prefix(dst); 1361 emit_byte(0xFF); 1362 emit_operand(rax, dst); 1363 } 1364 1365 void Assembler::jcc(Condition cc, Label& L, bool maybe_short) { 1366 InstructionMark im(this); 1367 assert((0 <= cc) && (cc < 16), "illegal cc"); 1368 if (L.is_bound()) { 1369 address dst = target(L); 1370 assert(dst != NULL, "jcc most probably wrong"); 1371 1372 const int short_size = 2; 1373 const int long_size = 6; 1374 intptr_t offs = (intptr_t)dst - (intptr_t)_code_pos; 1375 if (maybe_short && is8bit(offs - short_size)) { 1376 // 0111 tttn #8-bit disp 1377 emit_byte(0x70 | cc); 1378 emit_byte((offs - short_size) & 0xFF); 1379 } else { 1380 // 0000 1111 1000 tttn #32-bit disp 1381 assert(is_simm32(offs - long_size), 1382 "must be 32bit offset (call4)"); 1383 emit_byte(0x0F); 1384 emit_byte(0x80 | cc); 1385 emit_long(offs - long_size); 1386 } 1387 } else { 1388 // Note: could eliminate cond. jumps to this jump if condition 1389 // is the same however, seems to be rather unlikely case. 1390 // Note: use jccb() if label to be bound is very close to get 1391 // an 8-bit displacement 1392 L.add_patch_at(code(), locator()); 1393 emit_byte(0x0F); 1394 emit_byte(0x80 | cc); 1395 emit_long(0); 1396 } 1397 } 1398 1399 void Assembler::jccb(Condition cc, Label& L) { 1400 if (L.is_bound()) { 1401 const int short_size = 2; 1402 address entry = target(L); 1403 #ifdef ASSERT 1404 intptr_t dist = (intptr_t)entry - ((intptr_t)_code_pos + short_size); 1405 intptr_t delta = short_branch_delta(); 1406 if (delta != 0) { 1407 dist += (dist < 0 ? 
(-delta) :delta); 1408 } 1409 assert(is8bit(dist), "Dispacement too large for a short jmp"); 1410 #endif 1411 intptr_t offs = (intptr_t)entry - (intptr_t)_code_pos; 1412 // 0111 tttn #8-bit disp 1413 emit_byte(0x70 | cc); 1414 emit_byte((offs - short_size) & 0xFF); 1415 } else { 1416 InstructionMark im(this); 1417 L.add_patch_at(code(), locator()); 1418 emit_byte(0x70 | cc); 1419 emit_byte(0); 1420 } 1421 } 1422 1423 void Assembler::jmp(Address adr) { 1424 InstructionMark im(this); 1425 prefix(adr); 1426 emit_byte(0xFF); 1427 emit_operand(rsp, adr); 1428 } 1429 1430 void Assembler::jmp(Label& L, bool maybe_short) { 1431 if (L.is_bound()) { 1432 address entry = target(L); 1433 assert(entry != NULL, "jmp most probably wrong"); 1434 InstructionMark im(this); 1435 const int short_size = 2; 1436 const int long_size = 5; 1437 intptr_t offs = entry - _code_pos; 1438 if (maybe_short && is8bit(offs - short_size)) { 1439 emit_byte(0xEB); 1440 emit_byte((offs - short_size) & 0xFF); 1441 } else { 1442 emit_byte(0xE9); 1443 emit_long(offs - long_size); 1444 } 1445 } else { 1446 // By default, forward jumps are always 32-bit displacements, since 1447 // we can't yet know where the label will be bound. If you're sure that 1448 // the forward jump will not run beyond 256 bytes, use jmpb to 1449 // force an 8-bit displacement. 
1450 InstructionMark im(this); 1451 L.add_patch_at(code(), locator()); 1452 emit_byte(0xE9); 1453 emit_long(0); 1454 } 1455 } 1456 1457 void Assembler::jmp(Register entry) { 1458 int encode = prefix_and_encode(entry->encoding()); 1459 emit_byte(0xFF); 1460 emit_byte(0xE0 | encode); 1461 } 1462 1463 void Assembler::jmp_literal(address dest, RelocationHolder const& rspec) { 1464 InstructionMark im(this); 1465 emit_byte(0xE9); 1466 assert(dest != NULL, "must have a target"); 1467 intptr_t disp = dest - (_code_pos + sizeof(int32_t)); 1468 assert(is_simm32(disp), "must be 32bit offset (jmp)"); 1469 emit_data(disp, rspec.reloc(), call32_operand); 1470 } 1471 1472 void Assembler::jmpb(Label& L) { 1473 if (L.is_bound()) { 1474 const int short_size = 2; 1475 address entry = target(L); 1476 assert(entry != NULL, "jmp most probably wrong"); 1477 #ifdef ASSERT 1478 intptr_t dist = (intptr_t)entry - ((intptr_t)_code_pos + short_size); 1479 intptr_t delta = short_branch_delta(); 1480 if (delta != 0) { 1481 dist += (dist < 0 ? 
(-delta) :delta); 1482 } 1483 assert(is8bit(dist), "Dispacement too large for a short jmp"); 1484 #endif 1485 intptr_t offs = entry - _code_pos; 1486 emit_byte(0xEB); 1487 emit_byte((offs - short_size) & 0xFF); 1488 } else { 1489 InstructionMark im(this); 1490 L.add_patch_at(code(), locator()); 1491 emit_byte(0xEB); 1492 emit_byte(0); 1493 } 1494 } 1495 1496 void Assembler::ldmxcsr( Address src) { 1497 NOT_LP64(assert(VM_Version::supports_sse(), "")); 1498 InstructionMark im(this); 1499 prefix(src); 1500 emit_byte(0x0F); 1501 emit_byte(0xAE); 1502 emit_operand(as_Register(2), src); 1503 } 1504 1505 void Assembler::leal(Register dst, Address src) { 1506 InstructionMark im(this); 1507 #ifdef _LP64 1508 emit_byte(0x67); // addr32 1509 prefix(src, dst); 1510 #endif // LP64 1511 emit_byte(0x8D); 1512 emit_operand(dst, src); 1513 } 1514 1515 void Assembler::lock() { 1516 if (Atomics & 1) { 1517 // Emit either nothing, a NOP, or a NOP: prefix 1518 emit_byte(0x90) ; 1519 } else { 1520 emit_byte(0xF0); 1521 } 1522 } 1523 1524 void Assembler::lzcntl(Register dst, Register src) { 1525 assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR"); 1526 emit_byte(0xF3); 1527 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 1528 emit_byte(0x0F); 1529 emit_byte(0xBD); 1530 emit_byte(0xC0 | encode); 1531 } 1532 1533 // Emit mfence instruction 1534 void Assembler::mfence() { 1535 NOT_LP64(assert(VM_Version::supports_sse2(), "unsupported");) 1536 emit_byte( 0x0F ); 1537 emit_byte( 0xAE ); 1538 emit_byte( 0xF0 ); 1539 } 1540 1541 void Assembler::mov(Register dst, Register src) { 1542 LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src)); 1543 } 1544 1545 void Assembler::movapd(XMMRegister dst, XMMRegister src) { 1546 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1547 emit_simd_arith_nonds(0x28, dst, src, VEX_SIMD_66); 1548 } 1549 1550 void Assembler::movaps(XMMRegister dst, XMMRegister src) { 1551 NOT_LP64(assert(VM_Version::supports_sse(), "")); 1552 
emit_simd_arith_nonds(0x28, dst, src, VEX_SIMD_NONE); 1553 } 1554 1555 void Assembler::movlhps(XMMRegister dst, XMMRegister src) { 1556 NOT_LP64(assert(VM_Version::supports_sse(), "")); 1557 int encode = simd_prefix_and_encode(dst, src, src, VEX_SIMD_NONE); 1558 emit_byte(0x16); 1559 emit_byte(0xC0 | encode); 1560 } 1561 1562 void Assembler::movb(Register dst, Address src) { 1563 NOT_LP64(assert(dst->has_byte_register(), "must have byte register")); 1564 InstructionMark im(this); 1565 prefix(src, dst, true); 1566 emit_byte(0x8A); 1567 emit_operand(dst, src); 1568 } 1569 1570 1571 void Assembler::movb(Address dst, int imm8) { 1572 InstructionMark im(this); 1573 prefix(dst); 1574 emit_byte(0xC6); 1575 emit_operand(rax, dst, 1); 1576 emit_byte(imm8); 1577 } 1578 1579 1580 void Assembler::movb(Address dst, Register src) { 1581 assert(src->has_byte_register(), "must have byte register"); 1582 InstructionMark im(this); 1583 prefix(dst, src, true); 1584 emit_byte(0x88); 1585 emit_operand(src, dst); 1586 } 1587 1588 void Assembler::movdl(XMMRegister dst, Register src) { 1589 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1590 int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66); 1591 emit_byte(0x6E); 1592 emit_byte(0xC0 | encode); 1593 } 1594 1595 void Assembler::movdl(Register dst, XMMRegister src) { 1596 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1597 // swap src/dst to get correct prefix 1598 int encode = simd_prefix_and_encode(src, dst, VEX_SIMD_66); 1599 emit_byte(0x7E); 1600 emit_byte(0xC0 | encode); 1601 } 1602 1603 void Assembler::movdl(XMMRegister dst, Address src) { 1604 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1605 InstructionMark im(this); 1606 simd_prefix(dst, src, VEX_SIMD_66); 1607 emit_byte(0x6E); 1608 emit_operand(dst, src); 1609 } 1610 1611 void Assembler::movdl(Address dst, XMMRegister src) { 1612 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1613 InstructionMark im(this); 1614 simd_prefix(dst, src, VEX_SIMD_66); 1615 
emit_byte(0x7E); 1616 emit_operand(src, dst); 1617 } 1618 1619 void Assembler::movdqa(XMMRegister dst, XMMRegister src) { 1620 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1621 emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_66); 1622 } 1623 1624 void Assembler::movdqu(XMMRegister dst, Address src) { 1625 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1626 emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_F3); 1627 } 1628 1629 void Assembler::movdqu(XMMRegister dst, XMMRegister src) { 1630 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1631 emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_F3); 1632 } 1633 1634 void Assembler::movdqu(Address dst, XMMRegister src) { 1635 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1636 InstructionMark im(this); 1637 simd_prefix(dst, src, VEX_SIMD_F3); 1638 emit_byte(0x7F); 1639 emit_operand(src, dst); 1640 } 1641 1642 // Move Unaligned 256bit Vector 1643 void Assembler::vmovdqu(XMMRegister dst, XMMRegister src) { 1644 assert(UseAVX, ""); 1645 bool vector256 = true; 1646 int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F3, vector256); 1647 emit_byte(0x6F); 1648 emit_byte(0xC0 | encode); 1649 } 1650 1651 void Assembler::vmovdqu(XMMRegister dst, Address src) { 1652 assert(UseAVX, ""); 1653 InstructionMark im(this); 1654 bool vector256 = true; 1655 vex_prefix(dst, xnoreg, src, VEX_SIMD_F3, vector256); 1656 emit_byte(0x6F); 1657 emit_operand(dst, src); 1658 } 1659 1660 void Assembler::vmovdqu(Address dst, XMMRegister src) { 1661 assert(UseAVX, ""); 1662 InstructionMark im(this); 1663 bool vector256 = true; 1664 // swap src<->dst for encoding 1665 assert(src != xnoreg, "sanity"); 1666 vex_prefix(src, xnoreg, dst, VEX_SIMD_F3, vector256); 1667 emit_byte(0x7F); 1668 emit_operand(src, dst); 1669 } 1670 1671 // Uses zero extension on 64bit 1672 1673 void Assembler::movl(Register dst, int32_t imm32) { 1674 int encode = prefix_and_encode(dst->encoding()); 1675 emit_byte(0xB8 | encode); 1676 emit_long(imm32); 1677 } 
1678 1679 void Assembler::movl(Register dst, Register src) { 1680 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 1681 emit_byte(0x8B); 1682 emit_byte(0xC0 | encode); 1683 } 1684 1685 void Assembler::movl(Register dst, Address src) { 1686 InstructionMark im(this); 1687 prefix(src, dst); 1688 emit_byte(0x8B); 1689 emit_operand(dst, src); 1690 } 1691 1692 void Assembler::movl(Address dst, int32_t imm32) { 1693 InstructionMark im(this); 1694 prefix(dst); 1695 emit_byte(0xC7); 1696 emit_operand(rax, dst, 4); 1697 emit_long(imm32); 1698 } 1699 1700 void Assembler::movl(Address dst, Register src) { 1701 InstructionMark im(this); 1702 prefix(dst, src); 1703 emit_byte(0x89); 1704 emit_operand(src, dst); 1705 } 1706 1707 // New cpus require to use movsd and movss to avoid partial register stall 1708 // when loading from memory. But for old Opteron use movlpd instead of movsd. 1709 // The selection is done in MacroAssembler::movdbl() and movflt(). 1710 void Assembler::movlpd(XMMRegister dst, Address src) { 1711 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1712 emit_simd_arith(0x12, dst, src, VEX_SIMD_66); 1713 } 1714 1715 void Assembler::movq( MMXRegister dst, Address src ) { 1716 assert( VM_Version::supports_mmx(), "" ); 1717 emit_byte(0x0F); 1718 emit_byte(0x6F); 1719 emit_operand(dst, src); 1720 } 1721 1722 void Assembler::movq( Address dst, MMXRegister src ) { 1723 assert( VM_Version::supports_mmx(), "" ); 1724 emit_byte(0x0F); 1725 emit_byte(0x7F); 1726 // workaround gcc (3.2.1-7a) bug 1727 // In that version of gcc with only an emit_operand(MMX, Address) 1728 // gcc will tail jump and try and reverse the parameters completely 1729 // obliterating dst in the process. By having a version available 1730 // that doesn't need to swap the args at the tail jump the bug is 1731 // avoided. 
1732 emit_operand(dst, src); 1733 } 1734 1735 void Assembler::movq(XMMRegister dst, Address src) { 1736 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1737 InstructionMark im(this); 1738 simd_prefix(dst, src, VEX_SIMD_F3); 1739 emit_byte(0x7E); 1740 emit_operand(dst, src); 1741 } 1742 1743 void Assembler::movq(Address dst, XMMRegister src) { 1744 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1745 InstructionMark im(this); 1746 simd_prefix(dst, src, VEX_SIMD_66); 1747 emit_byte(0xD6); 1748 emit_operand(src, dst); 1749 } 1750 1751 void Assembler::movsbl(Register dst, Address src) { // movsxb 1752 InstructionMark im(this); 1753 prefix(src, dst); 1754 emit_byte(0x0F); 1755 emit_byte(0xBE); 1756 emit_operand(dst, src); 1757 } 1758 1759 void Assembler::movsbl(Register dst, Register src) { // movsxb 1760 NOT_LP64(assert(src->has_byte_register(), "must have byte register")); 1761 int encode = prefix_and_encode(dst->encoding(), src->encoding(), true); 1762 emit_byte(0x0F); 1763 emit_byte(0xBE); 1764 emit_byte(0xC0 | encode); 1765 } 1766 1767 void Assembler::movsd(XMMRegister dst, XMMRegister src) { 1768 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1769 emit_simd_arith(0x10, dst, src, VEX_SIMD_F2); 1770 } 1771 1772 void Assembler::movsd(XMMRegister dst, Address src) { 1773 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1774 emit_simd_arith_nonds(0x10, dst, src, VEX_SIMD_F2); 1775 } 1776 1777 void Assembler::movsd(Address dst, XMMRegister src) { 1778 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1779 InstructionMark im(this); 1780 simd_prefix(dst, src, VEX_SIMD_F2); 1781 emit_byte(0x11); 1782 emit_operand(src, dst); 1783 } 1784 1785 void Assembler::movss(XMMRegister dst, XMMRegister src) { 1786 NOT_LP64(assert(VM_Version::supports_sse(), "")); 1787 emit_simd_arith(0x10, dst, src, VEX_SIMD_F3); 1788 } 1789 1790 void Assembler::movss(XMMRegister dst, Address src) { 1791 NOT_LP64(assert(VM_Version::supports_sse(), "")); 1792 emit_simd_arith_nonds(0x10, 
dst, src, VEX_SIMD_F3); 1793 } 1794 1795 void Assembler::movss(Address dst, XMMRegister src) { 1796 NOT_LP64(assert(VM_Version::supports_sse(), "")); 1797 InstructionMark im(this); 1798 simd_prefix(dst, src, VEX_SIMD_F3); 1799 emit_byte(0x11); 1800 emit_operand(src, dst); 1801 } 1802 1803 void Assembler::movswl(Register dst, Address src) { // movsxw 1804 InstructionMark im(this); 1805 prefix(src, dst); 1806 emit_byte(0x0F); 1807 emit_byte(0xBF); 1808 emit_operand(dst, src); 1809 } 1810 1811 void Assembler::movswl(Register dst, Register src) { // movsxw 1812 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 1813 emit_byte(0x0F); 1814 emit_byte(0xBF); 1815 emit_byte(0xC0 | encode); 1816 } 1817 1818 void Assembler::movw(Address dst, int imm16) { 1819 InstructionMark im(this); 1820 1821 emit_byte(0x66); // switch to 16-bit mode 1822 prefix(dst); 1823 emit_byte(0xC7); 1824 emit_operand(rax, dst, 2); 1825 emit_word(imm16); 1826 } 1827 1828 void Assembler::movw(Register dst, Address src) { 1829 InstructionMark im(this); 1830 emit_byte(0x66); 1831 prefix(src, dst); 1832 emit_byte(0x8B); 1833 emit_operand(dst, src); 1834 } 1835 1836 void Assembler::movw(Address dst, Register src) { 1837 InstructionMark im(this); 1838 emit_byte(0x66); 1839 prefix(dst, src); 1840 emit_byte(0x89); 1841 emit_operand(src, dst); 1842 } 1843 1844 void Assembler::movzbl(Register dst, Address src) { // movzxb 1845 InstructionMark im(this); 1846 prefix(src, dst); 1847 emit_byte(0x0F); 1848 emit_byte(0xB6); 1849 emit_operand(dst, src); 1850 } 1851 1852 void Assembler::movzbl(Register dst, Register src) { // movzxb 1853 NOT_LP64(assert(src->has_byte_register(), "must have byte register")); 1854 int encode = prefix_and_encode(dst->encoding(), src->encoding(), true); 1855 emit_byte(0x0F); 1856 emit_byte(0xB6); 1857 emit_byte(0xC0 | encode); 1858 } 1859 1860 void Assembler::movzwl(Register dst, Address src) { // movzxw 1861 InstructionMark im(this); 1862 prefix(src, dst); 1863 
emit_byte(0x0F); 1864 emit_byte(0xB7); 1865 emit_operand(dst, src); 1866 } 1867 1868 void Assembler::movzwl(Register dst, Register src) { // movzxw 1869 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 1870 emit_byte(0x0F); 1871 emit_byte(0xB7); 1872 emit_byte(0xC0 | encode); 1873 } 1874 1875 void Assembler::mull(Address src) { 1876 InstructionMark im(this); 1877 prefix(src); 1878 emit_byte(0xF7); 1879 emit_operand(rsp, src); 1880 } 1881 1882 void Assembler::mull(Register src) { 1883 int encode = prefix_and_encode(src->encoding()); 1884 emit_byte(0xF7); 1885 emit_byte(0xE0 | encode); 1886 } 1887 1888 void Assembler::mulsd(XMMRegister dst, Address src) { 1889 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1890 emit_simd_arith(0x59, dst, src, VEX_SIMD_F2); 1891 } 1892 1893 void Assembler::mulsd(XMMRegister dst, XMMRegister src) { 1894 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1895 emit_simd_arith(0x59, dst, src, VEX_SIMD_F2); 1896 } 1897 1898 void Assembler::mulss(XMMRegister dst, Address src) { 1899 NOT_LP64(assert(VM_Version::supports_sse(), "")); 1900 emit_simd_arith(0x59, dst, src, VEX_SIMD_F3); 1901 } 1902 1903 void Assembler::mulss(XMMRegister dst, XMMRegister src) { 1904 NOT_LP64(assert(VM_Version::supports_sse(), "")); 1905 emit_simd_arith(0x59, dst, src, VEX_SIMD_F3); 1906 } 1907 1908 void Assembler::negl(Register dst) { 1909 int encode = prefix_and_encode(dst->encoding()); 1910 emit_byte(0xF7); 1911 emit_byte(0xD8 | encode); 1912 } 1913 1914 void Assembler::nop(int i) { 1915 #ifdef ASSERT 1916 assert(i > 0, " "); 1917 // The fancy nops aren't currently recognized by debuggers making it a 1918 // pain to disassemble code while debugging. If asserts are on clearly 1919 // speed is not an issue so simply use the single byte traditional nop 1920 // to do alignment. 
1921 1922 for (; i > 0 ; i--) emit_byte(0x90); 1923 return; 1924 1925 #endif // ASSERT 1926 1927 if (UseAddressNop && VM_Version::is_intel()) { 1928 // 1929 // Using multi-bytes nops "0x0F 0x1F [address]" for Intel 1930 // 1: 0x90 1931 // 2: 0x66 0x90 1932 // 3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding) 1933 // 4: 0x0F 0x1F 0x40 0x00 1934 // 5: 0x0F 0x1F 0x44 0x00 0x00 1935 // 6: 0x66 0x0F 0x1F 0x44 0x00 0x00 1936 // 7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 1937 // 8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 1938 // 9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 1939 // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 1940 // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 1941 1942 // The rest coding is Intel specific - don't use consecutive address nops 1943 1944 // 12: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90 1945 // 13: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90 1946 // 14: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90 1947 // 15: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90 1948 1949 while(i >= 15) { 1950 // For Intel don't generate consecutive addess nops (mix with regular nops) 1951 i -= 15; 1952 emit_byte(0x66); // size prefix 1953 emit_byte(0x66); // size prefix 1954 emit_byte(0x66); // size prefix 1955 addr_nop_8(); 1956 emit_byte(0x66); // size prefix 1957 emit_byte(0x66); // size prefix 1958 emit_byte(0x66); // size prefix 1959 emit_byte(0x90); // nop 1960 } 1961 switch (i) { 1962 case 14: 1963 emit_byte(0x66); // size prefix 1964 case 13: 1965 emit_byte(0x66); // size prefix 1966 case 12: 1967 addr_nop_8(); 1968 emit_byte(0x66); // size prefix 1969 emit_byte(0x66); // size prefix 1970 emit_byte(0x66); // size prefix 1971 emit_byte(0x90); // nop 1972 break; 1973 case 11: 1974 emit_byte(0x66); // size prefix 1975 case 10: 1976 emit_byte(0x66); // size prefix 1977 case 9: 1978 emit_byte(0x66); // size prefix 1979 case 
8: 1980 addr_nop_8(); 1981 break; 1982 case 7: 1983 addr_nop_7(); 1984 break; 1985 case 6: 1986 emit_byte(0x66); // size prefix 1987 case 5: 1988 addr_nop_5(); 1989 break; 1990 case 4: 1991 addr_nop_4(); 1992 break; 1993 case 3: 1994 // Don't use "0x0F 0x1F 0x00" - need patching safe padding 1995 emit_byte(0x66); // size prefix 1996 case 2: 1997 emit_byte(0x66); // size prefix 1998 case 1: 1999 emit_byte(0x90); // nop 2000 break; 2001 default: 2002 assert(i == 0, " "); 2003 } 2004 return; 2005 } 2006 if (UseAddressNop && VM_Version::is_amd()) { 2007 // 2008 // Using multi-bytes nops "0x0F 0x1F [address]" for AMD. 2009 // 1: 0x90 2010 // 2: 0x66 0x90 2011 // 3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding) 2012 // 4: 0x0F 0x1F 0x40 0x00 2013 // 5: 0x0F 0x1F 0x44 0x00 0x00 2014 // 6: 0x66 0x0F 0x1F 0x44 0x00 0x00 2015 // 7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 2016 // 8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 2017 // 9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 2018 // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 2019 // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 2020 2021 // The rest coding is AMD specific - use consecutive address nops 2022 2023 // 12: 0x66 0x0F 0x1F 0x44 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00 2024 // 13: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00 2025 // 14: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 2026 // 15: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 2027 // 16: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 2028 // Size prefixes (0x66) are added for larger sizes 2029 2030 while(i >= 22) { 2031 i -= 11; 2032 emit_byte(0x66); // size prefix 2033 emit_byte(0x66); // size prefix 2034 emit_byte(0x66); // size prefix 2035 addr_nop_8(); 2036 } 2037 // Generate first nop for size between 21-12 2038 switch (i) { 2039 case 21: 2040 i -= 1; 2041 emit_byte(0x66); // size prefix 2042 case 
20: 2043 case 19: 2044 i -= 1; 2045 emit_byte(0x66); // size prefix 2046 case 18: 2047 case 17: 2048 i -= 1; 2049 emit_byte(0x66); // size prefix 2050 case 16: 2051 case 15: 2052 i -= 8; 2053 addr_nop_8(); 2054 break; 2055 case 14: 2056 case 13: 2057 i -= 7; 2058 addr_nop_7(); 2059 break; 2060 case 12: 2061 i -= 6; 2062 emit_byte(0x66); // size prefix 2063 addr_nop_5(); 2064 break; 2065 default: 2066 assert(i < 12, " "); 2067 } 2068 2069 // Generate second nop for size between 11-1 2070 switch (i) { 2071 case 11: 2072 emit_byte(0x66); // size prefix 2073 case 10: 2074 emit_byte(0x66); // size prefix 2075 case 9: 2076 emit_byte(0x66); // size prefix 2077 case 8: 2078 addr_nop_8(); 2079 break; 2080 case 7: 2081 addr_nop_7(); 2082 break; 2083 case 6: 2084 emit_byte(0x66); // size prefix 2085 case 5: 2086 addr_nop_5(); 2087 break; 2088 case 4: 2089 addr_nop_4(); 2090 break; 2091 case 3: 2092 // Don't use "0x0F 0x1F 0x00" - need patching safe padding 2093 emit_byte(0x66); // size prefix 2094 case 2: 2095 emit_byte(0x66); // size prefix 2096 case 1: 2097 emit_byte(0x90); // nop 2098 break; 2099 default: 2100 assert(i == 0, " "); 2101 } 2102 return; 2103 } 2104 2105 // Using nops with size prefixes "0x66 0x90". 
// From AMD Optimization Guide:
// 1: 0x90
// 2: 0x66 0x90
// 3: 0x66 0x66 0x90
// 4: 0x66 0x66 0x66 0x90
// 5: 0x66 0x66 0x90 0x66 0x90
// 6: 0x66 0x66 0x90 0x66 0x66 0x90
// 7: 0x66 0x66 0x66 0x90 0x66 0x66 0x90
// 8: 0x66 0x66 0x66 0x90 0x66 0x66 0x66 0x90
// 9: 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
// 10: 0x66 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
//
// Emit 4-byte groups (three size prefixes + nop) until at most 12 bytes remain.
while(i > 12) {
  i -= 4;
  emit_byte(0x66); // size prefix
  emit_byte(0x66);
  emit_byte(0x66);
  emit_byte(0x90); // nop
}
// 1 - 12 nops
if(i > 8) {
  if(i > 9) {
    i -= 1;
    emit_byte(0x66); // extra prefix so the remainder lands on a pattern above
  }
  i -= 3;
  emit_byte(0x66);
  emit_byte(0x66);
  emit_byte(0x90);
}
// 1 - 8 nops
if(i > 4) {
  if(i > 6) {
    i -= 1;
    emit_byte(0x66);
  }
  i -= 3;
  emit_byte(0x66);
  emit_byte(0x66);
  emit_byte(0x90);
}
// 0 - 4 bytes left: one nop with 0-3 size prefixes (cases fall through).
switch (i) {
  case 4:
    emit_byte(0x66);
  case 3:
    emit_byte(0x66);
  case 2:
    emit_byte(0x66);
  case 1:
    emit_byte(0x90);
    break;
  default:
    assert(i == 0, " ");
}
}

// Logical NOT of r32: 0xF7 with ModRM reg field /2 (0xD0 | encode).
void Assembler::notl(Register dst) {
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xF7);
  emit_byte(0xD0 | encode );
}

// OR m32, imm32: 0x81 with rcx supplying the /1 opcode extension.
void Assembler::orl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_arith_operand(0x81, rcx, dst, imm32);
}

// OR r32, imm32 (0x81 /1; 0xC8 encodes the register-direct /1 form).
void Assembler::orl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xC8, dst, imm32);
}

// OR r32, m32: opcode 0x0B.
void Assembler::orl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x0B);
  emit_operand(dst, src);
}

// OR r32, r32: opcode 0x0B, register-direct.
void Assembler::orl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x0B, 0xC0, dst, src);
}

void
// PACKUSWB xmm, m128 (66 0F 67). Memory form is only allowed under AVX:
// the legacy SSE encoding would require a 16-byte-aligned operand, which
// is not guaranteed here (see assert message).
Assembler::packuswb(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
  emit_simd_arith(0x67, dst, src, VEX_SIMD_66);
}

// PACKUSWB xmm, xmm (66 0F 67).
void Assembler::packuswb(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x67, dst, src, VEX_SIMD_66);
}

// PCMPESTRI xmm, m128, imm8 (66 0F 3A 61) - SSE4.2 string compare.
void Assembler::pcmpestri(XMMRegister dst, Address src, int imm8) {
  assert(VM_Version::supports_sse4_2(), "");
  InstructionMark im(this);
  simd_prefix(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A);
  emit_byte(0x61);
  emit_operand(dst, src);
  emit_byte(imm8);
}

// PCMPESTRI xmm, xmm, imm8 (66 0F 3A 61), register-direct.
void Assembler::pcmpestri(XMMRegister dst, XMMRegister src, int imm8) {
  assert(VM_Version::supports_sse4_2(), "");
  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A);
  emit_byte(0x61);
  emit_byte(0xC0 | encode);
  emit_byte(imm8);
}

// PMOVZXBW xmm, m64 (66 0F 38 30) - zero-extend packed bytes to words.
void Assembler::pmovzxbw(XMMRegister dst, Address src) {
  assert(VM_Version::supports_sse4_1(), "");
  InstructionMark im(this);
  simd_prefix(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
  emit_byte(0x30);
  emit_operand(dst, src);
}

// PMOVZXBW xmm, xmm (66 0F 38 30), register-direct.
void Assembler::pmovzxbw(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_sse4_1(), "");
  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
  emit_byte(0x30);
  emit_byte(0xC0 | encode);
}

// generic
// POP r: single-byte opcode 0x58 + register encoding.
void Assembler::pop(Register dst) {
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0x58 | encode);
}

// POPCNT r32, m32 (F3 0F B8). Note the F3 prefix is emitted before any
// REX prefix produced by prefix(src, dst), matching the required prefix order.
void Assembler::popcntl(Register dst, Address src) {
  assert(VM_Version::supports_popcnt(), "must support");
  InstructionMark im(this);
  emit_byte(0xF3);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0xB8);
  emit_operand(dst, src);
}
// POPCNT r32, r32 (F3 0F B8), register-direct; F3 precedes any REX prefix.
void Assembler::popcntl(Register dst, Register src) {
  assert(VM_Version::supports_popcnt(), "must support");
  emit_byte(0xF3);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xB8);
  emit_byte(0xC0 | encode);
}

// POPF: pop flags register (0x9D).
void Assembler::popf() {
  emit_byte(0x9D);
}

#ifndef _LP64 // no 32bit push/pop on amd64
// POP m (0x8F /0, with rax supplying the /0 extension).
void Assembler::popl(Address dst) {
  // NOTE: this will adjust stack by 8byte on 64bits
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0x8F);
  emit_operand(rax, dst);
}
#endif

// Shared prefix emission for all prefetch variants: address-size/REX
// prefix as needed, then the 0x0F escape byte.
void Assembler::prefetch_prefix(Address src) {
  prefix(src);
  emit_byte(0x0F);
}

// PREFETCHNTA m8 (0F 18 /0) - non-temporal prefetch hint.
void Assembler::prefetchnta(Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
  InstructionMark im(this);
  prefetch_prefix(src);
  emit_byte(0x18);
  emit_operand(rax, src); // 0, src
}

// 3DNow! PREFETCH (0F 0D /0) - AMD prefetch-for-read.
void Assembler::prefetchr(Address src) {
  assert(VM_Version::supports_3dnow_prefetch(), "must support");
  InstructionMark im(this);
  prefetch_prefix(src);
  emit_byte(0x0D);
  emit_operand(rax, src); // 0, src
}

// PREFETCHT0 m8 (0F 18 /1) - prefetch into all cache levels.
void Assembler::prefetcht0(Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
  InstructionMark im(this);
  prefetch_prefix(src);
  emit_byte(0x18);
  emit_operand(rcx, src); // 1, src
}

// PREFETCHT1 m8 (0F 18 /2).
void Assembler::prefetcht1(Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
  InstructionMark im(this);
  prefetch_prefix(src);
  emit_byte(0x18);
  emit_operand(rdx, src); // 2, src
}

// PREFETCHT2 m8 (0F 18 /3).
void Assembler::prefetcht2(Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
  InstructionMark im(this);
  prefetch_prefix(src);
  emit_byte(0x18);
  emit_operand(rbx, src); // 3, src
}

void
// 3DNow! PREFETCHW (0F 0D /1) - AMD prefetch with intent to write.
Assembler::prefetchw(Address src) {
  assert(VM_Version::supports_3dnow_prefetch(), "must support");
  InstructionMark im(this);
  prefetch_prefix(src);
  emit_byte(0x0D);
  emit_operand(rcx, src); // 1, src
}

// Emit a raw prefix byte (e.g. REX/segment/size prefix) into the code stream.
void Assembler::prefix(Prefix p) {
  a_byte(p);
}

// PSHUFD xmm, xmm, imm8 (66 0F 70) - shuffle packed doublewords.
void Assembler::pshufd(XMMRegister dst, XMMRegister src, int mode) {
  assert(isByte(mode), "invalid value");
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith_nonds(0x70, dst, src, VEX_SIMD_66);
  emit_byte(mode & 0xFF);

}

// PSHUFD xmm, m128, imm8. Memory form requires AVX (legacy SSE encoding
// would require 16-byte alignment, not guaranteed here).
void Assembler::pshufd(XMMRegister dst, Address src, int mode) {
  assert(isByte(mode), "invalid value");
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
  InstructionMark im(this);
  simd_prefix(dst, src, VEX_SIMD_66);
  emit_byte(0x70);
  emit_operand(dst, src);
  emit_byte(mode & 0xFF);
}

// PSHUFLW xmm, xmm, imm8 (F2 0F 70) - shuffle low packed words.
void Assembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) {
  assert(isByte(mode), "invalid value");
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith_nonds(0x70, dst, src, VEX_SIMD_F2);
  emit_byte(mode & 0xFF);
}

// PSHUFLW xmm, m128, imm8; memory form requires AVX (alignment, as above).
void Assembler::pshuflw(XMMRegister dst, Address src, int mode) {
  assert(isByte(mode), "invalid value");
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
  InstructionMark im(this);
  simd_prefix(dst, src, VEX_SIMD_F2);
  emit_byte(0x70);
  emit_operand(dst, src);
  emit_byte(mode & 0xFF);
}

// PSRLDQ xmm, imm8 (66 0F 73 /3, xmm3 supplies the /3 extension).
void Assembler::psrldq(XMMRegister dst, int shift) {
  // Shift 128 bit value in xmm register by number of bytes.
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  // xmm3 in the reg slot encodes the /3 opcode extension (PSRLDQ).
  int encode = simd_prefix_and_encode(xmm3, dst, dst, VEX_SIMD_66);
  emit_byte(0x73);
  emit_byte(0xC0 | encode);
  emit_byte(shift);
}

// PTEST xmm, m128 (66 0F 38 17). Memory form requires AVX (legacy SSE
// encoding would require 16-byte alignment, not guaranteed here).
void Assembler::ptest(XMMRegister dst, Address src) {
  assert(VM_Version::supports_sse4_1(), "");
  assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
  InstructionMark im(this);
  simd_prefix(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
  emit_byte(0x17);
  emit_operand(dst, src);
}

// PTEST xmm, xmm (66 0F 38 17), register-direct.
void Assembler::ptest(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_sse4_1(), "");
  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
  emit_byte(0x17);
  emit_byte(0xC0 | encode);
}

// PUNPCKLBW xmm, m128 (66 0F 60); memory form requires AVX (alignment).
void Assembler::punpcklbw(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
  emit_simd_arith(0x60, dst, src, VEX_SIMD_66);
}

// PUNPCKLBW xmm, xmm (66 0F 60).
void Assembler::punpcklbw(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x60, dst, src, VEX_SIMD_66);
}

// PUNPCKLDQ xmm, m128 (66 0F 62); memory form requires AVX (alignment).
void Assembler::punpckldq(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
  emit_simd_arith(0x62, dst, src, VEX_SIMD_66);
}

// PUNPCKLDQ xmm, xmm (66 0F 62).
void Assembler::punpckldq(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x62, dst, src, VEX_SIMD_66);
}

// PUNPCKLQDQ xmm, xmm (66 0F 6C).
void Assembler::punpcklqdq(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x6C, dst, src, VEX_SIMD_66);
}

// PUSH imm32 (0x68 id).
void Assembler::push(int32_t imm32) {
  // in 64bits we push 64bits onto the stack but only
  // take a 32bit immediate
  emit_byte(0x68);
  emit_long(imm32);
}

// PUSH r: single-byte opcode 0x50 + register encoding.
void Assembler::push(Register src) {
  int encode = prefix_and_encode(src->encoding());

  emit_byte(0x50 | encode);
}

// PUSHF: push flags register (0x9C).
void Assembler::pushf() {
  emit_byte(0x9C);
}

#ifndef _LP64 // no 32bit push/pop on amd64
// PUSH m (0xFF /6, with rsi supplying the /6 extension).
void Assembler::pushl(Address src) {
  // Note this will push 64bit on 64bit
  InstructionMark im(this);
  prefix(src);
  emit_byte(0xFF);
  emit_operand(rsi, src);
}
#endif

// RCL r32, imm8: short form 0xD1 /2 for a count of 1, else 0xC1 /2 ib.
void Assembler::rcll(Register dst, int imm8) {
  assert(isShiftCount(imm8), "illegal shift count");
  int encode = prefix_and_encode(dst->encoding());
  if (imm8 == 1) {
    emit_byte(0xD1);
    emit_byte(0xD0 | encode);
  } else {
    emit_byte(0xC1);
    emit_byte(0xD0 | encode);
    emit_byte(imm8);
  }
}

// copies data from [esi] to [edi] using rcx pointer sized words
// generic
void Assembler::rep_mov() {
  emit_byte(0xF3);
  // MOVSQ
  LP64_ONLY(prefix(REX_W));
  emit_byte(0xA5);
}

// sets rcx pointer sized words with rax, value at [edi]
// generic
void Assembler::rep_set() { // rep_set
  emit_byte(0xF3);
  // STOSQ
  LP64_ONLY(prefix(REX_W));
  emit_byte(0xAB);
}

// scans rcx pointer sized words at [edi] for occurance of rax,
// generic
void Assembler::repne_scan() { // repne_scan
  emit_byte(0xF2);
  // SCASQ
  LP64_ONLY(prefix(REX_W));
  emit_byte(0xAF);
}

#ifdef _LP64
// scans rcx 4 byte words at [edi] for occurance of rax,
// generic
void Assembler::repne_scanl() { // repne_scan
  emit_byte(0xF2);
  // SCASL
  emit_byte(0xAF);
}
#endif

// RET: bare 0xC3 for no stack adjust, else 0xC2 iw to also pop imm16 bytes.
void Assembler::ret(int imm16) {
  if (imm16 == 0) {
    emit_byte(0xC3);
  } else {
    emit_byte(0xC2);
    emit_word(imm16);
  }
}

// SAHF: store AH into flags (0x9E); invalid in 64-bit mode (guarded below).
void Assembler::sahf() {
#ifdef _LP64
  // Not supported in 64bit mode
  ShouldNotReachHere();
#endif
  emit_byte(0x9E);
}

// SAR r32, imm8: short form 0xD1 /7 for a count of 1, else 0xC1 /7 ib.
void Assembler::sarl(Register dst, int imm8) {
  int encode = prefix_and_encode(dst->encoding());
  assert(isShiftCount(imm8), "illegal shift count");
  if (imm8 == 1) {
    emit_byte(0xD1);
    emit_byte(0xF8 | encode);
  } else {
    emit_byte(0xC1);
    emit_byte(0xF8 | encode);
    emit_byte(imm8);
  }
}

// SAR r32, cl (0xD3 /7) - shift count taken from CL.
void Assembler::sarl(Register dst) {
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xD3);
  emit_byte(0xF8 | encode);
}

// SBB m32, imm32: 0x81 with rbx supplying the /3 opcode extension.
void Assembler::sbbl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_arith_operand(0x81, rbx, dst, imm32);
}

// SBB r32, imm32 (0x81 /3; 0xD8 encodes the register-direct /3 form).
void Assembler::sbbl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xD8, dst, imm32);
}


// SBB r32, m32: opcode 0x1B.
void Assembler::sbbl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x1B);
  emit_operand(dst, src);
}

// SBB r32, r32: opcode 0x1B, register-direct.
void Assembler::sbbl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x1B, 0xC0, dst, src);
}

// SETcc r8 (0F 90+cc); the 'true' argument forces a byte-register encoding.
void Assembler::setb(Condition cc, Register dst) {
  assert(0 <= cc && cc < 16, "illegal cc");
  int encode = prefix_and_encode(dst->encoding(), true);
  emit_byte(0x0F);
  emit_byte(0x90 | cc);
  emit_byte(0xC0 | encode);
}

// SHL r32, imm8: short form 0xD1 /4 for a count of 1, else 0xC1 /4 ib.
void Assembler::shll(Register dst, int imm8) {
  assert(isShiftCount(imm8), "illegal shift count");
  int encode = prefix_and_encode(dst->encoding());
  if (imm8 == 1 ) {
    emit_byte(0xD1);
    emit_byte(0xE0 | encode);
  } else {
    emit_byte(0xC1);
    emit_byte(0xE0 | encode);
    emit_byte(imm8);
  }
}

// SHL r32, cl (0xD3 /4) - shift count taken from CL.
void Assembler::shll(Register dst) {
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xD3);
  emit_byte(0xE0 | encode);
}

// SHR r32, imm8 (0xC1 /5 ib). Note: unlike shll/sarl there is no
// 0xD1 short form for a count of 1 here.
void Assembler::shrl(Register dst, int imm8) {
  assert(isShiftCount(imm8), "illegal shift count");
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xC1);
  emit_byte(0xE8 | encode);
  emit_byte(imm8);
}

// SHR r32, cl (0xD3 /5) - shift count taken from CL.
void Assembler::shrl(Register dst) {
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xD3);
  emit_byte(0xE8 | encode);
}

// copies a single word from [esi] to [edi]
void Assembler::smovl() {
  emit_byte(0xA5);
}

// SQRTSD xmm, xmm (F2 0F 51) - scalar double-precision square root.
void Assembler::sqrtsd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x51, dst, src, VEX_SIMD_F2);
}

// SQRTSD xmm, m64.
void Assembler::sqrtsd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x51, dst, src, VEX_SIMD_F2);
}

// SQRTSS xmm, xmm (F3 0F 51) - scalar single-precision square root.
void Assembler::sqrtss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_simd_arith(0x51, dst, src, VEX_SIMD_F3);
}

// SQRTSS xmm, m32.
void Assembler::sqrtss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_simd_arith(0x51, dst, src, VEX_SIMD_F3);
}

// STMXCSR m32 (0F AE /3) - store the MXCSR control/status register.
void Assembler::stmxcsr( Address dst) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0x0F);
  emit_byte(0xAE);
  emit_operand(as_Register(3), dst);
}

// SUB m32, imm32: 0x81 with rbp supplying the /5 opcode extension.
void Assembler::subl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_arith_operand(0x81, rbp, dst, imm32);
}

// SUB m32, r32: opcode 0x29.
void Assembler::subl(Address dst, Register src) {
  InstructionMark im(this);
  prefix(dst, src);
  emit_byte(0x29);
  emit_operand(src, dst);
}

// SUB r32, imm32 (0x81 /5; 0xE8 encodes the register-direct /5 form).
void Assembler::subl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xE8, dst, imm32);
}

// Force generation of a 4 byte immediate value even if it fits into 8bit 2650 void Assembler::subl_imm32(Register dst, int32_t imm32) { 2651 prefix(dst); 2652 emit_arith_imm32(0x81, 0xE8, dst, imm32); 2653 } 2654 2655 void Assembler::subl(Register dst, Address src) { 2656 InstructionMark im(this); 2657 prefix(src, dst); 2658 emit_byte(0x2B); 2659 emit_operand(dst, src); 2660 } 2661 2662 void Assembler::subl(Register dst, Register src) { 2663 (void) prefix_and_encode(dst->encoding(), src->encoding()); 2664 emit_arith(0x2B, 0xC0, dst, src); 2665 } 2666 2667 void Assembler::subsd(XMMRegister dst, XMMRegister src) { 2668 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2669 emit_simd_arith(0x5C, dst, src, VEX_SIMD_F2); 2670 } 2671 2672 void Assembler::subsd(XMMRegister dst, Address src) { 2673 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2674 emit_simd_arith(0x5C, dst, src, VEX_SIMD_F2); 2675 } 2676 2677 void Assembler::subss(XMMRegister dst, XMMRegister src) { 2678 NOT_LP64(assert(VM_Version::supports_sse(), "")); 2679 emit_simd_arith(0x5C, dst, src, VEX_SIMD_F3); 2680 } 2681 2682 void Assembler::subss(XMMRegister dst, Address src) { 2683 NOT_LP64(assert(VM_Version::supports_sse(), "")); 2684 emit_simd_arith(0x5C, dst, src, VEX_SIMD_F3); 2685 } 2686 2687 void Assembler::testb(Register dst, int imm8) { 2688 NOT_LP64(assert(dst->has_byte_register(), "must have byte register")); 2689 (void) prefix_and_encode(dst->encoding(), true); 2690 emit_arith_b(0xF6, 0xC0, dst, imm8); 2691 } 2692 2693 void Assembler::testl(Register dst, int32_t imm32) { 2694 // not using emit_arith because test 2695 // doesn't support sign-extension of 2696 // 8bit operands 2697 int encode = dst->encoding(); 2698 if (encode == 0) { 2699 emit_byte(0xA9); 2700 } else { 2701 encode = prefix_and_encode(encode); 2702 emit_byte(0xF7); 2703 emit_byte(0xC0 | encode); 2704 } 2705 emit_long(imm32); 2706 } 2707 2708 void Assembler::testl(Register dst, Register src) { 2709 (void) 
prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x85, 0xC0, dst, src);
}

// TEST r32, m32: opcode 0x85.
void Assembler::testl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x85);
  emit_operand(dst, src);
}

// UCOMISD xmm, m64 (66 0F 2E) - unordered double compare, sets EFLAGS.
void Assembler::ucomisd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_66);
}

// UCOMISD xmm, xmm.
void Assembler::ucomisd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_66);
}

// UCOMISS xmm, m32 (0F 2E) - unordered single compare, sets EFLAGS.
void Assembler::ucomiss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_NONE);
}

// UCOMISS xmm, xmm.
void Assembler::ucomiss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_NONE);
}


// XADD m32, r32 (0F C1) - exchange and add.
void Assembler::xaddl(Address dst, Register src) {
  InstructionMark im(this);
  prefix(dst, src);
  emit_byte(0x0F);
  emit_byte(0xC1);
  emit_operand(src, dst);
}

// XCHG r32, m32 (0x87).
void Assembler::xchgl(Register dst, Address src) { // xchg
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x87);
  emit_operand(dst, src);
}

// XCHG r32, r32 (0x87), register-direct.
void Assembler::xchgl(Register dst, Register src) {
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x87);
  emit_byte(0xc0 | encode);
}

// XOR r32, imm32 (0x81 /6; 0xF0 encodes the register-direct /6 form).
void Assembler::xorl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xF0, dst, imm32);
}

// XOR r32, m32: opcode 0x33.
void Assembler::xorl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x33);
  emit_operand(dst, src);
}

// XOR r32, r32: opcode 0x33, register-direct.
void Assembler::xorl(Register dst, Register src) {
  (void)
prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x33, 0xC0, dst, src);
}


// AVX 3-operands scalar float-point arithmetic instructions
// (dst = nds op src; the scalar forms always use a 128-bit VEX encoding,
// hence vector256 == false throughout.)

// VADDSD xmm, xmm, m64 (VEX.F2 0F 58).
void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, Address src) {
  assert(VM_Version::supports_avx(), "");
  emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
}

// VADDSD xmm, xmm, xmm.
void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
  assert(VM_Version::supports_avx(), "");
  emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
}

// VADDSS xmm, xmm, m32 (VEX.F3 0F 58).
void Assembler::vaddss(XMMRegister dst, XMMRegister nds, Address src) {
  assert(VM_Version::supports_avx(), "");
  emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
}

// VADDSS xmm, xmm, xmm.
void Assembler::vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
  assert(VM_Version::supports_avx(), "");
  emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
}

// VDIVSD xmm, xmm, m64 (VEX.F2 0F 5E).
void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, Address src) {
  assert(VM_Version::supports_avx(), "");
  emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
}

// VDIVSD xmm, xmm, xmm.
void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
  assert(VM_Version::supports_avx(), "");
  emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
}

// VDIVSS xmm, xmm, m32 (VEX.F3 0F 5E).
void Assembler::vdivss(XMMRegister dst, XMMRegister nds, Address src) {
  assert(VM_Version::supports_avx(), "");
  emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
}

// VDIVSS xmm, xmm, xmm.
void Assembler::vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
  assert(VM_Version::supports_avx(), "");
  emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
}

// VMULSD xmm, xmm, m64 (VEX.F2 0F 59).
void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, Address src) {
  assert(VM_Version::supports_avx(), "");
  emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
}

// VMULSD xmm, xmm, xmm.
void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
  assert(VM_Version::supports_avx(), "");
  emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
}

// VMULSS xmm, xmm, m32 (VEX.F3 0F 59).
void Assembler::vmulss(XMMRegister dst, XMMRegister nds, Address src) {
  assert(VM_Version::supports_avx(), "");
  emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
}

// VMULSS xmm, xmm, xmm.
void Assembler::vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
  assert(VM_Version::supports_avx(), "");
  emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
}

// VSUBSD xmm, xmm, m64 (VEX.F2 0F 5C).
void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, Address src) {
  assert(VM_Version::supports_avx(), "");
  emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
}

// VSUBSD xmm, xmm, xmm.
void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
  assert(VM_Version::supports_avx(), "");
  emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
}

// VSUBSS xmm, xmm, m32 (VEX.F3 0F 5C).
void Assembler::vsubss(XMMRegister dst, XMMRegister nds, Address src) {
  assert(VM_Version::supports_avx(), "");
  emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
}

// VSUBSS xmm, xmm, xmm.
void Assembler::vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
  assert(VM_Version::supports_avx(), "");
  emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
}

//====================VECTOR ARITHMETIC=====================================

// Float-point vector arithmetic

// ADDPD xmm, xmm (66 0F 58) - packed double add.
void Assembler::addpd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x58, dst, src, VEX_SIMD_66);
}

// ADDPS xmm, xmm (0F 58) - packed single add.
void Assembler::addps(XMMRegister dst, XMMRegister
src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x58, dst, src, VEX_SIMD_NONE);
}

// VADDPD xmm/ymm, xmm/ymm, xmm/ymm (VEX.66 0F 58); vector256 selects 256-bit.
void Assembler::vaddpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
  assert(VM_Version::supports_avx(), "");
  emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_66, vector256);
}

// VADDPS (VEX.0F 58).
void Assembler::vaddps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
  assert(VM_Version::supports_avx(), "");
  emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_NONE, vector256);
}

// VADDPD with a memory source.
void Assembler::vaddpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
  assert(VM_Version::supports_avx(), "");
  emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_66, vector256);
}

// VADDPS with a memory source.
void Assembler::vaddps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
  assert(VM_Version::supports_avx(), "");
  emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_NONE, vector256);
}

// SUBPD xmm, xmm (66 0F 5C) - packed double subtract.
void Assembler::subpd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x5C, dst, src, VEX_SIMD_66);
}

// SUBPS xmm, xmm (0F 5C) - packed single subtract.
void Assembler::subps(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x5C, dst, src, VEX_SIMD_NONE);
}

// VSUBPD (VEX.66 0F 5C).
void Assembler::vsubpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
  assert(VM_Version::supports_avx(), "");
  emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_66, vector256);
}

// VSUBPS (VEX.0F 5C).
void Assembler::vsubps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
  assert(VM_Version::supports_avx(), "");
  emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_NONE, vector256);
}

// VSUBPD with a memory source.
void Assembler::vsubpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
  assert(VM_Version::supports_avx(), "");
  emit_vex_arith(0x5C, dst, nds, src,
VEX_SIMD_66, vector256);
}

// VSUBPS with a memory source.
void Assembler::vsubps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
  assert(VM_Version::supports_avx(), "");
  emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_NONE, vector256);
}

// MULPD xmm, xmm (66 0F 59) - packed double multiply.
void Assembler::mulpd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x59, dst, src, VEX_SIMD_66);
}

// MULPS xmm, xmm (0F 59) - packed single multiply.
void Assembler::mulps(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x59, dst, src, VEX_SIMD_NONE);
}

// VMULPD (VEX.66 0F 59); vector256 selects the 256-bit form.
void Assembler::vmulpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
  assert(VM_Version::supports_avx(), "");
  emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_66, vector256);
}

// VMULPS (VEX.0F 59).
void Assembler::vmulps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
  assert(VM_Version::supports_avx(), "");
  emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_NONE, vector256);
}

// VMULPD with a memory source.
void Assembler::vmulpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
  assert(VM_Version::supports_avx(), "");
  emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_66, vector256);
}

// VMULPS with a memory source.
void Assembler::vmulps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
  assert(VM_Version::supports_avx(), "");
  emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_NONE, vector256);
}

// DIVPD xmm, xmm (66 0F 5E) - packed double divide.
void Assembler::divpd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x5E, dst, src, VEX_SIMD_66);
}

// DIVPS xmm, xmm (0F 5E) - packed single divide.
void Assembler::divps(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x5E, dst, src, VEX_SIMD_NONE);
}

// VDIVPD (VEX.66 0F 5E).
void Assembler::vdivpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
  assert(VM_Version::supports_avx(),
"");
  emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_66, vector256);
}

// VDIVPS (VEX.0F 5E).
void Assembler::vdivps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
  assert(VM_Version::supports_avx(), "");
  emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_NONE, vector256);
}

// VDIVPD with a memory source.
void Assembler::vdivpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
  assert(VM_Version::supports_avx(), "");
  emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_66, vector256);
}

// VDIVPS with a memory source.
void Assembler::vdivps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
  assert(VM_Version::supports_avx(), "");
  emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_NONE, vector256);
}

// ANDPD xmm, xmm (66 0F 54) - bitwise AND of packed doubles.
void Assembler::andpd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x54, dst, src, VEX_SIMD_66);
}

// ANDPS xmm, xmm (0F 54) - bitwise AND of packed singles.
void Assembler::andps(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_simd_arith(0x54, dst, src, VEX_SIMD_NONE);
}

// ANDPS xmm, m128.
void Assembler::andps(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_simd_arith(0x54, dst, src, VEX_SIMD_NONE);
}

// ANDPD xmm, m128.
void Assembler::andpd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x54, dst, src, VEX_SIMD_66);
}

// VANDPD (VEX.66 0F 54); vector256 selects the 256-bit form.
void Assembler::vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
  assert(VM_Version::supports_avx(), "");
  emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_66, vector256);
}

// VANDPS (VEX.0F 54).
void Assembler::vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
  assert(VM_Version::supports_avx(), "");
  emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_NONE, vector256);
}

// VANDPD with a memory source.
void Assembler::vandpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
  assert(VM_Version::supports_avx(), "");
  emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_66, vector256);
}

// VANDPS with a memory source.
void Assembler::vandps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
  assert(VM_Version::supports_avx(), "");
  emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_NONE, vector256);
}

// XORPD xmm, xmm (66 0F 57) - bitwise XOR of packed doubles.
void Assembler::xorpd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x57, dst, src, VEX_SIMD_66);
}

// XORPS xmm, xmm (0F 57) - bitwise XOR of packed singles.
void Assembler::xorps(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_simd_arith(0x57, dst, src, VEX_SIMD_NONE);
}

// XORPD xmm, m128.
void Assembler::xorpd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x57, dst, src, VEX_SIMD_66);
}

// XORPS xmm, m128.
void Assembler::xorps(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_simd_arith(0x57, dst, src, VEX_SIMD_NONE);
}

// VXORPD (VEX.66 0F 57); vector256 selects the 256-bit form.
void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
  assert(VM_Version::supports_avx(), "");
  emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_66, vector256);
}

// VXORPS (VEX.0F 57).
void Assembler::vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
  assert(VM_Version::supports_avx(), "");
  emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_NONE, vector256);
}

// VXORPD with a memory source.
void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
  assert(VM_Version::supports_avx(), "");
  emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_66, vector256);
}

// VXORPS with a memory source.
void Assembler::vxorps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
  assert(VM_Version::supports_avx(), "");
  emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_NONE, vector256);
}


// Integer vector arithmetic
void
// PADDB xmm, xmm (66 0F FC) - packed byte add.
Assembler::paddb(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0xFC, dst, src, VEX_SIMD_66);
}

// PADDW xmm, xmm (66 0F FD) - packed word add.
void Assembler::paddw(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0xFD, dst, src, VEX_SIMD_66);
}

// PADDD xmm, xmm (66 0F FE) - packed doubleword add.
void Assembler::paddd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0xFE, dst, src, VEX_SIMD_66);
}

// PADDQ xmm, xmm (66 0F D4) - packed quadword add.
void Assembler::paddq(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0xD4, dst, src, VEX_SIMD_66);
}

// VPADDB (VEX.66 0F FC). The assert reads as
// (avx && !vector256) || avx2: 128-bit integer ops need AVX,
// 256-bit integer ops need AVX2.
void Assembler::vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  emit_vex_arith(0xFC, dst, nds, src, VEX_SIMD_66, vector256);
}

// VPADDW (VEX.66 0F FD); same AVX/AVX2 constraint as above.
void Assembler::vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  emit_vex_arith(0xFD, dst, nds, src, VEX_SIMD_66, vector256);
}

// VPADDD (VEX.66 0F FE); same AVX/AVX2 constraint as above.
void Assembler::vpaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  emit_vex_arith(0xFE, dst, nds, src, VEX_SIMD_66, vector256);
}

// VPADDQ (VEX.66 0F D4); same AVX/AVX2 constraint as above.
void Assembler::vpaddq(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  emit_vex_arith(0xD4, dst, nds, src, VEX_SIMD_66, vector256);
}

// VPADDB with a memory source.
void Assembler::vpaddb(XMMRegister dst, XMMRegister nds, Address
src, bool vector256) { 3109 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3110 emit_vex_arith(0xFC, dst, nds, src, VEX_SIMD_66, vector256); 3111 } 3112 3113 void Assembler::vpaddw(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { 3114 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3115 emit_vex_arith(0xFD, dst, nds, src, VEX_SIMD_66, vector256); 3116 } 3117 3118 void Assembler::vpaddd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { 3119 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3120 emit_vex_arith(0xFE, dst, nds, src, VEX_SIMD_66, vector256); 3121 } 3122 3123 void Assembler::vpaddq(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { 3124 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3125 emit_vex_arith(0xD4, dst, nds, src, VEX_SIMD_66, vector256); 3126 } 3127 3128 void Assembler::psubb(XMMRegister dst, XMMRegister src) { 3129 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3130 emit_simd_arith(0xF8, dst, src, VEX_SIMD_66); 3131 } 3132 3133 void Assembler::psubw(XMMRegister dst, XMMRegister src) { 3134 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3135 emit_simd_arith(0xF9, dst, src, VEX_SIMD_66); 3136 } 3137 3138 void Assembler::psubd(XMMRegister dst, XMMRegister src) { 3139 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3140 emit_simd_arith(0xFA, dst, src, VEX_SIMD_66); 3141 } 3142 3143 void Assembler::psubq(XMMRegister dst, XMMRegister src) { 3144 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3145 emit_simd_arith(0xFB, dst, src, VEX_SIMD_66); 3146 } 3147 3148 void Assembler::vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { 3149 assert(VM_Version::supports_avx() && !vector256 
|| VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3150 emit_vex_arith(0xF8, dst, nds, src, VEX_SIMD_66, vector256); 3151 } 3152 3153 void Assembler::vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { 3154 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3155 emit_vex_arith(0xF9, dst, nds, src, VEX_SIMD_66, vector256); 3156 } 3157 3158 void Assembler::vpsubd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { 3159 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3160 emit_vex_arith(0xFA, dst, nds, src, VEX_SIMD_66, vector256); 3161 } 3162 3163 void Assembler::vpsubq(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { 3164 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3165 emit_vex_arith(0xFB, dst, nds, src, VEX_SIMD_66, vector256); 3166 } 3167 3168 void Assembler::vpsubb(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { 3169 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3170 emit_vex_arith(0xF8, dst, nds, src, VEX_SIMD_66, vector256); 3171 } 3172 3173 void Assembler::vpsubw(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { 3174 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3175 emit_vex_arith(0xF9, dst, nds, src, VEX_SIMD_66, vector256); 3176 } 3177 3178 void Assembler::vpsubd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { 3179 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3180 emit_vex_arith(0xFA, dst, nds, src, VEX_SIMD_66, vector256); 3181 } 3182 3183 void Assembler::vpsubq(XMMRegister dst, 
XMMRegister nds, Address src, bool vector256) { 3184 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3185 emit_vex_arith(0xFB, dst, nds, src, VEX_SIMD_66, vector256); 3186 } 3187 3188 void Assembler::pmullw(XMMRegister dst, XMMRegister src) { 3189 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3190 emit_simd_arith(0xD5, dst, src, VEX_SIMD_66); 3191 } 3192 3193 void Assembler::pmulld(XMMRegister dst, XMMRegister src) { 3194 assert(VM_Version::supports_sse4_1(), ""); 3195 int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38); 3196 emit_byte(0x40); 3197 emit_byte(0xC0 | encode); 3198 } 3199 3200 void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { 3201 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3202 emit_vex_arith(0xD5, dst, nds, src, VEX_SIMD_66, vector256); 3203 } 3204 3205 void Assembler::vpmulld(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { 3206 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3207 int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_38); 3208 emit_byte(0x40); 3209 emit_byte(0xC0 | encode); 3210 } 3211 3212 void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { 3213 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3214 emit_vex_arith(0xD5, dst, nds, src, VEX_SIMD_66, vector256); 3215 } 3216 3217 void Assembler::vpmulld(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { 3218 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3219 InstructionMark im(this); 3220 int dst_enc = dst->encoding(); 3221 int nds_enc = 
nds->is_valid() ? nds->encoding() : 0; 3222 vex_prefix(src, nds_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, false, vector256); 3223 emit_byte(0x40); 3224 emit_operand(dst, src); 3225 } 3226 3227 // Shift packed integers left by specified number of bits. 3228 void Assembler::psllw(XMMRegister dst, int shift) { 3229 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3230 // XMM6 is for /6 encoding: 66 0F 71 /6 ib 3231 int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66); 3232 emit_byte(0x71); 3233 emit_byte(0xC0 | encode); 3234 emit_byte(shift & 0xFF); 3235 } 3236 3237 void Assembler::pslld(XMMRegister dst, int shift) { 3238 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3239 // XMM6 is for /6 encoding: 66 0F 72 /6 ib 3240 int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66); 3241 emit_byte(0x72); 3242 emit_byte(0xC0 | encode); 3243 emit_byte(shift & 0xFF); 3244 } 3245 3246 void Assembler::psllq(XMMRegister dst, int shift) { 3247 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3248 // XMM6 is for /6 encoding: 66 0F 73 /6 ib 3249 int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66); 3250 emit_byte(0x73); 3251 emit_byte(0xC0 | encode); 3252 emit_byte(shift & 0xFF); 3253 } 3254 3255 void Assembler::psllw(XMMRegister dst, XMMRegister shift) { 3256 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3257 emit_simd_arith(0xF1, dst, shift, VEX_SIMD_66); 3258 } 3259 3260 void Assembler::pslld(XMMRegister dst, XMMRegister shift) { 3261 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3262 emit_simd_arith(0xF2, dst, shift, VEX_SIMD_66); 3263 } 3264 3265 void Assembler::psllq(XMMRegister dst, XMMRegister shift) { 3266 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3267 emit_simd_arith(0xF3, dst, shift, VEX_SIMD_66); 3268 } 3269 3270 void Assembler::vpsllw(XMMRegister dst, XMMRegister src, int shift, bool vector256) { 3271 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors 
requires AVX2"); 3272 // XMM6 is for /6 encoding: 66 0F 71 /6 ib 3273 emit_vex_arith(0x71, xmm6, dst, src, VEX_SIMD_66, vector256); 3274 emit_byte(shift & 0xFF); 3275 } 3276 3277 void Assembler::vpslld(XMMRegister dst, XMMRegister src, int shift, bool vector256) { 3278 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3279 // XMM6 is for /6 encoding: 66 0F 72 /6 ib 3280 emit_vex_arith(0x72, xmm6, dst, src, VEX_SIMD_66, vector256); 3281 emit_byte(shift & 0xFF); 3282 } 3283 3284 void Assembler::vpsllq(XMMRegister dst, XMMRegister src, int shift, bool vector256) { 3285 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3286 // XMM6 is for /6 encoding: 66 0F 73 /6 ib 3287 emit_vex_arith(0x73, xmm6, dst, src, VEX_SIMD_66, vector256); 3288 emit_byte(shift & 0xFF); 3289 } 3290 3291 void Assembler::vpsllw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) { 3292 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3293 emit_vex_arith(0xF1, dst, src, shift, VEX_SIMD_66, vector256); 3294 } 3295 3296 void Assembler::vpslld(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) { 3297 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3298 emit_vex_arith(0xF2, dst, src, shift, VEX_SIMD_66, vector256); 3299 } 3300 3301 void Assembler::vpsllq(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) { 3302 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3303 emit_vex_arith(0xF3, dst, src, shift, VEX_SIMD_66, vector256); 3304 } 3305 3306 // Shift packed integers logically right by specified number of bits. 
// Immediate-count logical right shifts use the /2 group digit (xmm2 as the
// fake reg operand).
void Assembler::psrlw(XMMRegister dst, int shift) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  // XMM2 is for /2 encoding: 66 0F 71 /2 ib
  int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66);
  emit_byte(0x71);
  emit_byte(0xC0 | encode);
  emit_byte(shift & 0xFF);
}

void Assembler::psrld(XMMRegister dst, int shift) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  // XMM2 is for /2 encoding: 66 0F 72 /2 ib
  int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66);
  emit_byte(0x72);
  emit_byte(0xC0 | encode);
  emit_byte(shift & 0xFF);
}

void Assembler::psrlq(XMMRegister dst, int shift) {
  // Do not confuse it with psrldq SSE2 instruction which
  // shifts 128 bit value in xmm register by number of bytes.
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  // XMM2 is for /2 encoding: 66 0F 73 /2 ib
  int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66);
  emit_byte(0x73);
  emit_byte(0xC0 | encode);
  emit_byte(shift & 0xFF);
}

// Variable-count logical right shifts: 0xD1/D2/D3 = PSRLW/PSRLD/PSRLQ.
void Assembler::psrlw(XMMRegister dst, XMMRegister shift) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0xD1, dst, shift, VEX_SIMD_66);
}

void Assembler::psrld(XMMRegister dst, XMMRegister shift) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0xD2, dst, shift, VEX_SIMD_66);
}

void Assembler::psrlq(XMMRegister dst, XMMRegister shift) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0xD3, dst, shift, VEX_SIMD_66);
}

// AVX immediate-count logical right shifts (dst in VEX.vvvv, /2 digit via xmm2).
void Assembler::vpsrlw(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  // XMM2 is for /2 encoding: 66 0F 71 /2 ib
  emit_vex_arith(0x71, xmm2, dst, src, VEX_SIMD_66, vector256);
  emit_byte(shift & 0xFF);
}

void Assembler::vpsrld(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  // XMM2 is for /2 encoding: 66 0F 72 /2 ib
  emit_vex_arith(0x72, xmm2, dst, src, VEX_SIMD_66, vector256);
  emit_byte(shift & 0xFF);
}

void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  // XMM2 is for /2 encoding: 66 0F 73 /2 ib
  emit_vex_arith(0x73, xmm2, dst, src, VEX_SIMD_66, vector256);
  emit_byte(shift & 0xFF);
}

// AVX variable-count logical right shifts.
void Assembler::vpsrlw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  emit_vex_arith(0xD1, dst, src, shift, VEX_SIMD_66, vector256);
}

void Assembler::vpsrld(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  emit_vex_arith(0xD2, dst, src, shift, VEX_SIMD_66, vector256);
}

void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  emit_vex_arith(0xD3, dst, src, shift, VEX_SIMD_66, vector256);
}

// Shift packed integers arithmetically right by specified number of bits.
// Immediate-count arithmetic right shifts use the /4 group digit (xmm4 as
// the fake reg operand).  Note there is no PSRAQ in SSE/AVX of this vintage.
void Assembler::psraw(XMMRegister dst, int shift) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  // XMM4 is for /4 encoding: 66 0F 71 /4 ib
  int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66);
  emit_byte(0x71);
  emit_byte(0xC0 | encode);
  emit_byte(shift & 0xFF);
}

void Assembler::psrad(XMMRegister dst, int shift) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  // XMM4 is for /4 encoding: 66 0F 72 /4 ib
  int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66);
  emit_byte(0x72);
  emit_byte(0xC0 | encode);
  emit_byte(shift & 0xFF);
}

// Variable-count arithmetic right shifts: 0xE1/E2 = PSRAW/PSRAD.
void Assembler::psraw(XMMRegister dst, XMMRegister shift) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0xE1, dst, shift, VEX_SIMD_66);
}

void Assembler::psrad(XMMRegister dst, XMMRegister shift) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0xE2, dst, shift, VEX_SIMD_66);
}

// AVX immediate-count arithmetic right shifts.
void Assembler::vpsraw(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  // XMM4 is for /4 encoding: 66 0F 71 /4 ib
  emit_vex_arith(0x71, xmm4, dst, src, VEX_SIMD_66, vector256);
  emit_byte(shift & 0xFF);
}

void Assembler::vpsrad(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  // XMM4 is for /4 encoding: 66 0F 72 /4 ib
  emit_vex_arith(0x72, xmm4, dst, src, VEX_SIMD_66, vector256);
  emit_byte(shift & 0xFF);
}

// AVX variable-count arithmetic right shifts.
void Assembler::vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  emit_vex_arith(0xE1, dst, src, shift, VEX_SIMD_66, vector256);
}

void Assembler::vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  emit_vex_arith(0xE2, dst, src, shift, VEX_SIMD_66, vector256);
}


// AND packed integers
// 0xDB = PAND, 0xEB = POR (with 0x66 SIMD prefix).
void Assembler::pand(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0xDB, dst, src, VEX_SIMD_66);
}

void Assembler::vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  emit_vex_arith(0xDB, dst, nds, src, VEX_SIMD_66, vector256);
}

void Assembler::vpand(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  emit_vex_arith(0xDB, dst, nds, src, VEX_SIMD_66, vector256);
}

void Assembler::por(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0xEB, dst, src, VEX_SIMD_66);
}

void Assembler::vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  emit_vex_arith(0xEB, dst, nds, src, VEX_SIMD_66, vector256);
}

void Assembler::vpor(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  emit_vex_arith(0xEB, dst, nds, src, VEX_SIMD_66, vector256);
}

// (chunk boundary splits the next definition: its return type is here,
//  the rest of pxor continues on the following extracted line)
void
// (continuation: the `void` return type of this definition sits at the end
//  of the previous extracted line)  0xEF = PXOR.
Assembler::pxor(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0xEF, dst, src, VEX_SIMD_66);
}

void Assembler::vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  emit_vex_arith(0xEF, dst, nds, src, VEX_SIMD_66, vector256);
}

void Assembler::vpxor(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  emit_vex_arith(0xEF, dst, nds, src, VEX_SIMD_66, vector256);
}


// Insert src into the high 128 bits of the 256-bit dst (VINSERTF128, 0F 3A 0x18).
void Assembler::vinsertf128h(XMMRegister dst, XMMRegister nds, XMMRegister src) {
  assert(VM_Version::supports_avx(), "");
  bool vector256 = true;
  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_3A);
  emit_byte(0x18);
  emit_byte(0xC0 | encode);
  // 0x00 - insert into lower 128 bits
  // 0x01 - insert into upper 128 bits
  emit_byte(0x01);
}

void Assembler::vinsertf128h(XMMRegister dst, Address src) {
  assert(VM_Version::supports_avx(), "");
  InstructionMark im(this);
  bool vector256 = true;
  assert(dst != xnoreg, "sanity");
  int dst_enc = dst->encoding();
  // swap src<->dst for encoding
  vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector256);
  emit_byte(0x18);
  emit_operand(dst, src);
  // 0x01 - insert into upper 128 bits
  emit_byte(0x01);
}

// Store the high 128 bits of src to memory (VEXTRACTF128, 0F 3A 0x19).
void Assembler::vextractf128h(Address dst, XMMRegister src) {
  assert(VM_Version::supports_avx(), "");
  InstructionMark im(this);
  bool vector256 = true;
  assert(src != xnoreg, "sanity");
  int src_enc = src->encoding();
  vex_prefix(dst, 0, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector256);
  emit_byte(0x19);
  emit_operand(src, dst);
  // 0x01 - extract from upper 128 bits
  emit_byte(0x01);
}

// Integer (AVX2) counterparts: VINSERTI128 (0x38) / VEXTRACTI128 (0x39).
void Assembler::vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src) {
  assert(VM_Version::supports_avx2(), "");
  bool vector256 = true;
  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_3A);
  emit_byte(0x38);
  emit_byte(0xC0 | encode);
  // 0x00 - insert into lower 128 bits
  // 0x01 - insert into upper 128 bits
  emit_byte(0x01);
}

void Assembler::vinserti128h(XMMRegister dst, Address src) {
  assert(VM_Version::supports_avx2(), "");
  InstructionMark im(this);
  bool vector256 = true;
  assert(dst != xnoreg, "sanity");
  int dst_enc = dst->encoding();
  // swap src<->dst for encoding
  vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector256);
  emit_byte(0x38);
  emit_operand(dst, src);
  // 0x01 - insert into upper 128 bits
  emit_byte(0x01);
}

void Assembler::vextracti128h(Address dst, XMMRegister src) {
  assert(VM_Version::supports_avx2(), "");
  InstructionMark im(this);
  bool vector256 = true;
  assert(src != xnoreg, "sanity");
  int src_enc = src->encoding();
  vex_prefix(dst, 0, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector256);
  emit_byte(0x39);
  emit_operand(src, dst);
  // 0x01 - extract from upper 128 bits
  emit_byte(0x01);
}

// VZEROUPPER (VEX 0x77): zero the upper 128 bits of all YMM registers to
// avoid AVX<->SSE transition penalties.
void Assembler::vzeroupper() {
  assert(VM_Version::supports_avx(), "");
  (void)vex_prefix_and_encode(xmm0, xmm0, xmm0, VEX_SIMD_NONE);
  emit_byte(0x77);
}


#ifndef _LP64
// 32bit only pieces of the assembler

// CMP r32, imm32 with relocated immediate (0x81 /7; the /7 digit comes from
// the 0xF8 base in the ModRM byte below).
// (chunk boundary: this definition continues on the following extracted line)
void Assembler::cmp_literal32(Register src1, int32_t imm32, RelocationHolder const& rspec) {
  // NO PREFIX AS NEVER 64BIT
  InstructionMark im(this);
  emit_byte(0x81);
// (continuation of cmp_literal32(Register, ...) -- its opening lines are on
//  the previous extracted line)  ModRM 0xF8|reg selects the CMP /7 form.
  emit_byte(0xF8 | src1->encoding());
  emit_data(imm32, rspec, 0);
}

void Assembler::cmp_literal32(Address src1, int32_t imm32, RelocationHolder const& rspec) {
  // NO PREFIX AS NEVER 64BIT (not even 32bit versions of 64bit regs)
  InstructionMark im(this);
  emit_byte(0x81);
  emit_operand(rdi, src1);  // rdi = /7 digit of the CMP group
  emit_data(imm32, rspec, 0);
}

// The 64-bit (32bit platform) cmpxchg compares the value at adr with the contents of rdx:rax,
// and stores rcx:rbx into adr if so; otherwise, the value at adr is loaded
// into rdx:rax.  The ZF is set if the compared values were equal, and cleared otherwise.
void Assembler::cmpxchg8(Address adr) {
  InstructionMark im(this);
  emit_byte(0x0F);
  emit_byte(0xc7);
  emit_operand(rcx, adr);  // rcx = /1 digit => CMPXCHG8B
}

void Assembler::decl(Register dst) {
  // Don't use it directly. Use MacroAssembler::decrementl() instead.
  emit_byte(0x48 | dst->encoding());  // one-byte DEC r32 (only valid in 32-bit mode)
}

#endif // _LP64

// 64bit typically doesn't use the x87 but needs to for the trig funcs
// x87 stack-op emitters.  For the Address forms, the integer register passed
// to emit_operand32 is only the /digit selector of the opcode group, not a
// real operand.

void Assembler::fabs() {
  emit_byte(0xD9);
  emit_byte(0xE1);
}

// fadd and friends: D8 = st(0) op st(i); DC = st(i) op st(0); DE = ... and pop.
void Assembler::fadd(int i) {
  emit_farith(0xD8, 0xC0, i);
}

void Assembler::fadd_d(Address src) {
  InstructionMark im(this);
  emit_byte(0xDC);
  emit_operand32(rax, src);
}

void Assembler::fadd_s(Address src) {
  InstructionMark im(this);
  emit_byte(0xD8);
  emit_operand32(rax, src);
}

void Assembler::fadda(int i) {
  emit_farith(0xDC, 0xC0, i);
}

void Assembler::faddp(int i) {
  emit_farith(0xDE, 0xC0, i);
}

void Assembler::fchs() {
  emit_byte(0xD9);
  emit_byte(0xE0);
}

void Assembler::fcom(int i) {
  emit_farith(0xD8, 0xD0, i);
}

void Assembler::fcomp(int i) {
  emit_farith(0xD8, 0xD8, i);
}

void Assembler::fcomp_d(Address src) {
  InstructionMark im(this);
  emit_byte(0xDC);
  emit_operand32(rbx, src);
}

void Assembler::fcomp_s(Address src) {
  InstructionMark im(this);
  emit_byte(0xD8);
  emit_operand32(rbx, src);
}

void Assembler::fcompp() {
  emit_byte(0xDE);
  emit_byte(0xD9);
}

void Assembler::fcos() {
  emit_byte(0xD9);
  emit_byte(0xFF);
}

void Assembler::fdecstp() {
  emit_byte(0xD9);
  emit_byte(0xF6);
}

void Assembler::fdiv(int i) {
  emit_farith(0xD8, 0xF0, i);
}

void Assembler::fdiv_d(Address src) {
  InstructionMark im(this);
  emit_byte(0xDC);
  emit_operand32(rsi, src);
}

void Assembler::fdiv_s(Address src) {
  InstructionMark im(this);
  emit_byte(0xD8);
  emit_operand32(rsi, src);
}

void Assembler::fdiva(int i) {
  emit_farith(0xDC, 0xF8, i);
}

// Note: The Intel manual (Pentium Processor User's Manual, Vol.3, 1994)
// is erroneous for some of the floating-point instructions below.

void Assembler::fdivp(int i) {
  emit_farith(0xDE, 0xF8, i);  // ST(0) <- ST(0) / ST(1) and pop (Intel manual wrong)
}

void Assembler::fdivr(int i) {
  emit_farith(0xD8, 0xF8, i);
}

void Assembler::fdivr_d(Address src) {
  InstructionMark im(this);
  emit_byte(0xDC);
  emit_operand32(rdi, src);
}

void Assembler::fdivr_s(Address src) {
  InstructionMark im(this);
  emit_byte(0xD8);
  emit_operand32(rdi, src);
}

void Assembler::fdivra(int i) {
  emit_farith(0xDC, 0xF0, i);
}

void Assembler::fdivrp(int i) {
  emit_farith(0xDE, 0xF0, i);  // ST(0) <- ST(1) / ST(0) and pop (Intel manual wrong)
}

void Assembler::ffree(int i) {
  emit_farith(0xDD, 0xC0, i);
}

// Integer load/store forms: the register argument to emit_operand32 is the
// opcode-group /digit, not an operand (e.g. rbp below = /5 => FILD m64int).
void Assembler::fild_d(Address adr) {
  InstructionMark im(this);
  emit_byte(0xDF);
  emit_operand32(rbp, adr);
}

void Assembler::fild_s(Address adr) {
  InstructionMark im(this);
  emit_byte(0xDB);
  emit_operand32(rax, adr);
}

void Assembler::fincstp() {
  emit_byte(0xD9);
  emit_byte(0xF7);
}

void Assembler::finit() {
  emit_byte(0x9B);  // FWAIT prefix: 9B DB E3 is the waiting FINIT
  emit_byte(0xDB);
  emit_byte(0xE3);
}

void Assembler::fist_s(Address adr) {
  InstructionMark im(this);
  emit_byte(0xDB);
  emit_operand32(rdx, adr);
}

void Assembler::fistp_d(Address adr) {
  InstructionMark im(this);
  emit_byte(0xDF);
  emit_operand32(rdi, adr);
}

void Assembler::fistp_s(Address adr) {
  InstructionMark im(this);
  emit_byte(0xDB);
  emit_operand32(rbx, adr);
}

void Assembler::fld1() {
  emit_byte(0xD9);
  emit_byte(0xE8);
}

void Assembler::fld_d(Address adr) {
  InstructionMark im(this);
  emit_byte(0xDD);
  emit_operand32(rax, adr);
}

void Assembler::fld_s(Address adr) {
  InstructionMark im(this);
  emit_byte(0xD9);
  emit_operand32(rax, adr);
}


void Assembler::fld_s(int index) {
  emit_farith(0xD9, 0xC0, index);
}

// 80-bit extended-precision load (DB /5).
void Assembler::fld_x(Address adr) {
  InstructionMark im(this);
  emit_byte(0xDB);
  emit_operand32(rbp, adr);
}

void Assembler::fldcw(Address src) {
  InstructionMark im(this);
  emit_byte(0xd9);
  emit_operand32(rbp, src);
}

void Assembler::fldenv(Address src) {
  InstructionMark im(this);
  emit_byte(0xD9);
  emit_operand32(rsp, src);
}

// Load constants: log10(2), ln(2), +0.0.
void Assembler::fldlg2() {
  emit_byte(0xD9);
  emit_byte(0xEC);
}

void Assembler::fldln2() {
  emit_byte(0xD9);
  emit_byte(0xED);
}

void Assembler::fldz() {
  emit_byte(0xD9);
  emit_byte(0xEE);
}

// ln(st0)  = ln(2) * log2(st0), via FYL2X.
void Assembler::flog() {
  fldln2();
  fxch();
  fyl2x();
}

// log10(st0) = log10(2) * log2(st0).
void Assembler::flog10() {
  fldlg2();
  fxch();
  fyl2x();
}

void Assembler::fmul(int i) {
  emit_farith(0xD8, 0xC8, i);
}

void Assembler::fmul_d(Address src) {
  InstructionMark im(this);
  emit_byte(0xDC);
  emit_operand32(rcx, src);
}

void Assembler::fmul_s(Address src) {
  InstructionMark im(this);
  emit_byte(0xD8);
  emit_operand32(rcx, src);
}

void Assembler::fmula(int i) {
  emit_farith(0xDC, 0xC8, i);
}

void Assembler::fmulp(int i) {
  emit_farith(0xDE, 0xC8, i);
}

void Assembler::fnsave(Address dst) {
  InstructionMark im(this);
  emit_byte(0xDD);
  emit_operand32(rsi, dst);
}

void Assembler::fnstcw(Address src) {
  InstructionMark im(this);
  emit_byte(0x9B);  // FWAIT prefix => waiting FSTCW
  emit_byte(0xD9);
  emit_operand32(rdi, src);
}

void Assembler::fnstsw_ax() {
  emit_byte(0xdF);
  emit_byte(0xE0);
}

void Assembler::fprem() {
  emit_byte(0xD9);
  emit_byte(0xF8);
}

// IEEE-style partial remainder (D9 F5), vs the truncating fprem above.
void Assembler::fprem1() {
  emit_byte(0xD9);
  emit_byte(0xF5);
}

void Assembler::frstor(Address src) {
  InstructionMark im(this);
  emit_byte(0xDD);
  emit_operand32(rsp, src);
}

void Assembler::fsin() {
  emit_byte(0xD9);
  emit_byte(0xFE);
}

void Assembler::fsqrt() {
  emit_byte(0xD9);
  emit_byte(0xFA);
}

void Assembler::fst_d(Address adr) {
  InstructionMark im(this);
  emit_byte(0xDD);
  emit_operand32(rdx, adr);
}

void Assembler::fst_s(Address adr) {
  InstructionMark im(this);
  emit_byte(0xD9);
  emit_operand32(rdx, adr);
}

void Assembler::fstp_d(Address adr) {
  InstructionMark im(this);
  emit_byte(0xDD);
  emit_operand32(rbx, adr);
}

void Assembler::fstp_d(int index) {
  emit_farith(0xDD, 0xD8, index);
}

void Assembler::fstp_s(Address adr) {
  InstructionMark im(this);
  emit_byte(0xD9);
  emit_operand32(rbx, adr);
}

// 80-bit extended-precision store-and-pop (DB /7).
void Assembler::fstp_x(Address adr) {
  InstructionMark im(this);
  emit_byte(0xDB);
  emit_operand32(rdi, adr);
}

void Assembler::fsub(int i) {
  emit_farith(0xD8, 0xE0, i);
}

void Assembler::fsub_d(Address src) {
  InstructionMark im(this);
  emit_byte(0xDC);
  emit_operand32(rsp, src);
}

void Assembler::fsub_s(Address src) {
  InstructionMark im(this);
  emit_byte(0xD8);
  emit_operand32(rsp, src);
}

void Assembler::fsuba(int i) {
  emit_farith(0xDC, 0xE8, i);
}

void Assembler::fsubp(int i) {
  emit_farith(0xDE, 0xE8, i);  // ST(0) <- ST(0) - ST(1) and pop (Intel manual wrong)
}

void Assembler::fsubr(int i) {
  emit_farith(0xD8, 0xE8, i);
}

void Assembler::fsubr_d(Address src) {
  InstructionMark im(this);
  emit_byte(0xDC);
  emit_operand32(rbp, src);
}

void Assembler::fsubr_s(Address src) {
  InstructionMark im(this);
  emit_byte(0xD8);
  emit_operand32(rbp, src);
}

void Assembler::fsubra(int i) {
  emit_farith(0xDC, 0xE0, i);
}

void Assembler::fsubrp(int i) {
  emit_farith(0xDE, 0xE0, i);  // ST(0) <- ST(1) - ST(0) and pop (Intel manual wrong)
}

// FPTAN pushes tan(st0) then 1.0; the trailing FSTP ST(0) pops the 1.0.
void Assembler::ftan() {
  emit_byte(0xD9);
  emit_byte(0xF2);
  emit_byte(0xDD);
  emit_byte(0xD8);
}

void Assembler::ftst() {
  emit_byte(0xD9);
  emit_byte(0xE4);
}

void Assembler::fucomi(int i) {
  // make sure the instruction is supported (introduced for P6, together with cmov)
  guarantee(VM_Version::supports_cmov(), "illegal instruction");
  emit_farith(0xDB, 0xE8, i);
}

void Assembler::fucomip(int i) {
  // make sure the instruction is supported (introduced for P6, together with cmov)
  guarantee(VM_Version::supports_cmov(), "illegal instruction");
  emit_farith(0xDF, 0xE8, i);
}

void Assembler::fwait() {
  emit_byte(0x9B);
}

void Assembler::fxch(int i) {
  emit_farith(0xD9, 0xC8, i);
}

void Assembler::fyl2x() {
  emit_byte(0xD9);
  emit_byte(0xF1);
}

void Assembler::frndint() {
  emit_byte(0xD9);
  emit_byte(0xFC);
}

void Assembler::f2xm1() {
  emit_byte(0xD9);
  emit_byte(0xF0);
}

// Load log2(e).
void Assembler::fldl2e() {
  emit_byte(0xD9);
  emit_byte(0xEA);
}

// SSE SIMD prefix byte values corresponding to VexSimdPrefix encoding.
static int simd_pre[4] = { 0, 0x66, 0xF3, 0xF2 };
// SSE opcode second byte values (first is 0x0F) corresponding to VexOpcode encoding.
static int simd_opc[4] = { 0, 0, 0x38, 0x3A };

// Generate SSE legacy REX prefix and SIMD opcode based on VEX encoding.
// Emit a legacy-SSE encoding (mandatory SIMD prefix byte, REX prefix, 0x0F
// escape, optional 0x38/0x3A second escape) for a memory-operand instruction
// described in VEX terms (pre/opc).
void Assembler::rex_prefix(Address adr, XMMRegister xreg, VexSimdPrefix pre, VexOpcode opc, bool rex_w) {
  if (pre > 0) {
    emit_byte(simd_pre[pre]);          // mandatory prefix (0x66/0xF3/0xF2)
  }
  if (rex_w) {
    prefixq(adr, xreg);                // REX.W form
  } else {
    prefix(adr, xreg);
  }
  if (opc > 0) {
    emit_byte(0x0F);                   // primary escape
    int opc2 = simd_opc[opc];
    if (opc2 > 0) {
      emit_byte(opc2);                 // 0x38 or 0x3A secondary escape
    }
  }
}

// Register-register variant of rex_prefix(); returns the ModRM reg/rm encoding
// (dst in reg field, src in rm field) with high bits folded into REX.
int Assembler::rex_prefix_and_encode(int dst_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, bool rex_w) {
  if (pre > 0) {
    emit_byte(simd_pre[pre]);
  }
  int encode = (rex_w) ? prefixq_and_encode(dst_enc, src_enc) :
                         prefix_and_encode(dst_enc, src_enc);
  if (opc > 0) {
    emit_byte(0x0F);
    int opc2 = simd_opc[opc];
    if (opc2 > 0) {
      emit_byte(opc2);
    }
  }
  return encode;
}


// Emit a 2- or 3-byte VEX prefix.  The 2-byte form is only legal when
// B, X and W are zero and the opcode map is the plain 0x0F map; otherwise
// the 3-byte form carries them.  R/X/B and the nds register field are
// stored inverted (one's complement) per the VEX specification.
void Assembler::vex_prefix(bool vex_r, bool vex_b, bool vex_x, bool vex_w, int nds_enc, VexSimdPrefix pre, VexOpcode opc, bool vector256) {
  if (vex_b || vex_x || vex_w || (opc == VEX_OPCODE_0F_38) || (opc == VEX_OPCODE_0F_3A)) {
    prefix(VEX_3bytes);

    int byte1 = (vex_r ? VEX_R : 0) | (vex_x ? VEX_X : 0) | (vex_b ? VEX_B : 0);
    byte1 = (~byte1) & 0xE0;           // inverted R/X/B in top three bits
    byte1 |= opc;                      // opcode map select in low bits
    a_byte(byte1);                     // NOTE(review): a_byte here vs emit_byte elsewhere -- confirm a_byte delegates to emit_byte

    int byte2 = ((~nds_enc) & 0xf) << 3;                          // inverted vvvv
    byte2 |= (vex_w ? VEX_W : 0) | (vector256 ? 4 : 0) | pre;     // W, L (256-bit), pp
    emit_byte(byte2);
  } else {
    prefix(VEX_2bytes);

    int byte1 = vex_r ? VEX_R : 0;
    byte1 = (~byte1) & 0x80;           // inverted R in bit 7
    byte1 |= ((~nds_enc) & 0xf) << 3;  // inverted vvvv
    byte1 |= (vector256 ? 4 : 0) | pre;
    emit_byte(byte1);
  }
}

// VEX prefix for a memory operand: derive R from the XMM destination and
// B/X from the address's base/index registers.
void Assembler::vex_prefix(Address adr, int nds_enc, int xreg_enc, VexSimdPrefix pre, VexOpcode opc, bool vex_w, bool vector256){
  bool vex_r = (xreg_enc >= 8);
  bool vex_b = adr.base_needs_rex();
  bool vex_x = adr.index_needs_rex();
  vex_prefix(vex_r, vex_b, vex_x, vex_w, nds_enc, pre, opc, vector256);
}

// VEX prefix for a register-register form; returns the low-3-bit ModRM
// reg/rm encoding (high bits are carried in the VEX R/B fields).
int Assembler::vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, bool vex_w, bool vector256) {
  bool vex_r = (dst_enc >= 8);
  bool vex_b = (src_enc >= 8);
  bool vex_x = false;
  vex_prefix(vex_r, vex_b, vex_x, vex_w, nds_enc, pre, opc, vector256);
  return (((dst_enc & 7) << 3) | (src_enc & 7));
}


// Choose between VEX (UseAVX) and legacy SSE encoding for a memory-operand
// SIMD instruction.  In the SSE case there is no nds field, so nds must be
// either the destination or invalid.
void Assembler::simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr, VexSimdPrefix pre, VexOpcode opc, bool rex_w, bool vector256) {
  if (UseAVX > 0) {
    int xreg_enc = xreg->encoding();
    int nds_enc = nds->is_valid() ? nds->encoding() : 0;
    vex_prefix(adr, nds_enc, xreg_enc, pre, opc, rex_w, vector256);
  } else {
    assert((nds == xreg) || (nds == xnoreg), "wrong sse encoding");
    rex_prefix(adr, xreg, pre, opc, rex_w);
  }
}

// Register-register counterpart of simd_prefix(); returns the ModRM encoding.
int Assembler::simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src, VexSimdPrefix pre, VexOpcode opc, bool rex_w, bool vector256) {
  int dst_enc = dst->encoding();
  int src_enc = src->encoding();
  if (UseAVX > 0) {
    int nds_enc = nds->is_valid() ? nds->encoding() : 0;
    return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, rex_w, vector256);
  } else {
    assert((nds == dst) || (nds == src) || (nds == xnoreg), "wrong sse encoding");
    return rex_prefix_and_encode(dst_enc, src_enc, pre, opc, rex_w);
  }
}

// Two-operand (destructive) SIMD arithmetic, memory source: dst op= [src].
void Assembler::emit_simd_arith(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre) {
  InstructionMark im(this);
  simd_prefix(dst, dst, src, pre);     // nds == dst: destructive form
  emit_byte(opcode);
  emit_operand(dst, src);
}

// Two-operand (destructive) SIMD arithmetic, register source.
void Assembler::emit_simd_arith(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre) {
  int encode = simd_prefix_and_encode(dst, dst, src, pre);
  emit_byte(opcode);
  emit_byte(0xC0 | encode);            // ModRM mod=11: register-direct
}

// Versions with no second source register (non-destructive source).
void Assembler::emit_simd_arith_nonds(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre) {
  InstructionMark im(this);
  simd_prefix(dst, xnoreg, src, pre);
  emit_byte(opcode);
  emit_operand(dst, src);
}

void Assembler::emit_simd_arith_nonds(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre) {
  int encode = simd_prefix_and_encode(dst, xnoreg, src, pre);
  emit_byte(opcode);
  emit_byte(0xC0 | encode);
}

// 3-operands AVX instructions
void Assembler::emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds,
                               Address src, VexSimdPrefix pre, bool vector256) {
  InstructionMark im(this);
  vex_prefix(dst, nds, src, pre, vector256);
  emit_byte(opcode);
  emit_operand(dst, src);
}

void Assembler::emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds,
                               XMMRegister src, VexSimdPrefix pre, bool vector256) {
  int encode = vex_prefix_and_encode(dst, nds, src, pre, vector256);
  emit_byte(opcode);
  emit_byte(0xC0 | encode);
}

#ifndef _LP64
// 32-bit-only encodings (one-byte INC/DEC, PUSHA/POPA, etc. do not exist
// or mean something else in 64-bit mode).

void Assembler::incl(Register dst) {
  // Don't use it directly. Use MacroAssembler::incrementl() instead.
  emit_byte(0x40 | dst->encoding());   // one-byte INC r32 (is a REX prefix in 64-bit mode)
}

void Assembler::lea(Register dst, Address src) {
  leal(dst, src);
}

// MOV m32, imm32 with a relocation attached to the immediate.  0xC7 /0.
void Assembler::mov_literal32(Address dst, int32_t imm32, RelocationHolder const& rspec) {
  InstructionMark im(this);
  emit_byte(0xC7);
  emit_operand(rax, dst);              // rax => /0 opcode extension
  emit_data((int)imm32, rspec, 0);
}

// MOV r32, imm32 with a relocation attached to the immediate.  0xB8+r.
void Assembler::mov_literal32(Register dst, int32_t imm32, RelocationHolder const& rspec) {
  InstructionMark im(this);
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xB8 | encode);
  emit_data((int)imm32, rspec, 0);
}

// POPA: pop all general registers.
void Assembler::popa() { // 32bit
  emit_byte(0x61);
}

// PUSH imm32 with a relocation attached to the immediate.
void Assembler::push_literal32(int32_t imm32, RelocationHolder const& rspec) {
  InstructionMark im(this);
  emit_byte(0x68);
  emit_data(imm32, rspec, 0);
}

// PUSHA: push all general registers.
void Assembler::pusha() { // 32bit
  emit_byte(0x60);
}

// SETNZ dst (byte register).  0x0F 0x95.
void Assembler::set_byte_if_not_zero(Register dst) {
  emit_byte(0x0F);
  emit_byte(0x95);
  emit_byte(0xE0 | dst->encoding());
}

// SHLD dst, src, CL.  0x0F 0xA5.
void Assembler::shldl(Register dst, Register src) {
  emit_byte(0x0F);
  emit_byte(0xA5);
  emit_byte(0xC0 | src->encoding() << 3 | dst->encoding());
}

// SHRD dst, src, CL.  0x0F 0xAD.
void Assembler::shrdl(Register dst, Register src) {
  emit_byte(0x0F);
  emit_byte(0xAD);
  emit_byte(0xC0 | src->encoding() << 3 | dst->encoding());
}

#else // LP64

// SETNZ dst; byteinst=true forces a REX for spl/bpl/sil/dil.
void Assembler::set_byte_if_not_zero(Register dst) {
  int enc = prefix_and_encode(dst->encoding(), true);
  emit_byte(0x0F);
  emit_byte(0x95);
  emit_byte(0xE0 | enc);
}

// 64bit only pieces of the assembler
// This should only be used by 64bit instructions that can use rip-relative
// it cannot be used by instructions that want an immediate value.
// Decide whether the literal address 'adr' can be addressed rip-relatively
// (i.e. its 32-bit displacement reaches it from anywhere code might live),
// based on its relocation type and its distance from the code cache bounds
// and the current emit position.  Returns false when a full 64-bit literal
// must be materialized instead.
bool Assembler::reachable(AddressLiteral adr) {
  int64_t disp;
  // None will force a 64bit literal to the code stream. Likely a placeholder
  // for something that will be patched later and we need to be certain it will
  // always be reachable.
  if (adr.reloc() == relocInfo::none) {
    return false;
  }
  if (adr.reloc() == relocInfo::internal_word_type) {
    // This should be rip relative and easily reachable.
    return true;
  }
  if (adr.reloc() == relocInfo::virtual_call_type ||
      adr.reloc() == relocInfo::opt_virtual_call_type ||
      adr.reloc() == relocInfo::static_call_type ||
      adr.reloc() == relocInfo::static_stub_type ) {
    // This should be rip relative within the code cache and easily
    // reachable until we get huge code caches. (At which point
    // ic code is going to have issues).
    return true;
  }
  if (adr.reloc() != relocInfo::external_word_type &&
      adr.reloc() != relocInfo::poll_return_type &&  // these are really external_word but need special
      adr.reloc() != relocInfo::poll_type &&         // relocs to identify them
      adr.reloc() != relocInfo::runtime_call_type ) {
    return false;
  }

  // Stress the correction code
  if (ForceUnreachable) {
    // Must be runtimecall reloc, see if it is in the codecache
    // Flipping stuff in the codecache to be unreachable causes issues
    // with things like inline caches where the additional instructions
    // are not handled.
    if (CodeCache::find_blob(adr._target) == NULL) {
      return false;
    }
  }
  // For external_word_type/runtime_call_type if it is reachable from where we
  // are now (possibly a temp buffer) and where we might end up
  // anywhere in the codeCache then we are always reachable.
  // This would have to change if we ever save/restore shared code
  // to be more pessimistic.
  // Check reachability from both extremes of the code cache; sizeof(int)
  // accounts for the 4-byte displacement field itself.
  disp = (int64_t)adr._target - ((int64_t)CodeCache::low_bound() + sizeof(int));
  if (!is_simm32(disp)) return false;
  disp = (int64_t)adr._target - ((int64_t)CodeCache::high_bound() + sizeof(int));
  if (!is_simm32(disp)) return false;

  disp = (int64_t)adr._target - ((int64_t)_code_pos + sizeof(int));

  // Because rip relative is a disp + address_of_next_instruction and we
  // don't know the value of address_of_next_instruction we apply a fudge factor
  // to make sure we will be ok no matter the size of the instruction we get placed into.
  // We don't have to fudge the checks above here because they are already worst case.

  // 12 == override/rex byte, opcode byte, rm byte, sib byte, a 4-byte disp , 4-byte literal
  // + 4 because better safe than sorry.
  const int fudge = 12 + 4;
  if (disp < 0) {
    disp -= fudge;          // widen the margin away from zero
  } else {
    disp += fudge;
  }
  return is_simm32(disp);
}

// Check if the polling page is not reachable from the code cache using rip-relative
// addressing.
// True when the safepoint polling page cannot be addressed rip-relatively
// from every possible position in the code cache (or when ForceUnreachable
// stress-testing is on).
bool Assembler::is_polling_page_far() {
  intptr_t addr = (intptr_t)os::get_polling_page();
  return ForceUnreachable ||
         !is_simm32(addr - (intptr_t)CodeCache::low_bound()) ||
         !is_simm32(addr - (intptr_t)CodeCache::high_bound());
}

// Emit a 64-bit data word, attaching a simple relocation unless rtype is none.
void Assembler::emit_data64(jlong data,
                            relocInfo::relocType rtype,
                            int format) {
  if (rtype == relocInfo::none) {
    emit_long64(data);
  } else {
    emit_data64(data, Relocation::spec_simple(rtype), format);
  }
}

// Emit a 64-bit data word with a relocation; must be inside an
// InstructionMark so the relocation can point at the enclosing instruction.
void Assembler::emit_data64(jlong data,
                            RelocationHolder const& rspec,
                            int format) {
  assert(imm_operand == 0, "default format must be immediate in this file");
  assert(imm_operand == format, "must be immediate");
  assert(inst_mark() != NULL, "must be inside InstructionMark");
  // Do not use AbstractAssembler::relocate, which is not intended for
  // embedded words. Instead, relocate to the enclosing instruction.
  code_section()->relocate(inst_mark(), rspec, format);
#ifdef ASSERT
  check_relocation(rspec, format);
#endif
  emit_long64(data);
}

// ----------------------------------------------------------------------------
// REX prefix helpers.  The *_and_encode variants emit any needed REX prefix,
// fold register encodings >= 8 down into the prefix's R/B bits, and return
// the remaining low bits packed for the ModRM byte (reg << 3 | rm).
// 'byteinst' forces a bare REX so encodings 4-7 select spl/bpl/sil/dil
// rather than ah/ch/dh/bh in byte instructions.
// ----------------------------------------------------------------------------

int Assembler::prefix_and_encode(int reg_enc, bool byteinst) {
  if (reg_enc >= 8) {
    prefix(REX_B);
    reg_enc -= 8;
  } else if (byteinst && reg_enc >= 4) {
    prefix(REX);
  }
  return reg_enc;
}

// 64-bit (REX.W) single-register form.
int Assembler::prefixq_and_encode(int reg_enc) {
  if (reg_enc < 8) {
    prefix(REX_W);
  } else {
    prefix(REX_WB);
    reg_enc -= 8;
  }
  return reg_enc;
}

// Two-register form: dst goes to the ModRM reg field (REX.R), src to rm (REX.B).
int Assembler::prefix_and_encode(int dst_enc, int src_enc, bool byteinst) {
  if (dst_enc < 8) {
    if (src_enc >= 8) {
      prefix(REX_B);
      src_enc -= 8;
    } else if (byteinst && src_enc >= 4) {
      prefix(REX);
    }
  } else {
    if (src_enc < 8) {
      prefix(REX_R);
    } else {
      prefix(REX_RB);
      src_enc -= 8;
    }
    dst_enc -= 8;
  }
  return dst_enc << 3 | src_enc;
}

// 64-bit (REX.W) two-register form.
int Assembler::prefixq_and_encode(int dst_enc, int src_enc) {
  if (dst_enc < 8) {
    if (src_enc < 8) {
      prefix(REX_W);
    } else {
      prefix(REX_WB);
      src_enc -= 8;
    }
  } else {
    if (src_enc < 8) {
      prefix(REX_WR);
    } else {
      prefix(REX_WRB);
      src_enc -= 8;
    }
    dst_enc -= 8;
  }
  return dst_enc << 3 | src_enc;
}

// REX.B for a single register in the ModRM rm (or opcode-reg) field.
void Assembler::prefix(Register reg) {
  if (reg->encoding() >= 8) {
    prefix(REX_B);
  }
}

// REX for a memory operand alone: B covers the base, X the index.
void Assembler::prefix(Address adr) {
  if (adr.base_needs_rex()) {
    if (adr.index_needs_rex()) {
      prefix(REX_XB);
    } else {
      prefix(REX_B);
    }
  } else {
    if (adr.index_needs_rex()) {
      prefix(REX_X);
    }
  }
}

// REX.W for a memory operand alone.
void Assembler::prefixq(Address adr) {
  if (adr.base_needs_rex()) {
    if (adr.index_needs_rex()) {
      prefix(REX_WXB);
    } else {
      prefix(REX_WB);
    }
  } else {
    if (adr.index_needs_rex()) {
      prefix(REX_WX);
    } else {
      prefix(REX_W);
    }
  }
}


// REX for memory operand + register: R covers reg, B/X cover base/index.
void Assembler::prefix(Address adr, Register reg, bool byteinst) {
  if (reg->encoding() < 8) {
    if (adr.base_needs_rex()) {
      if (adr.index_needs_rex()) {
        prefix(REX_XB);
      } else {
        prefix(REX_B);
      }
    } else {
      if (adr.index_needs_rex()) {
        prefix(REX_X);
      } else if (byteinst && reg->encoding() >= 4 ) {
        prefix(REX);    // plain REX so encodings 4-7 mean spl/bpl/sil/dil
      }
    }
  } else {
    if (adr.base_needs_rex()) {
      if (adr.index_needs_rex()) {
        prefix(REX_RXB);
      } else {
        prefix(REX_RB);
      }
    } else {
      if (adr.index_needs_rex()) {
        prefix(REX_RX);
      } else {
        prefix(REX_R);
      }
    }
  }
}

// REX.W for memory operand + register.
void Assembler::prefixq(Address adr, Register src) {
  if (src->encoding() < 8) {
    if (adr.base_needs_rex()) {
      if (adr.index_needs_rex()) {
        prefix(REX_WXB);
      } else {
        prefix(REX_WB);
      }
    } else {
      if (adr.index_needs_rex()) {
        prefix(REX_WX);
      } else {
        prefix(REX_W);
      }
    }
  } else {
    if (adr.base_needs_rex()) {
      if (adr.index_needs_rex()) {
        prefix(REX_WRXB);
      } else {
        prefix(REX_WRB);
      }
    } else {
      if (adr.index_needs_rex()) {
        prefix(REX_WRX);
      } else {
        prefix(REX_WR);
      }
    }
  }
}

// REX for memory operand + XMM register (no byteinst case for XMM).
void Assembler::prefix(Address adr, XMMRegister reg) {
  if (reg->encoding() < 8) {
    if (adr.base_needs_rex()) {
      if (adr.index_needs_rex()) {
        prefix(REX_XB);
      } else {
        prefix(REX_B);
      }
    } else {
      if (adr.index_needs_rex()) {
        prefix(REX_X);
      }
    }
  } else {
    if (adr.base_needs_rex()) {
      if (adr.index_needs_rex()) {
        prefix(REX_RXB);
      } else {
        prefix(REX_RB);
      }
    } else {
      if (adr.index_needs_rex()) {
        prefix(REX_RX);
      } else {
        prefix(REX_R);
      }
    }
  }
}

// REX.W for memory operand + XMM register.
void Assembler::prefixq(Address adr, XMMRegister src) {
  if (src->encoding() < 8) {
    if (adr.base_needs_rex()) {
      if (adr.index_needs_rex()) {
        prefix(REX_WXB);
      } else {
        prefix(REX_WB);
      }
    } else {
      if (adr.index_needs_rex()) {
        prefix(REX_WX);
      } else {
        prefix(REX_W);
      }
    }
  } else {
    if (adr.base_needs_rex()) {
      if (adr.index_needs_rex()) {
        prefix(REX_WRXB);
      } else {
        prefix(REX_WRB);
      }
    } else {
      if (adr.index_needs_rex()) {
        prefix(REX_WRX);
      } else {
        prefix(REX_WR);
      }
    }
  }
}

// ----------------------------------------------------------------------------
// 64-bit (q-suffix) integer instructions.  For the 0x81-immediate group the
// register passed as the second emit_arith*/emit_operand argument supplies
// the /digit opcode extension (rax=/0 ADD, rsp=/4 AND, rbx=/3 SBB, ...).
// ----------------------------------------------------------------------------

// ADC r64, imm32.  0x81 /2.
void Assembler::adcq(Register dst, int32_t imm32) {
  (void) prefixq_and_encode(dst->encoding());  // prefix only; encoding re-derived by emit_arith
  emit_arith(0x81, 0xD0, dst, imm32);
}

// ADC r64, r/m64.  0x13 /r.
void Assembler::adcq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x13);
  emit_operand(dst, src);
}

void Assembler::adcq(Register dst, Register src) {
  (int) prefixq_and_encode(dst->encoding(), src->encoding());  // result discarded; emits prefix only
  emit_arith(0x13, 0xC0, dst, src);
}

// ADD m64, imm32 (emit_arith_operand picks the sign-extended 0x83 form when it fits).
void Assembler::addq(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefixq(dst);
  emit_arith_operand(0x81, rax, dst,imm32);    // rax => /0
}

// ADD m64, r64.  0x01 /r.
void Assembler::addq(Address dst, Register src) {
  InstructionMark im(this);
  prefixq(dst, src);
  emit_byte(0x01);
  emit_operand(src, dst);
}

void Assembler::addq(Register dst, int32_t imm32) {
  (void) prefixq_and_encode(dst->encoding());
  emit_arith(0x81, 0xC0, dst, imm32);          // /0
}

// ADD r64, m64.  0x03 /r.
void Assembler::addq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x03);
  emit_operand(dst, src);
}

void Assembler::addq(Register dst, Register src) {
  (void) prefixq_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x03, 0xC0, dst, src);
}

// AND m64, imm32.  0x81 /4.
void Assembler::andq(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefixq(dst);
  emit_byte(0x81);
  emit_operand(rsp, dst, 4);                   // rsp => /4; 4 == trailing imm32 size
  emit_long(imm32);
}

void Assembler::andq(Register dst, int32_t imm32) {
  (void) prefixq_and_encode(dst->encoding());
  emit_arith(0x81, 0xE0, dst, imm32);          // /4
}

// AND r64, m64.  0x23 /r.
void Assembler::andq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x23);
  emit_operand(dst, src);
}

void Assembler::andq(Register dst, Register src) {
  (int) prefixq_and_encode(dst->encoding(), src->encoding());  // result discarded; emits prefix only
  emit_arith(0x23, 0xC0, dst, src);
}

// BSF r64, r64.  0x0F 0xBC.
void Assembler::bsfq(Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xBC);
  emit_byte(0xC0 | encode);
}

// BSR r64, r64.  0x0F 0xBD.  With an F3 prefix this encoding would be LZCNT,
// hence the guard.
void Assembler::bsrq(Register dst, Register src) {
  assert(!VM_Version::supports_lzcnt(), "encoding is treated as LZCNT");
int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 4664 emit_byte(0x0F); 4665 emit_byte(0xBD); 4666 emit_byte(0xC0 | encode); 4667 } 4668 4669 void Assembler::bswapq(Register reg) { 4670 int encode = prefixq_and_encode(reg->encoding()); 4671 emit_byte(0x0F); 4672 emit_byte(0xC8 | encode); 4673 } 4674 4675 void Assembler::cdqq() { 4676 prefix(REX_W); 4677 emit_byte(0x99); 4678 } 4679 4680 void Assembler::clflush(Address adr) { 4681 prefix(adr); 4682 emit_byte(0x0F); 4683 emit_byte(0xAE); 4684 emit_operand(rdi, adr); 4685 } 4686 4687 void Assembler::cmovq(Condition cc, Register dst, Register src) { 4688 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 4689 emit_byte(0x0F); 4690 emit_byte(0x40 | cc); 4691 emit_byte(0xC0 | encode); 4692 } 4693 4694 void Assembler::cmovq(Condition cc, Register dst, Address src) { 4695 InstructionMark im(this); 4696 prefixq(src, dst); 4697 emit_byte(0x0F); 4698 emit_byte(0x40 | cc); 4699 emit_operand(dst, src); 4700 } 4701 4702 void Assembler::cmpq(Address dst, int32_t imm32) { 4703 InstructionMark im(this); 4704 prefixq(dst); 4705 emit_byte(0x81); 4706 emit_operand(rdi, dst, 4); 4707 emit_long(imm32); 4708 } 4709 4710 void Assembler::cmpq(Register dst, int32_t imm32) { 4711 (void) prefixq_and_encode(dst->encoding()); 4712 emit_arith(0x81, 0xF8, dst, imm32); 4713 } 4714 4715 void Assembler::cmpq(Address dst, Register src) { 4716 InstructionMark im(this); 4717 prefixq(dst, src); 4718 emit_byte(0x3B); 4719 emit_operand(src, dst); 4720 } 4721 4722 void Assembler::cmpq(Register dst, Register src) { 4723 (void) prefixq_and_encode(dst->encoding(), src->encoding()); 4724 emit_arith(0x3B, 0xC0, dst, src); 4725 } 4726 4727 void Assembler::cmpq(Register dst, Address src) { 4728 InstructionMark im(this); 4729 prefixq(src, dst); 4730 emit_byte(0x3B); 4731 emit_operand(dst, src); 4732 } 4733 4734 void Assembler::cmpxchgq(Register reg, Address adr) { 4735 InstructionMark im(this); 4736 prefixq(adr, reg); 4737 
  // (continuation of cmpxchgq)
  emit_byte(0x0F);
  emit_byte(0xB1);
  emit_operand(reg, adr);
}

// CVTSI2SD xmm, r64.  F2 (REX.W) 0x0F 0x2A.
void Assembler::cvtsi2sdq(XMMRegister dst, Register src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  int encode = simd_prefix_and_encode_q(dst, dst, src, VEX_SIMD_F2);
  emit_byte(0x2A);
  emit_byte(0xC0 | encode);
}

// CVTSI2SD xmm, m64.
void Assembler::cvtsi2sdq(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  simd_prefix_q(dst, dst, src, VEX_SIMD_F2);
  emit_byte(0x2A);
  emit_operand(dst, src);
}

// CVTSI2SS xmm, r64.  F3 (REX.W) 0x0F 0x2A.
void Assembler::cvtsi2ssq(XMMRegister dst, Register src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  int encode = simd_prefix_and_encode_q(dst, dst, src, VEX_SIMD_F3);
  emit_byte(0x2A);
  emit_byte(0xC0 | encode);
}

// CVTSI2SS xmm, m64.
void Assembler::cvtsi2ssq(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  simd_prefix_q(dst, dst, src, VEX_SIMD_F3);
  emit_byte(0x2A);
  emit_operand(dst, src);
}

// CVTTSD2SI r64, xmm (truncating).  F2 (REX.W) 0x0F 0x2C.
void Assembler::cvttsd2siq(Register dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_F2);
  emit_byte(0x2C);
  emit_byte(0xC0 | encode);
}

// CVTTSS2SI r64, xmm (truncating).  F3 (REX.W) 0x0F 0x2C.
void Assembler::cvttss2siq(Register dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_F3);
  emit_byte(0x2C);
  emit_byte(0xC0 | encode);
}

// DEC r32.  0xFF /1 (register-direct).
void Assembler::decl(Register dst) {
  // Don't use it directly. Use MacroAssembler::decrementl() instead.
  // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xFF);
  emit_byte(0xC8 | encode);
}

// DEC r64.
void Assembler::decq(Register dst) {
  // Don't use it directly. Use MacroAssembler::decrementq() instead.
  // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
  int encode = prefixq_and_encode(dst->encoding());
  emit_byte(0xFF);
  emit_byte(0xC8 | encode);
}

// DEC m64.  0xFF /1.
void Assembler::decq(Address dst) {
  // Don't use it directly. Use MacroAssembler::decrementq() instead.
  InstructionMark im(this);
  prefixq(dst);
  emit_byte(0xFF);
  emit_operand(rcx, dst);    // rcx => /1 opcode extension
}

// FXRSTOR m512byte.  0x0F 0xAE /1.
void Assembler::fxrstor(Address src) {
  prefixq(src);
  emit_byte(0x0F);
  emit_byte(0xAE);
  emit_operand(as_Register(1), src);   // /1
}

// FXSAVE m512byte.  0x0F 0xAE /0.
void Assembler::fxsave(Address dst) {
  prefixq(dst);
  emit_byte(0x0F);
  emit_byte(0xAE);
  emit_operand(as_Register(0), dst);   // /0
}

// IDIV r64 (RDX:RAX / src).  0xF7 /7.
void Assembler::idivq(Register src) {
  int encode = prefixq_and_encode(src->encoding());
  emit_byte(0xF7);
  emit_byte(0xF8 | encode);
}

// IMUL r64, r64.  0x0F 0xAF.
void Assembler::imulq(Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xAF);
  emit_byte(0xC0 | encode);
}

// IMUL r64, r64, imm: short 0x6B form with imm8 when it fits, else 0x69/imm32.
void Assembler::imulq(Register dst, Register src, int value) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  if (is8bit(value)) {
    emit_byte(0x6B);
    emit_byte(0xC0 | encode);
    emit_byte(value & 0xFF);
  } else {
    emit_byte(0x69);
    emit_byte(0xC0 | encode);
    emit_long(value);
  }
}

// INC r32.  0xFF /0 (register-direct).
void Assembler::incl(Register dst) {
  // Don't use it directly. Use MacroAssembler::incrementl() instead.
  // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xFF);
  emit_byte(0xC0 | encode);
}

// INC r64.
void Assembler::incq(Register dst) {
  // Don't use it directly. Use MacroAssembler::incrementq() instead.
  // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
  int encode = prefixq_and_encode(dst->encoding());
  emit_byte(0xFF);
  emit_byte(0xC0 | encode);
}

// INC m64.  0xFF /0.
void Assembler::incq(Address dst) {
  // Don't use it directly. Use MacroAssembler::incrementq() instead.
  InstructionMark im(this);
  prefixq(dst);
  emit_byte(0xFF);
  emit_operand(rax, dst);    // rax => /0 opcode extension
}

void Assembler::lea(Register dst, Address src) {
  leaq(dst, src);
}

// LEA r64, m.  0x8D /r.
void Assembler::leaq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x8D);
  emit_operand(dst, src);
}

// MOV r64, imm64.  REX.W 0xB8+r with a full 8-byte immediate.
void Assembler::mov64(Register dst, int64_t imm64) {
  InstructionMark im(this);
  int encode = prefixq_and_encode(dst->encoding());
  emit_byte(0xB8 | encode);
  emit_long64(imm64);
}

// MOV r64, imm64 with a relocation attached to the immediate.
void Assembler::mov_literal64(Register dst, intptr_t imm64, RelocationHolder const& rspec) {
  InstructionMark im(this);
  int encode = prefixq_and_encode(dst->encoding());
  emit_byte(0xB8 | encode);
  emit_data64(imm64, rspec);
}

// MOV r32, imm32 carrying a narrow (compressed) oop; 32-bit form, no REX.W.
void Assembler::mov_narrow_oop(Register dst, int32_t imm32, RelocationHolder const& rspec) {
  InstructionMark im(this);
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xB8 | encode);
  emit_data((int)imm32, rspec, narrow_oop_operand);
}

// MOV m32, imm32 carrying a narrow oop.  0xC7 /0.
void Assembler::mov_narrow_oop(Address dst, int32_t imm32, RelocationHolder const& rspec) {
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0xC7);
  emit_operand(rax, dst, 4);   // rax => /0; 4 == trailing imm32 size
  emit_data((int)imm32, rspec, narrow_oop_operand);
}

// CMP r32, imm32 carrying a narrow oop.  0x81 /7, register-direct.
void Assembler::cmp_narrow_oop(Register src1, int32_t imm32, RelocationHolder const& rspec) {
  InstructionMark im(this);
  int encode = prefix_and_encode(src1->encoding());
  emit_byte(0x81);
  emit_byte(0xF8 | encode);    // mod=11, /7
  emit_data((int)imm32, rspec, narrow_oop_operand);
}
4921 4922 void Assembler::cmp_narrow_oop(Address src1, int32_t imm32, RelocationHolder const& rspec) { 4923 InstructionMark im(this); 4924 prefix(src1); 4925 emit_byte(0x81); 4926 emit_operand(rax, src1, 4); 4927 emit_data((int)imm32, rspec, narrow_oop_operand); 4928 } 4929 4930 void Assembler::lzcntq(Register dst, Register src) { 4931 assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR"); 4932 emit_byte(0xF3); 4933 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 4934 emit_byte(0x0F); 4935 emit_byte(0xBD); 4936 emit_byte(0xC0 | encode); 4937 } 4938 4939 void Assembler::movdq(XMMRegister dst, Register src) { 4940 // table D-1 says MMX/SSE2 4941 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 4942 int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_66); 4943 emit_byte(0x6E); 4944 emit_byte(0xC0 | encode); 4945 } 4946 4947 void Assembler::movdq(Register dst, XMMRegister src) { 4948 // table D-1 says MMX/SSE2 4949 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 4950 // swap src/dst to get correct prefix 4951 int encode = simd_prefix_and_encode_q(src, dst, VEX_SIMD_66); 4952 emit_byte(0x7E); 4953 emit_byte(0xC0 | encode); 4954 } 4955 4956 void Assembler::movq(Register dst, Register src) { 4957 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 4958 emit_byte(0x8B); 4959 emit_byte(0xC0 | encode); 4960 } 4961 4962 void Assembler::movq(Register dst, Address src) { 4963 InstructionMark im(this); 4964 prefixq(src, dst); 4965 emit_byte(0x8B); 4966 emit_operand(dst, src); 4967 } 4968 4969 void Assembler::movq(Address dst, Register src) { 4970 InstructionMark im(this); 4971 prefixq(dst, src); 4972 emit_byte(0x89); 4973 emit_operand(src, dst); 4974 } 4975 4976 void Assembler::movsbq(Register dst, Address src) { 4977 InstructionMark im(this); 4978 prefixq(src, dst); 4979 emit_byte(0x0F); 4980 emit_byte(0xBE); 4981 emit_operand(dst, src); 4982 } 4983 4984 void Assembler::movsbq(Register dst, Register src) { 4985 int encode 
             = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xBE);
  emit_byte(0xC0 | encode);
}

// MOVSLQ r64, imm32 -- deliberately disabled: the encoding below was observed
// to assemble incorrectly (see dbx notes) and has never been validated.
void Assembler::movslq(Register dst, int32_t imm32) {
  // dbx shows movslq(rcx, 3) as movq $0x0000000049000000,(%rbx)
  // and movslq(r8, 3); as movl $0x0000000048000000,(%rbx)
  // as a result we shouldn't use until tested at runtime...
  ShouldNotReachHere();
  InstructionMark im(this);
  int encode = prefixq_and_encode(dst->encoding());
  emit_byte(0xC7 | encode);
  emit_long(imm32);
}

// MOV m64, imm32 (sign-extended).  0xC7 /0.
void Assembler::movslq(Address dst, int32_t imm32) {
  assert(is_simm32(imm32), "lost bits");
  InstructionMark im(this);
  prefixq(dst);
  emit_byte(0xC7);
  emit_operand(rax, dst, 4);   // rax => /0; 4 == trailing imm32 size
  emit_long(imm32);
}

// MOVSXD r64, m32.  0x63 /r.
void Assembler::movslq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x63);
  emit_operand(dst, src);
}

// MOVSXD r64, r32.
void Assembler::movslq(Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x63);
  emit_byte(0xC0 | encode);
}

// MOVSX r64, m16.  0x0F 0xBF.
void Assembler::movswq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x0F);
  emit_byte(0xBF);
  emit_operand(dst, src);
}

// MOVSX r64, r16.
void Assembler::movswq(Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xBF);
  emit_byte(0xC0 | encode);
}

// MOVZX r64, m8.  0x0F 0xB6.
void Assembler::movzbq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x0F);
  emit_byte(0xB6);
  emit_operand(dst, src);
}

// MOVZX r64, r8.
void Assembler::movzbq(Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xB6);
  emit_byte(0xC0 | encode);
}

// MOVZX r64, m16.  0x0F 0xB7.
void Assembler::movzwq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x0F);
  emit_byte(0xB7);
  emit_operand(dst, src);
}

// MOVZX r64, r16.
void Assembler::movzwq(Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xB7);
  emit_byte(0xC0 | encode);
}

// NEG r64.  0xF7 /3.
void Assembler::negq(Register dst) {
  int encode = prefixq_and_encode(dst->encoding());
  emit_byte(0xF7);
  emit_byte(0xD8 | encode);
}

// NOT r64.  0xF7 /2.
void Assembler::notq(Register dst) {
  int encode = prefixq_and_encode(dst->encoding());
  emit_byte(0xF7);
  emit_byte(0xD0 | encode);
}

// OR m64, imm32.  0x81 /1.
void Assembler::orq(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefixq(dst);
  emit_byte(0x81);
  emit_operand(rcx, dst, 4);   // rcx => /1; 4 == trailing imm32 size
  emit_long(imm32);
}

void Assembler::orq(Register dst, int32_t imm32) {
  (void) prefixq_and_encode(dst->encoding());
  emit_arith(0x81, 0xC8, dst, imm32);   // /1
}

// OR r64, m64.  0x0B /r.
void Assembler::orq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_byte(0x0B);
  emit_operand(dst, src);
}

void Assembler::orq(Register dst, Register src) {
  (void) prefixq_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x0B, 0xC0, dst, src);
}

// Restore all GPRs saved by pusha() below, in the mirrored slot layout
// (slot 11 -- rsp -- is deliberately skipped).
void Assembler::popa() { // 64bit
  movq(r15, Address(rsp, 0));
  movq(r14, Address(rsp, wordSize));
  movq(r13, Address(rsp, 2 * wordSize));
  movq(r12, Address(rsp, 3 * wordSize));
  movq(r11, Address(rsp, 4 * wordSize));
  movq(r10, Address(rsp, 5 * wordSize));
  movq(r9,  Address(rsp, 6 * wordSize));
  movq(r8,  Address(rsp, 7 * wordSize));
  movq(rdi, Address(rsp, 8 * wordSize));
  movq(rsi, Address(rsp, 9 * wordSize));
  movq(rbp, Address(rsp, 10 * wordSize));
  // skip rsp
  movq(rbx, Address(rsp, 12 * wordSize));
  movq(rdx, Address(rsp, 13 * wordSize));
  movq(rcx, Address(rsp, 14 * wordSize));
  movq(rax, Address(rsp, 15 * wordSize));

  addq(rsp, 16 * wordSize);
}

// POPCNT r64, m64.  F3 (REX.W) 0x0F 0xB8; F3 must precede the REX prefix.
void Assembler::popcntq(Register dst, Address src) {
  assert(VM_Version::supports_popcnt(), "must support");
  InstructionMark im(this);
  emit_byte(0xF3);
  prefixq(src, dst);
  emit_byte(0x0F);
  emit_byte(0xB8);
  emit_operand(dst, src);
}

// POPCNT r64, r64.
void Assembler::popcntq(Register dst, Register src) {
  assert(VM_Version::supports_popcnt(), "must support");
  emit_byte(0xF3);
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xB8);
  emit_byte(0xC0 | encode);
}

// POP m64.  0x8F /0.
void Assembler::popq(Address dst) {
  InstructionMark im(this);
  prefixq(dst);
  emit_byte(0x8F);
  emit_operand(rax, dst);      // rax => /0 opcode extension
}

// Save all 16 GPRs below rsp.  rsp's own value is written first (into what
// becomes slot 11 after the subq), so popa() can skip restoring it.
void Assembler::pusha() { // 64bit
  // we have to store original rsp.  ABI says that 128 bytes
  // below rsp are local scratch.
  movq(Address(rsp, -5 * wordSize), rsp);

  subq(rsp, 16 * wordSize);

  movq(Address(rsp, 15 * wordSize), rax);
  movq(Address(rsp, 14 * wordSize), rcx);
  movq(Address(rsp, 13 * wordSize), rdx);
  movq(Address(rsp, 12 * wordSize), rbx);
  // skip rsp
  movq(Address(rsp, 10 * wordSize), rbp);
  movq(Address(rsp, 9 * wordSize), rsi);
  movq(Address(rsp, 8 * wordSize), rdi);
  movq(Address(rsp, 7 * wordSize), r8);
  movq(Address(rsp, 6 * wordSize), r9);
  movq(Address(rsp, 5 * wordSize), r10);
  movq(Address(rsp, 4 * wordSize), r11);
  movq(Address(rsp, 3 * wordSize), r12);
  movq(Address(rsp, 2 * wordSize), r13);
  movq(Address(rsp, wordSize), r14);
  movq(Address(rsp, 0), r15);
}

// PUSH m64.  0xFF /6.
void Assembler::pushq(Address src) {
  InstructionMark im(this);
  prefixq(src);
  emit_byte(0xFF);
  emit_operand(rsi, src);      // rsi => /6 opcode extension
}

// RCL r64, imm8.  0xD1 /2 for count 1, else 0xC1 /2 + imm8.
void Assembler::rclq(Register dst, int imm8) {
  assert(isShiftCount(imm8 >> 1), "illegal shift count");
  int encode = prefixq_and_encode(dst->encoding());
  if (imm8 == 1) {
    emit_byte(0xD1);
    emit_byte(0xD0 | encode);
  } else {
    emit_byte(0xC1);
    emit_byte(0xD0 | encode);
    emit_byte(imm8);
  }
}

// SAR r64, imm8.  0xD1 /7 for count 1, else 0xC1 /7 + imm8.
void Assembler::sarq(Register dst, int imm8) {
  assert(isShiftCount(imm8 >> 1), "illegal shift count");
  int encode = prefixq_and_encode(dst->encoding());
  if (imm8 == 1) {
    emit_byte(0xD1);
    emit_byte(0xF8 | encode);
  } else {
    emit_byte(0xC1);
    emit_byte(0xF8 | encode);
    emit_byte(imm8);
  }
}

// SAR r64, CL.  0xD3 /7.
void Assembler::sarq(Register dst) {
  int encode = prefixq_and_encode(dst->encoding());
  emit_byte(0xD3);
  emit_byte(0xF8 | encode);
}

// SBB m64, imm32.  0x81 /3 (rbx supplies the /3 extension).
void Assembler::sbbq(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefixq(dst);
  emit_arith_operand(0x81, rbx, dst, imm32);
}

void Assembler::sbbq(Register dst, int32_t imm32) {
5223 (void) prefixq_and_encode(dst->encoding()); 5224 emit_arith(0x81, 0xD8, dst, imm32); 5225 } 5226 5227 void Assembler::sbbq(Register dst, Address src) { 5228 InstructionMark im(this); 5229 prefixq(src, dst); 5230 emit_byte(0x1B); 5231 emit_operand(dst, src); 5232 } 5233 5234 void Assembler::sbbq(Register dst, Register src) { 5235 (void) prefixq_and_encode(dst->encoding(), src->encoding()); 5236 emit_arith(0x1B, 0xC0, dst, src); 5237 } 5238 5239 void Assembler::shlq(Register dst, int imm8) { 5240 assert(isShiftCount(imm8 >> 1), "illegal shift count"); 5241 int encode = prefixq_and_encode(dst->encoding()); 5242 if (imm8 == 1) { 5243 emit_byte(0xD1); 5244 emit_byte(0xE0 | encode); 5245 } else { 5246 emit_byte(0xC1); 5247 emit_byte(0xE0 | encode); 5248 emit_byte(imm8); 5249 } 5250 } 5251 5252 void Assembler::shlq(Register dst) { 5253 int encode = prefixq_and_encode(dst->encoding()); 5254 emit_byte(0xD3); 5255 emit_byte(0xE0 | encode); 5256 } 5257 5258 void Assembler::shrq(Register dst, int imm8) { 5259 assert(isShiftCount(imm8 >> 1), "illegal shift count"); 5260 int encode = prefixq_and_encode(dst->encoding()); 5261 emit_byte(0xC1); 5262 emit_byte(0xE8 | encode); 5263 emit_byte(imm8); 5264 } 5265 5266 void Assembler::shrq(Register dst) { 5267 int encode = prefixq_and_encode(dst->encoding()); 5268 emit_byte(0xD3); 5269 emit_byte(0xE8 | encode); 5270 } 5271 5272 void Assembler::subq(Address dst, int32_t imm32) { 5273 InstructionMark im(this); 5274 prefixq(dst); 5275 emit_arith_operand(0x81, rbp, dst, imm32); 5276 } 5277 5278 void Assembler::subq(Address dst, Register src) { 5279 InstructionMark im(this); 5280 prefixq(dst, src); 5281 emit_byte(0x29); 5282 emit_operand(src, dst); 5283 } 5284 5285 void Assembler::subq(Register dst, int32_t imm32) { 5286 (void) prefixq_and_encode(dst->encoding()); 5287 emit_arith(0x81, 0xE8, dst, imm32); 5288 } 5289 5290 // Force generation of a 4 byte immediate value even if it fits into 8bit 5291 void Assembler::subq_imm32(Register 
dst, int32_t imm32) { 5292 (void) prefixq_and_encode(dst->encoding()); 5293 emit_arith_imm32(0x81, 0xE8, dst, imm32); 5294 } 5295 5296 void Assembler::subq(Register dst, Address src) { 5297 InstructionMark im(this); 5298 prefixq(src, dst); 5299 emit_byte(0x2B); 5300 emit_operand(dst, src); 5301 } 5302 5303 void Assembler::subq(Register dst, Register src) { 5304 (void) prefixq_and_encode(dst->encoding(), src->encoding()); 5305 emit_arith(0x2B, 0xC0, dst, src); 5306 } 5307 5308 void Assembler::testq(Register dst, int32_t imm32) { 5309 // not using emit_arith because test 5310 // doesn't support sign-extension of 5311 // 8bit operands 5312 int encode = dst->encoding(); 5313 if (encode == 0) { 5314 prefix(REX_W); 5315 emit_byte(0xA9); 5316 } else { 5317 encode = prefixq_and_encode(encode); 5318 emit_byte(0xF7); 5319 emit_byte(0xC0 | encode); 5320 } 5321 emit_long(imm32); 5322 } 5323 5324 void Assembler::testq(Register dst, Register src) { 5325 (void) prefixq_and_encode(dst->encoding(), src->encoding()); 5326 emit_arith(0x85, 0xC0, dst, src); 5327 } 5328 5329 void Assembler::xaddq(Address dst, Register src) { 5330 InstructionMark im(this); 5331 prefixq(dst, src); 5332 emit_byte(0x0F); 5333 emit_byte(0xC1); 5334 emit_operand(src, dst); 5335 } 5336 5337 void Assembler::xchgq(Register dst, Address src) { 5338 InstructionMark im(this); 5339 prefixq(src, dst); 5340 emit_byte(0x87); 5341 emit_operand(dst, src); 5342 } 5343 5344 void Assembler::xchgq(Register dst, Register src) { 5345 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 5346 emit_byte(0x87); 5347 emit_byte(0xc0 | encode); 5348 } 5349 5350 void Assembler::xorq(Register dst, Register src) { 5351 (void) prefixq_and_encode(dst->encoding(), src->encoding()); 5352 emit_arith(0x33, 0xC0, dst, src); 5353 } 5354 5355 void Assembler::xorq(Register dst, Address src) { 5356 InstructionMark im(this); 5357 prefixq(src, dst); 5358 emit_byte(0x33); 5359 emit_operand(dst, src); 5360 } 5361 5362 #endif // !LP64 

// Table mapping each Assembler::Condition (its numeric encoding is the array
// index) to its negation, i.e. reverse[cc] is the condition that succeeds
// exactly when cc fails. As the paired entries below show, the x86 encoding
// negates a condition by flipping the low bit of its code.
static Assembler::Condition reverse[] = {
    Assembler::noOverflow     /* overflow      = 0x0 */ ,
    Assembler::overflow       /* noOverflow    = 0x1 */ ,
    Assembler::aboveEqual     /* carrySet      = 0x2, below         = 0x2 */ ,
    Assembler::below          /* aboveEqual    = 0x3, carryClear    = 0x3 */ ,
    Assembler::notZero        /* zero          = 0x4, equal         = 0x4 */ ,
    Assembler::zero           /* notZero      = 0x5, notEqual      = 0x5 */ ,
    Assembler::above          /* belowEqual    = 0x6 */ ,
    Assembler::belowEqual     /* above         = 0x7 */ ,
    Assembler::positive       /* negative      = 0x8 */ ,
    Assembler::negative       /* positive      = 0x9 */ ,
    Assembler::noParity       /* parity        = 0xa */ ,
    Assembler::parity         /* noParity      = 0xb */ ,
    Assembler::greaterEqual   /* less          = 0xc */ ,
    Assembler::less           /* greaterEqual  = 0xd */ ,
    Assembler::greater        /* lessEqual     = 0xe */ ,
    Assembler::lessEqual      /* greater       = 0xf, */

};


// Implementation of MacroAssembler

// First all the versions that have distinct versions depending on 32/64 bit
// Unless the difference is trivial (1 line or so).

#ifndef _LP64

// 32bit versions

// On 32-bit an AddressLiteral is simply an absolute address plus its
// relocation info.
Address MacroAssembler::as_Address(AddressLiteral adr) {
  return Address(adr.target(), adr.rspec());
}

Address MacroAssembler::as_Address(ArrayAddress adr) {
  return Address::make_array(adr);
}

// Emit the biased-locking fast path for monitorenter (32-bit version).
// On success jumps to 'done' with the lock acquired via bias; otherwise
// either jumps to *slow_case or falls through to 'cas_label' (bound at the
// end) where the caller's normal CAS-based locking takes over.
// Returns the code offset of the instruction whose implicit null check
// covers obj_reg (-1 semantics only when swap_reg_contains_mark and the
// mark never needs reloading -- see the two offset() captures below).
// swap_reg must be rax because the epilogue uses cmpxchg.
int MacroAssembler::biased_locking_enter(Register lock_reg,
                                         Register obj_reg,
                                         Register swap_reg,
                                         Register tmp_reg,
                                         bool swap_reg_contains_mark,
                                         Label& done,
                                         Label* slow_case,
                                         BiasedLockingCounters* counters) {
  assert(UseBiasedLocking, "why call this otherwise?");
  assert(swap_reg == rax, "swap_reg must be rax, for cmpxchg");
  assert_different_registers(lock_reg, obj_reg, swap_reg);

  if (PrintBiasedLockingStatistics && counters == NULL)
    counters = BiasedLocking::counters();

  // Unlike the 64-bit version there is no guaranteed scratch register;
  // if the caller passed none, borrow lock_reg around each use (push/pop).
  bool need_tmp_reg = false;
  if (tmp_reg == noreg) {
    need_tmp_reg = true;
    tmp_reg = lock_reg;
  } else {
    assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg);
  }
  assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
  Address mark_addr      (obj_reg, oopDesc::mark_offset_in_bytes());
  Address klass_addr     (obj_reg, oopDesc::klass_offset_in_bytes());
  Address saved_mark_addr(lock_reg, 0);

  // Biased locking
  // See whether the lock is currently biased toward our thread and
  // whether the epoch is still valid
  // Note that the runtime guarantees sufficient alignment of JavaThread
  // pointers to allow age to be placed into low bits
  // First check to see whether biasing is even enabled for this object
  Label cas_label;
  int null_check_offset = -1;
  if (!swap_reg_contains_mark) {
    null_check_offset = offset();
    movl(swap_reg, mark_addr);
  }
  if (need_tmp_reg) {
    push(tmp_reg);
  }
  movl(tmp_reg, swap_reg);
  andl(tmp_reg, markOopDesc::biased_lock_mask_in_place);
  cmpl(tmp_reg, markOopDesc::biased_lock_pattern);
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  jcc(Assembler::notEqual, cas_label);
  // The bias pattern is present in the object's header. Need to check
  // whether the bias owner and the epoch are both still current.
  // Note that because there is no current thread register on x86 we
  // need to store off the mark word we read out of the object to
  // avoid reloading it and needing to recheck invariants below. This
  // store is unfortunate but it makes the overall code shorter and
  // simpler.
  movl(saved_mark_addr, swap_reg);
  if (need_tmp_reg) {
    push(tmp_reg);
  }
  get_thread(tmp_reg);
  xorl(swap_reg, tmp_reg);
  if (swap_reg_contains_mark) {
    null_check_offset = offset();
  }
  movl(tmp_reg, klass_addr);
  xorl(swap_reg, Address(tmp_reg, Klass::prototype_header_offset()));
  andl(swap_reg, ~((int) markOopDesc::age_mask_in_place));
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  if (counters != NULL) {
    cond_inc32(Assembler::zero,
               ExternalAddress((address)counters->biased_lock_entry_count_addr()));
  }
  jcc(Assembler::equal, done);

  Label try_revoke_bias;
  Label try_rebias;

  // At this point we know that the header has the bias pattern and
  // that we are not the bias owner in the current epoch. We need to
  // figure out more details about the state of the header in order to
  // know what operations can be legally performed on the object's
  // header.

  // If the low three bits in the xor result aren't clear, that means
  // the prototype header is no longer biased and we have to revoke
  // the bias on this object.
  testl(swap_reg, markOopDesc::biased_lock_mask_in_place);
  jcc(Assembler::notZero, try_revoke_bias);

  // Biasing is still enabled for this data type. See whether the
  // epoch of the current bias is still valid, meaning that the epoch
  // bits of the mark word are equal to the epoch bits of the
  // prototype header. (Note that the prototype header's epoch bits
  // only change at a safepoint.) If not, attempt to rebias the object
  // toward the current thread. Note that we must be absolutely sure
  // that the current epoch is invalid in order to do this because
  // otherwise the manipulations it performs on the mark word are
  // illegal.
  testl(swap_reg, markOopDesc::epoch_mask_in_place);
  jcc(Assembler::notZero, try_rebias);

  // The epoch of the current bias is still valid but we know nothing
  // about the owner; it might be set or it might be clear. Try to
  // acquire the bias of the object using an atomic operation. If this
  // fails we will go in to the runtime to revoke the object's bias.
  // Note that we first construct the presumed unbiased header so we
  // don't accidentally blow away another thread's valid bias.
  movl(swap_reg, saved_mark_addr);
  andl(swap_reg,
       markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);
  if (need_tmp_reg) {
    push(tmp_reg);
  }
  get_thread(tmp_reg);
  orl(tmp_reg, swap_reg);
  if (os::is_MP()) {
    lock();
  }
  cmpxchgptr(tmp_reg, Address(obj_reg, 0));
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  // If the biasing toward our thread failed, this means that
  // another thread succeeded in biasing it toward itself and we
  // need to revoke that bias. The revocation will occur in the
  // interpreter runtime in the slow case.
  if (counters != NULL) {
    cond_inc32(Assembler::zero,
               ExternalAddress((address)counters->anonymously_biased_lock_entry_count_addr()));
  }
  if (slow_case != NULL) {
    jcc(Assembler::notZero, *slow_case);
  }
  jmp(done);

  bind(try_rebias);
  // At this point we know the epoch has expired, meaning that the
  // current "bias owner", if any, is actually invalid. Under these
  // circumstances _only_, we are allowed to use the current header's
  // value as the comparison value when doing the cas to acquire the
  // bias in the current epoch. In other words, we allow transfer of
  // the bias from one thread to another directly in this situation.
  //
  // FIXME: due to a lack of registers we currently blow away the age
  // bits in this situation. Should attempt to preserve them.
  if (need_tmp_reg) {
    push(tmp_reg);
  }
  get_thread(tmp_reg);
  movl(swap_reg, klass_addr);
  orl(tmp_reg, Address(swap_reg, Klass::prototype_header_offset()));
  movl(swap_reg, saved_mark_addr);
  if (os::is_MP()) {
    lock();
  }
  cmpxchgptr(tmp_reg, Address(obj_reg, 0));
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  // If the biasing toward our thread failed, then another thread
  // succeeded in biasing it toward itself and we need to revoke that
  // bias. The revocation will occur in the runtime in the slow case.
  if (counters != NULL) {
    cond_inc32(Assembler::zero,
               ExternalAddress((address)counters->rebiased_lock_entry_count_addr()));
  }
  if (slow_case != NULL) {
    jcc(Assembler::notZero, *slow_case);
  }
  jmp(done);

  bind(try_revoke_bias);
  // The prototype mark in the klass doesn't have the bias bit set any
  // more, indicating that objects of this data type are not supposed
  // to be biased any more. We are going to try to reset the mark of
  // this object to the prototype value and fall through to the
  // CAS-based locking scheme. Note that if our CAS fails, it means
  // that another thread raced us for the privilege of revoking the
  // bias of this particular object, so it's okay to continue in the
  // normal locking code.
  //
  // FIXME: due to a lack of registers we currently blow away the age
  // bits in this situation. Should attempt to preserve them.
  movl(swap_reg, saved_mark_addr);
  if (need_tmp_reg) {
    push(tmp_reg);
  }
  movl(tmp_reg, klass_addr);
  movl(tmp_reg, Address(tmp_reg, Klass::prototype_header_offset()));
  if (os::is_MP()) {
    lock();
  }
  cmpxchgptr(tmp_reg, Address(obj_reg, 0));
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  // Fall through to the normal CAS-based lock, because no matter what
  // the result of the above CAS, some thread must have succeeded in
  // removing the bias bit from the object's header.
  if (counters != NULL) {
    cond_inc32(Assembler::zero,
               ExternalAddress((address)counters->revoked_lock_entry_count_addr()));
  }

  bind(cas_label);

  return null_check_offset;
}

// Call a VM-leaf routine and pop its stack-passed arguments
// (32-bit C calling convention: caller cleans up).
void MacroAssembler::call_VM_leaf_base(address entry_point,
                                       int number_of_arguments) {
  call(RuntimeAddress(entry_point));
  increment(rsp, number_of_arguments * wordSize);
}

// 32-bit only: compare against an embedded Metadata* immediate, recorded
// with a relocation so the immediate can be patched.
void MacroAssembler::cmpklass(Address src1, Metadata* obj) {
  cmp_literal32(src1, (int32_t)obj, metadata_Relocation::spec_for_immediate());
}

void MacroAssembler::cmpklass(Register src1, Metadata* obj) {
  cmp_literal32(src1, (int32_t)obj, metadata_Relocation::spec_for_immediate());
}

// 32-bit only: compare against an embedded oop immediate (relocated).
void MacroAssembler::cmpoop(Address src1, jobject obj) {
  cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate());
}

void MacroAssembler::cmpoop(Register src1, jobject obj) {
  cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate());
}

// Sign-extend lo into hi (e.g. before a 64/32-bit divide).
void MacroAssembler::extend_sign(Register hi, Register lo) {
  // According to Intel Doc. AP-526, "Integer Divide", p.18.
  if (VM_Version::is_P6() && hi == rdx && lo == rax) {
    cdql();
  } else {
    movl(hi, lo);
    sarl(hi, 31);
  }
}

// Jump to L if FPU condition flag C2 is set (uses sahf via rax, preserved).
void MacroAssembler::jC2(Register tmp, Label& L) {
  // set parity bit if FPU flag C2 is set (via rax)
  save_rax(tmp);
  fwait(); fnstsw_ax();
  sahf();
  restore_rax(tmp);
  // branch
  jcc(Assembler::parity, L);
}

// Jump to L if FPU condition flag C2 is clear.
void MacroAssembler::jnC2(Register tmp, Label& L) {
  // set parity bit if FPU flag C2 is set (via rax)
  save_rax(tmp);
  fwait(); fnstsw_ax();
  sahf();
  restore_rax(tmp);
  // branch
  jcc(Assembler::noParity, L);
}

// 32bit can do a case table jump in one instruction but we no longer allow the base
// to be installed in the Address class
void MacroAssembler::jump(ArrayAddress entry) {
  jmp(as_Address(entry));
}

// Note: y_lo will be destroyed
void MacroAssembler::lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo) {
  // Long compare for Java (semantics as described in JVM spec.)
  // Result in x_hi: -1, 0, or +1.
  Label high, low, done;

  cmpl(x_hi, y_hi);
  jcc(Assembler::less, low);
  jcc(Assembler::greater, high);
  // x_hi is the return register
  xorl(x_hi, x_hi);
  cmpl(x_lo, y_lo);
  jcc(Assembler::below, low);    // low words compare unsigned
  jcc(Assembler::equal, done);

  bind(high);
  xorl(x_hi, x_hi);
  increment(x_hi);
  jmp(done);

  bind(low);
  xorl(x_hi, x_hi);
  decrementl(x_hi);

  bind(done);
}

// Load the literal address (not its contents) into dst, with relocation.
void MacroAssembler::lea(Register dst, AddressLiteral src) {
  mov_literal32(dst, (int32_t)src.target(), src.rspec());
}

void MacroAssembler::lea(Address dst, AddressLiteral adr) {
  // leal(dst, as_Address(adr));
  // see note in movl as to why we must use a move
  mov_literal32(dst, (int32_t) adr.target(), adr.rspec());
}

// Standard frame teardown: restore caller's rsp and rbp.
void MacroAssembler::leave() {
  mov(rsp, rbp);
  pop(rbp);
}

void MacroAssembler::lmul(int x_rsp_offset, int y_rsp_offset) {
  // Multiplication of two Java long values stored on the stack
  // as illustrated below. Result is in rdx:rax.
  //
  // rsp ---> [  ??  ] \               \
  //            ....    | y_rsp_offset  |
  //          [ y_lo ] /  (in bytes)    | x_rsp_offset
  //          [ y_hi ]                  | (in bytes)
  //            ....                    |
  //          [ x_lo ]                 /
  //          [ x_hi ]
  //            ....
  //
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
  Address x_hi(rsp, x_rsp_offset + wordSize); Address x_lo(rsp, x_rsp_offset);
  Address y_hi(rsp, y_rsp_offset + wordSize); Address y_lo(rsp, y_rsp_offset);
  Label quick;
  // load x_hi, y_hi and check if quick
  // multiplication is possible
  movl(rbx, x_hi);
  movl(rcx, y_hi);
  movl(rax, rbx);
  orl(rbx, rcx);                                 // rbx, = 0 <=> x_hi = 0 and y_hi = 0
  jcc(Assembler::zero, quick);                   // if rbx, = 0 do quick multiply
  // do full multiplication
  // 1st step
  mull(y_lo);                                    // x_hi * y_lo
  movl(rbx, rax);                                // save lo(x_hi * y_lo) in rbx,
  // 2nd step
  movl(rax, x_lo);
  mull(rcx);                                     // x_lo * y_hi
  addl(rbx, rax);                                // add lo(x_lo * y_hi) to rbx,
  // 3rd step
  bind(quick);                                   // note: rbx, = 0 if quick multiply!
  movl(rax, x_lo);
  mull(y_lo);                                    // x_lo * y_lo
  addl(rdx, rbx);                                // correct hi(x_lo * y_lo)
}

// Negate a 64-bit value held in the register pair hi:lo.
void MacroAssembler::lneg(Register hi, Register lo) {
  negl(lo);
  adcl(hi, 0);
  negl(hi);
}

void MacroAssembler::lshl(Register hi, Register lo) {
  // Java shift left long support (semantics as described in JVM spec., p.305)
  // (basic idea for shift counts s >= n: x << s == (x << n) << (s - n))
  // shift value is in rcx !
  assert(hi != rcx, "must not use rcx");
  assert(lo != rcx, "must not use rcx");
  const Register s = rcx;                        // shift count
  const int      n = BitsPerWord;
  Label L;
  andl(s, 0x3f);                                 // s := s & 0x3f (s < 0x40)
  cmpl(s, n);                                    // if (s < n)
  jcc(Assembler::less, L);                       // else (s >= n)
  movl(hi, lo);                                  // x := x << n
  xorl(lo, lo);
  // Note: subl(s, n) is not needed since the Intel shift instructions work rcx mod n!
  bind(L);                                       // s (mod n) < n
  shldl(hi, lo);                                 // x := x << s
  shll(lo);
}


void MacroAssembler::lshr(Register hi, Register lo, bool sign_extension) {
  // Java shift right long support (semantics as described in JVM spec., p.306 & p.310)
  // (basic idea for shift counts s >= n: x >> s == (x >> n) >> (s - n))
  assert(hi != rcx, "must not use rcx");
  assert(lo != rcx, "must not use rcx");
  const Register s = rcx;                        // shift count
  const int      n = BitsPerWord;
  Label L;
  andl(s, 0x3f);                                 // s := s & 0x3f (s < 0x40)
  cmpl(s, n);                                    // if (s < n)
  jcc(Assembler::less, L);                       // else (s >= n)
  movl(lo, hi);                                  // x := x >> n
  if (sign_extension) sarl(hi, 31);
  else                xorl(hi, hi);
  // Note: subl(s, n) is not needed since the Intel shift instructions work rcx mod n!
  bind(L);                                       // s (mod n) < n
  shrdl(lo, hi);                                 // x := x >> s
  if (sign_extension) sarl(hi);
  else                shrl(hi);
}

// Load/store an oop immediate (32-bit, relocated for GC patching).
void MacroAssembler::movoop(Register dst, jobject obj) {
  mov_literal32(dst, (int32_t)obj, oop_Relocation::spec_for_immediate());
}

void MacroAssembler::movoop(Address dst, jobject obj) {
  mov_literal32(dst, (int32_t)obj, oop_Relocation::spec_for_immediate());
}

// Load/store a Metadata* immediate (32-bit, relocated).
void MacroAssembler::mov_metadata(Register dst, Metadata* obj) {
  mov_literal32(dst, (int32_t)obj, metadata_Relocation::spec_for_immediate());
}

void MacroAssembler::mov_metadata(Address dst, Metadata* obj) {
  mov_literal32(dst, (int32_t)obj, metadata_Relocation::spec_for_immediate());
}

// Load either the literal's address (lval) or its contents (rval).
void MacroAssembler::movptr(Register dst, AddressLiteral src) {
  if (src.is_lval()) {
    mov_literal32(dst, (intptr_t)src.target(), src.rspec());
  } else {
    movl(dst, as_Address(src));
  }
}

void MacroAssembler::movptr(ArrayAddress dst, Register src) {
  movl(as_Address(dst), src);
}

void MacroAssembler::movptr(Register dst, ArrayAddress src) {
  movl(dst, as_Address(src));
}

// src should NEVER be a real pointer. Use AddressLiteral for true pointers
void MacroAssembler::movptr(Address dst, intptr_t src) {
  movl(dst, src);
}


// Restore the registers saved by push_callee_saved_registers (reverse order).
void MacroAssembler::pop_callee_saved_registers() {
  pop(rcx);
  pop(rdx);
  pop(rdi);
  pop(rsi);
}

// Reload the FPU top-of-stack (a double) from [rsp] and free its two words.
void MacroAssembler::pop_fTOS() {
  fld_d(Address(rsp, 0));
  addl(rsp, 2 * wordSize);
}

void MacroAssembler::push_callee_saved_registers() {
  push(rsi);
  push(rdi);
  push(rdx);
  push(rcx);
}

// Spill the FPU top-of-stack (a double) to two words below rsp.
void MacroAssembler::push_fTOS() {
  subl(rsp, 2 * wordSize);
  fstp_d(Address(rsp, 0));
}


// Push an oop immediate (relocated).
void MacroAssembler::pushoop(jobject obj) {
  push_literal32((int32_t)obj, oop_Relocation::spec_for_immediate());
}

// Push a Metadata* immediate (relocated).
void MacroAssembler::pushklass(Metadata* obj) {
  push_literal32((int32_t)obj, metadata_Relocation::spec_for_immediate());
}

// Push either the literal's address (lval) or its contents (rval).
void MacroAssembler::pushptr(AddressLiteral src) {
  if (src.is_lval()) {
    push_literal32((int32_t)src.target(), src.rspec());
  } else {
    pushl(as_Address(src));
  }
}

// dst := (ZF == 0) ? 1 : 0
void MacroAssembler::set_word_if_not_zero(Register dst) {
  xorl(dst, dst);
  set_byte_if_not_zero(dst);
}

// 32-bit C calling convention: all VM-call arguments go on the stack,
// so pass_argN simply pushes (callers emit them in reverse order).
static void pass_arg0(MacroAssembler* masm, Register arg) {
  masm->push(arg);
}

static void pass_arg1(MacroAssembler* masm, Register arg) {
  masm->push(arg);
}

static void pass_arg2(MacroAssembler* masm, Register arg) {
  masm->push(arg);
}

static void pass_arg3(MacroAssembler* masm, Register arg) {
  masm->push(arg);
}

#ifndef PRODUCT
extern "C" void findpc(intptr_t x);
#endif

// Runtime target of MacroAssembler::stop() (32-bit). The int parameters are
// the register values that stop() pushed with pusha, read back off the stack
// in C calling convention order, followed by the faked eip and the message.
void MacroAssembler::debug32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip, char* msg) {
  // In order to get locks to work, we need to fake a in_VM state
  JavaThread* thread = JavaThread::current();
  JavaThreadState saved_state = thread->thread_state();
  thread->set_thread_state(_thread_in_vm);
  if (ShowMessageBoxOnError) {
    // NOTE(review): these three lines shadow and duplicate the ones directly
    // above -- the inner thread/saved_state are redundant.
    JavaThread* thread = JavaThread::current();
    JavaThreadState saved_state = thread->thread_state();
    thread->set_thread_state(_thread_in_vm);
    if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
      ttyLocker ttyl;
      BytecodeCounter::print();
    }
    // To see where a verify_oop failed, get $ebx+40/X for this frame.
    // This is the value of eip which points to where verify_oop will return.
    if (os::message_box(msg, "Execution stopped, print registers?")) {
      print_state32(rdi, rsi, rbp, rsp, rbx, rdx, rcx, rax, eip);
      BREAKPOINT;
    }
  } else {
    ttyLocker ttyl;
    ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg);
  }
  // Don't assert holding the ttyLock
  assert(false, err_msg("DEBUG MESSAGE: %s", msg));
  ThreadStateTransition::transition(thread, _thread_in_vm, saved_state);
}

// Dump registers, a window of stack words, and the code around eip.
void MacroAssembler::print_state32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip) {
  ttyLocker ttyl;
  FlagSetting fs(Debugging, true);
  tty->print_cr("eip = 0x%08x", eip);
#ifndef PRODUCT
  if ((WizardMode || Verbose) && PrintMiscellaneous) {
    tty->cr();
    findpc(eip);
    tty->cr();
  }
#endif
#define PRINT_REG(rax) \
  { tty->print("%s = ", #rax); os::print_location(tty, rax); }
  PRINT_REG(rax);
  PRINT_REG(rbx);
  PRINT_REG(rcx);
  PRINT_REG(rdx);
  PRINT_REG(rdi);
  PRINT_REG(rsi);
  PRINT_REG(rbp);
  PRINT_REG(rsp);
#undef PRINT_REG
  // Print some words near top of stack.
  int* dump_sp = (int*) rsp;
  // First eight words individually, with symbolic location info.
  for (int col1 = 0; col1 < 8; col1++) {   // col1 only counts iterations
    tty->print("(rsp+0x%03x) 0x%08x: ", (int)((intptr_t)dump_sp - (intptr_t)rsp), (intptr_t)dump_sp);
    os::print_location(tty, *dump_sp++);
  }
  // Then a 16x8 hex dump of the following words.
  for (int row = 0; row < 16; row++) {
    tty->print("(rsp+0x%03x) 0x%08x: ", (int)((intptr_t)dump_sp - (intptr_t)rsp), (intptr_t)dump_sp);
    for (int col = 0; col < 8; col++) {
      tty->print(" 0x%08x", *dump_sp++);
    }
    tty->cr();
  }
  // Print some instructions around pc:
  Disassembler::decode((address)eip-64, (address)eip);
  tty->print_cr("--------");
  Disassembler::decode((address)eip, (address)eip+32);
}

// Emit code that halts with a message: builds the argument list debug32
// expects (registers via pusha, a faked eip via call-next-instruction,
// and the message) and calls it.
void MacroAssembler::stop(const char* msg) {
  ExternalAddress message((address)msg);
  // push address of message
  pushptr(message.addr());
  { Label L; call(L, relocInfo::none); bind(L); }  // push eip
  pusha();                                         // push registers
  call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug32)));
  hlt();
}

// Emit code that prints a warning (via the VM's warning()) and continues;
// the full CPU state is preserved around the call.
void MacroAssembler::warn(const char* msg) {
  push_CPU_state();

  ExternalAddress message((address) msg);
  // push address of message
  pushptr(message.addr());

  call(RuntimeAddress(CAST_FROM_FN_PTR(address, warning)));
  addl(rsp, wordSize);       // discard argument
  pop_CPU_state();
}

// Emit code that dumps the current register/stack state (non-fatal twin
// of stop()); state is restored afterwards.
void MacroAssembler::print_state() {
  { Label L; call(L, relocInfo::none); bind(L); }  // push eip
  pusha();                                         // push registers

  push_CPU_state();
  call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::print_state32)));
  pop_CPU_state();

  popa();
  addl(rsp, wordSize);
}

#else // _LP64

// 64 bit versions

Address MacroAssembler::as_Address(AddressLiteral adr) {
  // amd64 always does this as a pc-rel
  // we can be absolute or disp based on the instruction type
  // jmp/call are displacements others are absolute
assert(!adr.is_lval(), "must be rval"); 6016 assert(reachable(adr), "must be"); 6017 return Address((int32_t)(intptr_t)(adr.target() - pc()), adr.target(), adr.reloc()); 6018 6019 } 6020 6021 Address MacroAssembler::as_Address(ArrayAddress adr) { 6022 AddressLiteral base = adr.base(); 6023 lea(rscratch1, base); 6024 Address index = adr.index(); 6025 assert(index._disp == 0, "must not have disp"); // maybe it can? 6026 Address array(rscratch1, index._index, index._scale, index._disp); 6027 return array; 6028 } 6029 6030 int MacroAssembler::biased_locking_enter(Register lock_reg, 6031 Register obj_reg, 6032 Register swap_reg, 6033 Register tmp_reg, 6034 bool swap_reg_contains_mark, 6035 Label& done, 6036 Label* slow_case, 6037 BiasedLockingCounters* counters) { 6038 assert(UseBiasedLocking, "why call this otherwise?"); 6039 assert(swap_reg == rax, "swap_reg must be rax for cmpxchgq"); 6040 assert(tmp_reg != noreg, "tmp_reg must be supplied"); 6041 assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg); 6042 assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout"); 6043 Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes()); 6044 Address saved_mark_addr(lock_reg, 0); 6045 6046 if (PrintBiasedLockingStatistics && counters == NULL) 6047 counters = BiasedLocking::counters(); 6048 6049 // Biased locking 6050 // See whether the lock is currently biased toward our thread and 6051 // whether the epoch is still valid 6052 // Note that the runtime guarantees sufficient alignment of JavaThread 6053 // pointers to allow age to be placed into low bits 6054 // First check to see whether biasing is even enabled for this object 6055 Label cas_label; 6056 int null_check_offset = -1; 6057 if (!swap_reg_contains_mark) { 6058 null_check_offset = offset(); 6059 movq(swap_reg, mark_addr); 6060 } 6061 movq(tmp_reg, swap_reg); 6062 andq(tmp_reg, markOopDesc::biased_lock_mask_in_place); 6063 
cmpq(tmp_reg, markOopDesc::biased_lock_pattern); 6064 jcc(Assembler::notEqual, cas_label); 6065 // The bias pattern is present in the object's header. Need to check 6066 // whether the bias owner and the epoch are both still current. 6067 load_prototype_header(tmp_reg, obj_reg); 6068 orq(tmp_reg, r15_thread); 6069 xorq(tmp_reg, swap_reg); 6070 andq(tmp_reg, ~((int) markOopDesc::age_mask_in_place)); 6071 if (counters != NULL) { 6072 cond_inc32(Assembler::zero, 6073 ExternalAddress((address) counters->anonymously_biased_lock_entry_count_addr())); 6074 } 6075 jcc(Assembler::equal, done); 6076 6077 Label try_revoke_bias; 6078 Label try_rebias; 6079 6080 // At this point we know that the header has the bias pattern and 6081 // that we are not the bias owner in the current epoch. We need to 6082 // figure out more details about the state of the header in order to 6083 // know what operations can be legally performed on the object's 6084 // header. 6085 6086 // If the low three bits in the xor result aren't clear, that means 6087 // the prototype header is no longer biased and we have to revoke 6088 // the bias on this object. 6089 testq(tmp_reg, markOopDesc::biased_lock_mask_in_place); 6090 jcc(Assembler::notZero, try_revoke_bias); 6091 6092 // Biasing is still enabled for this data type. See whether the 6093 // epoch of the current bias is still valid, meaning that the epoch 6094 // bits of the mark word are equal to the epoch bits of the 6095 // prototype header. (Note that the prototype header's epoch bits 6096 // only change at a safepoint.) If not, attempt to rebias the object 6097 // toward the current thread. Note that we must be absolutely sure 6098 // that the current epoch is invalid in order to do this because 6099 // otherwise the manipulations it performs on the mark word are 6100 // illegal. 
6101 testq(tmp_reg, markOopDesc::epoch_mask_in_place); 6102 jcc(Assembler::notZero, try_rebias); 6103 6104 // The epoch of the current bias is still valid but we know nothing 6105 // about the owner; it might be set or it might be clear. Try to 6106 // acquire the bias of the object using an atomic operation. If this 6107 // fails we will go in to the runtime to revoke the object's bias. 6108 // Note that we first construct the presumed unbiased header so we 6109 // don't accidentally blow away another thread's valid bias. 6110 andq(swap_reg, 6111 markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place); 6112 movq(tmp_reg, swap_reg); 6113 orq(tmp_reg, r15_thread); 6114 if (os::is_MP()) { 6115 lock(); 6116 } 6117 cmpxchgq(tmp_reg, Address(obj_reg, 0)); 6118 // If the biasing toward our thread failed, this means that 6119 // another thread succeeded in biasing it toward itself and we 6120 // need to revoke that bias. The revocation will occur in the 6121 // interpreter runtime in the slow case. 6122 if (counters != NULL) { 6123 cond_inc32(Assembler::zero, 6124 ExternalAddress((address) counters->anonymously_biased_lock_entry_count_addr())); 6125 } 6126 if (slow_case != NULL) { 6127 jcc(Assembler::notZero, *slow_case); 6128 } 6129 jmp(done); 6130 6131 bind(try_rebias); 6132 // At this point we know the epoch has expired, meaning that the 6133 // current "bias owner", if any, is actually invalid. Under these 6134 // circumstances _only_, we are allowed to use the current header's 6135 // value as the comparison value when doing the cas to acquire the 6136 // bias in the current epoch. In other words, we allow transfer of 6137 // the bias from one thread to another directly in this situation. 6138 // 6139 // FIXME: due to a lack of registers we currently blow away the age 6140 // bits in this situation. Should attempt to preserve them. 
6141 load_prototype_header(tmp_reg, obj_reg); 6142 orq(tmp_reg, r15_thread); 6143 if (os::is_MP()) { 6144 lock(); 6145 } 6146 cmpxchgq(tmp_reg, Address(obj_reg, 0)); 6147 // If the biasing toward our thread failed, then another thread 6148 // succeeded in biasing it toward itself and we need to revoke that 6149 // bias. The revocation will occur in the runtime in the slow case. 6150 if (counters != NULL) { 6151 cond_inc32(Assembler::zero, 6152 ExternalAddress((address) counters->rebiased_lock_entry_count_addr())); 6153 } 6154 if (slow_case != NULL) { 6155 jcc(Assembler::notZero, *slow_case); 6156 } 6157 jmp(done); 6158 6159 bind(try_revoke_bias); 6160 // The prototype mark in the klass doesn't have the bias bit set any 6161 // more, indicating that objects of this data type are not supposed 6162 // to be biased any more. We are going to try to reset the mark of 6163 // this object to the prototype value and fall through to the 6164 // CAS-based locking scheme. Note that if our CAS fails, it means 6165 // that another thread raced us for the privilege of revoking the 6166 // bias of this particular object, so it's okay to continue in the 6167 // normal locking code. 6168 // 6169 // FIXME: due to a lack of registers we currently blow away the age 6170 // bits in this situation. Should attempt to preserve them. 6171 load_prototype_header(tmp_reg, obj_reg); 6172 if (os::is_MP()) { 6173 lock(); 6174 } 6175 cmpxchgq(tmp_reg, Address(obj_reg, 0)); 6176 // Fall through to the normal CAS-based lock, because no matter what 6177 // the result of the above CAS, some thread must have succeeded in 6178 // removing the bias bit from the object's header. 
6179 if (counters != NULL) { 6180 cond_inc32(Assembler::zero, 6181 ExternalAddress((address) counters->revoked_lock_entry_count_addr())); 6182 } 6183 6184 bind(cas_label); 6185 6186 return null_check_offset; 6187 } 6188 6189 void MacroAssembler::call_VM_leaf_base(address entry_point, int num_args) { 6190 Label L, E; 6191 6192 #ifdef _WIN64 6193 // Windows always allocates space for it's register args 6194 assert(num_args <= 4, "only register arguments supported"); 6195 subq(rsp, frame::arg_reg_save_area_bytes); 6196 #endif 6197 6198 // Align stack if necessary 6199 testl(rsp, 15); 6200 jcc(Assembler::zero, L); 6201 6202 subq(rsp, 8); 6203 { 6204 call(RuntimeAddress(entry_point)); 6205 } 6206 addq(rsp, 8); 6207 jmp(E); 6208 6209 bind(L); 6210 { 6211 call(RuntimeAddress(entry_point)); 6212 } 6213 6214 bind(E); 6215 6216 #ifdef _WIN64 6217 // restore stack pointer 6218 addq(rsp, frame::arg_reg_save_area_bytes); 6219 #endif 6220 6221 } 6222 6223 void MacroAssembler::cmp64(Register src1, AddressLiteral src2) { 6224 assert(!src2.is_lval(), "should use cmpptr"); 6225 6226 if (reachable(src2)) { 6227 cmpq(src1, as_Address(src2)); 6228 } else { 6229 lea(rscratch1, src2); 6230 Assembler::cmpq(src1, Address(rscratch1, 0)); 6231 } 6232 } 6233 6234 int MacroAssembler::corrected_idivq(Register reg) { 6235 // Full implementation of Java ldiv and lrem; checks for special 6236 // case as described in JVM spec., p.243 & p.271. The function 6237 // returns the (pc) offset of the idivl instruction - may be needed 6238 // for implicit exceptions. 
6239 // 6240 // normal case special case 6241 // 6242 // input : rax: dividend min_long 6243 // reg: divisor (may not be eax/edx) -1 6244 // 6245 // output: rax: quotient (= rax idiv reg) min_long 6246 // rdx: remainder (= rax irem reg) 0 6247 assert(reg != rax && reg != rdx, "reg cannot be rax or rdx register"); 6248 static const int64_t min_long = 0x8000000000000000; 6249 Label normal_case, special_case; 6250 6251 // check for special case 6252 cmp64(rax, ExternalAddress((address) &min_long)); 6253 jcc(Assembler::notEqual, normal_case); 6254 xorl(rdx, rdx); // prepare rdx for possible special case (where 6255 // remainder = 0) 6256 cmpq(reg, -1); 6257 jcc(Assembler::equal, special_case); 6258 6259 // handle normal case 6260 bind(normal_case); 6261 cdqq(); 6262 int idivq_offset = offset(); 6263 idivq(reg); 6264 6265 // normal and special case exit 6266 bind(special_case); 6267 6268 return idivq_offset; 6269 } 6270 6271 void MacroAssembler::decrementq(Register reg, int value) { 6272 if (value == min_jint) { subq(reg, value); return; } 6273 if (value < 0) { incrementq(reg, -value); return; } 6274 if (value == 0) { ; return; } 6275 if (value == 1 && UseIncDec) { decq(reg) ; return; } 6276 /* else */ { subq(reg, value) ; return; } 6277 } 6278 6279 void MacroAssembler::decrementq(Address dst, int value) { 6280 if (value == min_jint) { subq(dst, value); return; } 6281 if (value < 0) { incrementq(dst, -value); return; } 6282 if (value == 0) { ; return; } 6283 if (value == 1 && UseIncDec) { decq(dst) ; return; } 6284 /* else */ { subq(dst, value) ; return; } 6285 } 6286 6287 void MacroAssembler::incrementq(Register reg, int value) { 6288 if (value == min_jint) { addq(reg, value); return; } 6289 if (value < 0) { decrementq(reg, -value); return; } 6290 if (value == 0) { ; return; } 6291 if (value == 1 && UseIncDec) { incq(reg) ; return; } 6292 /* else */ { addq(reg, value) ; return; } 6293 } 6294 6295 void MacroAssembler::incrementq(Address dst, int value) { 6296 if (value 
== min_jint) { addq(dst, value); return; } 6297 if (value < 0) { decrementq(dst, -value); return; } 6298 if (value == 0) { ; return; } 6299 if (value == 1 && UseIncDec) { incq(dst) ; return; } 6300 /* else */ { addq(dst, value) ; return; } 6301 } 6302 6303 // 32bit can do a case table jump in one instruction but we no longer allow the base 6304 // to be installed in the Address class 6305 void MacroAssembler::jump(ArrayAddress entry) { 6306 lea(rscratch1, entry.base()); 6307 Address dispatch = entry.index(); 6308 assert(dispatch._base == noreg, "must be"); 6309 dispatch._base = rscratch1; 6310 jmp(dispatch); 6311 } 6312 6313 void MacroAssembler::lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo) { 6314 ShouldNotReachHere(); // 64bit doesn't use two regs 6315 cmpq(x_lo, y_lo); 6316 } 6317 6318 void MacroAssembler::lea(Register dst, AddressLiteral src) { 6319 mov_literal64(dst, (intptr_t)src.target(), src.rspec()); 6320 } 6321 6322 void MacroAssembler::lea(Address dst, AddressLiteral adr) { 6323 mov_literal64(rscratch1, (intptr_t)adr.target(), adr.rspec()); 6324 movptr(dst, rscratch1); 6325 } 6326 6327 void MacroAssembler::leave() { 6328 // %%% is this really better? Why not on 32bit too? 
6329 emit_byte(0xC9); // LEAVE 6330 } 6331 6332 void MacroAssembler::lneg(Register hi, Register lo) { 6333 ShouldNotReachHere(); // 64bit doesn't use two regs 6334 negq(lo); 6335 } 6336 6337 void MacroAssembler::movoop(Register dst, jobject obj) { 6338 mov_literal64(dst, (intptr_t)obj, oop_Relocation::spec_for_immediate()); 6339 } 6340 6341 void MacroAssembler::movoop(Address dst, jobject obj) { 6342 mov_literal64(rscratch1, (intptr_t)obj, oop_Relocation::spec_for_immediate()); 6343 movq(dst, rscratch1); 6344 } 6345 6346 void MacroAssembler::mov_metadata(Register dst, Metadata* obj) { 6347 mov_literal64(dst, (intptr_t)obj, metadata_Relocation::spec_for_immediate()); 6348 } 6349 6350 void MacroAssembler::mov_metadata(Address dst, Metadata* obj) { 6351 mov_literal64(rscratch1, (intptr_t)obj, metadata_Relocation::spec_for_immediate()); 6352 movq(dst, rscratch1); 6353 } 6354 6355 void MacroAssembler::movptr(Register dst, AddressLiteral src) { 6356 if (src.is_lval()) { 6357 mov_literal64(dst, (intptr_t)src.target(), src.rspec()); 6358 } else { 6359 if (reachable(src)) { 6360 movq(dst, as_Address(src)); 6361 } else { 6362 lea(rscratch1, src); 6363 movq(dst, Address(rscratch1,0)); 6364 } 6365 } 6366 } 6367 6368 void MacroAssembler::movptr(ArrayAddress dst, Register src) { 6369 movq(as_Address(dst), src); 6370 } 6371 6372 void MacroAssembler::movptr(Register dst, ArrayAddress src) { 6373 movq(dst, as_Address(src)); 6374 } 6375 6376 // src should NEVER be a real pointer. 
Use AddressLiteral for true pointers 6377 void MacroAssembler::movptr(Address dst, intptr_t src) { 6378 mov64(rscratch1, src); 6379 movq(dst, rscratch1); 6380 } 6381 6382 // These are mostly for initializing NULL 6383 void MacroAssembler::movptr(Address dst, int32_t src) { 6384 movslq(dst, src); 6385 } 6386 6387 void MacroAssembler::movptr(Register dst, int32_t src) { 6388 mov64(dst, (intptr_t)src); 6389 } 6390 6391 void MacroAssembler::pushoop(jobject obj) { 6392 movoop(rscratch1, obj); 6393 push(rscratch1); 6394 } 6395 6396 void MacroAssembler::pushklass(Metadata* obj) { 6397 mov_metadata(rscratch1, obj); 6398 push(rscratch1); 6399 } 6400 6401 void MacroAssembler::pushptr(AddressLiteral src) { 6402 lea(rscratch1, src); 6403 if (src.is_lval()) { 6404 push(rscratch1); 6405 } else { 6406 pushq(Address(rscratch1, 0)); 6407 } 6408 } 6409 6410 void MacroAssembler::reset_last_Java_frame(bool clear_fp, 6411 bool clear_pc) { 6412 // we must set sp to zero to clear frame 6413 movptr(Address(r15_thread, JavaThread::last_Java_sp_offset()), NULL_WORD); 6414 // must clear fp, so that compiled frames are not confused; it is 6415 // possible that we need it only for debugging 6416 if (clear_fp) { 6417 movptr(Address(r15_thread, JavaThread::last_Java_fp_offset()), NULL_WORD); 6418 } 6419 6420 if (clear_pc) { 6421 movptr(Address(r15_thread, JavaThread::last_Java_pc_offset()), NULL_WORD); 6422 } 6423 } 6424 6425 void MacroAssembler::set_last_Java_frame(Register last_java_sp, 6426 Register last_java_fp, 6427 address last_java_pc) { 6428 // determine last_java_sp register 6429 if (!last_java_sp->is_valid()) { 6430 last_java_sp = rsp; 6431 } 6432 6433 // last_java_fp is optional 6434 if (last_java_fp->is_valid()) { 6435 movptr(Address(r15_thread, JavaThread::last_Java_fp_offset()), 6436 last_java_fp); 6437 } 6438 6439 // last_java_pc is optional 6440 if (last_java_pc != NULL) { 6441 Address java_pc(r15_thread, 6442 JavaThread::frame_anchor_offset() + 
JavaFrameAnchor::last_Java_pc_offset()); 6443 lea(rscratch1, InternalAddress(last_java_pc)); 6444 movptr(java_pc, rscratch1); 6445 } 6446 6447 movptr(Address(r15_thread, JavaThread::last_Java_sp_offset()), last_java_sp); 6448 } 6449 6450 static void pass_arg0(MacroAssembler* masm, Register arg) { 6451 if (c_rarg0 != arg ) { 6452 masm->mov(c_rarg0, arg); 6453 } 6454 } 6455 6456 static void pass_arg1(MacroAssembler* masm, Register arg) { 6457 if (c_rarg1 != arg ) { 6458 masm->mov(c_rarg1, arg); 6459 } 6460 } 6461 6462 static void pass_arg2(MacroAssembler* masm, Register arg) { 6463 if (c_rarg2 != arg ) { 6464 masm->mov(c_rarg2, arg); 6465 } 6466 } 6467 6468 static void pass_arg3(MacroAssembler* masm, Register arg) { 6469 if (c_rarg3 != arg ) { 6470 masm->mov(c_rarg3, arg); 6471 } 6472 } 6473 6474 void MacroAssembler::stop(const char* msg) { 6475 address rip = pc(); 6476 pusha(); // get regs on stack 6477 lea(c_rarg0, ExternalAddress((address) msg)); 6478 lea(c_rarg1, InternalAddress(rip)); 6479 movq(c_rarg2, rsp); // pass pointer to regs array 6480 andq(rsp, -16); // align stack as required by ABI 6481 call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug64))); 6482 hlt(); 6483 } 6484 6485 void MacroAssembler::warn(const char* msg) { 6486 push(rbp); 6487 movq(rbp, rsp); 6488 andq(rsp, -16); // align stack as required by push_CPU_state and call 6489 push_CPU_state(); // keeps alignment at 16 bytes 6490 lea(c_rarg0, ExternalAddress((address) msg)); 6491 call_VM_leaf(CAST_FROM_FN_PTR(address, warning), c_rarg0); 6492 pop_CPU_state(); 6493 mov(rsp, rbp); 6494 pop(rbp); 6495 } 6496 6497 void MacroAssembler::print_state() { 6498 address rip = pc(); 6499 pusha(); // get regs on stack 6500 push(rbp); 6501 movq(rbp, rsp); 6502 andq(rsp, -16); // align stack as required by push_CPU_state and call 6503 push_CPU_state(); // keeps alignment at 16 bytes 6504 6505 lea(c_rarg0, InternalAddress(rip)); 6506 lea(c_rarg1, Address(rbp, wordSize)); // pass pointer to regs 
array 6507 call_VM_leaf(CAST_FROM_FN_PTR(address, MacroAssembler::print_state64), c_rarg0, c_rarg1); 6508 6509 pop_CPU_state(); 6510 mov(rsp, rbp); 6511 pop(rbp); 6512 popa(); 6513 } 6514 6515 #ifndef PRODUCT 6516 extern "C" void findpc(intptr_t x); 6517 #endif 6518 6519 void MacroAssembler::debug64(char* msg, int64_t pc, int64_t regs[]) { 6520 // In order to get locks to work, we need to fake a in_VM state 6521 if (ShowMessageBoxOnError) { 6522 JavaThread* thread = JavaThread::current(); 6523 JavaThreadState saved_state = thread->thread_state(); 6524 thread->set_thread_state(_thread_in_vm); 6525 #ifndef PRODUCT 6526 if (CountBytecodes || TraceBytecodes || StopInterpreterAt) { 6527 ttyLocker ttyl; 6528 BytecodeCounter::print(); 6529 } 6530 #endif 6531 // To see where a verify_oop failed, get $ebx+40/X for this frame. 6532 // XXX correct this offset for amd64 6533 // This is the value of eip which points to where verify_oop will return. 6534 if (os::message_box(msg, "Execution stopped, print registers?")) { 6535 print_state64(pc, regs); 6536 BREAKPOINT; 6537 assert(false, "start up GDB"); 6538 } 6539 ThreadStateTransition::transition(thread, _thread_in_vm, saved_state); 6540 } else { 6541 ttyLocker ttyl; 6542 ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", 6543 msg); 6544 assert(false, err_msg("DEBUG MESSAGE: %s", msg)); 6545 } 6546 } 6547 6548 void MacroAssembler::print_state64(int64_t pc, int64_t regs[]) { 6549 ttyLocker ttyl; 6550 FlagSetting fs(Debugging, true); 6551 tty->print_cr("rip = 0x%016lx", pc); 6552 #ifndef PRODUCT 6553 tty->cr(); 6554 findpc(pc); 6555 tty->cr(); 6556 #endif 6557 #define PRINT_REG(rax, value) \ 6558 { tty->print("%s = ", #rax); os::print_location(tty, value); } 6559 PRINT_REG(rax, regs[15]); 6560 PRINT_REG(rbx, regs[12]); 6561 PRINT_REG(rcx, regs[14]); 6562 PRINT_REG(rdx, regs[13]); 6563 PRINT_REG(rdi, regs[8]); 6564 PRINT_REG(rsi, regs[9]); 6565 PRINT_REG(rbp, regs[10]); 6566 PRINT_REG(rsp, regs[11]); 6567 
PRINT_REG(r8 , regs[7]); 6568 PRINT_REG(r9 , regs[6]); 6569 PRINT_REG(r10, regs[5]); 6570 PRINT_REG(r11, regs[4]); 6571 PRINT_REG(r12, regs[3]); 6572 PRINT_REG(r13, regs[2]); 6573 PRINT_REG(r14, regs[1]); 6574 PRINT_REG(r15, regs[0]); 6575 #undef PRINT_REG 6576 // Print some words near top of staack. 6577 int64_t* rsp = (int64_t*) regs[11]; 6578 int64_t* dump_sp = rsp; 6579 for (int col1 = 0; col1 < 8; col1++) { 6580 tty->print("(rsp+0x%03x) 0x%016lx: ", (int)((intptr_t)dump_sp - (intptr_t)rsp), (int64_t)dump_sp); 6581 os::print_location(tty, *dump_sp++); 6582 } 6583 for (int row = 0; row < 25; row++) { 6584 tty->print("(rsp+0x%03x) 0x%016lx: ", (int)((intptr_t)dump_sp - (intptr_t)rsp), (int64_t)dump_sp); 6585 for (int col = 0; col < 4; col++) { 6586 tty->print(" 0x%016lx", *dump_sp++); 6587 } 6588 tty->cr(); 6589 } 6590 // Print some instructions around pc: 6591 Disassembler::decode((address)pc-64, (address)pc); 6592 tty->print_cr("--------"); 6593 Disassembler::decode((address)pc, (address)pc+32); 6594 } 6595 6596 #endif // _LP64 6597 6598 // Now versions that are common to 32/64 bit 6599 6600 void MacroAssembler::addptr(Register dst, int32_t imm32) { 6601 LP64_ONLY(addq(dst, imm32)) NOT_LP64(addl(dst, imm32)); 6602 } 6603 6604 void MacroAssembler::addptr(Register dst, Register src) { 6605 LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src)); 6606 } 6607 6608 void MacroAssembler::addptr(Address dst, Register src) { 6609 LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src)); 6610 } 6611 6612 void MacroAssembler::addsd(XMMRegister dst, AddressLiteral src) { 6613 if (reachable(src)) { 6614 Assembler::addsd(dst, as_Address(src)); 6615 } else { 6616 lea(rscratch1, src); 6617 Assembler::addsd(dst, Address(rscratch1, 0)); 6618 } 6619 } 6620 6621 void MacroAssembler::addss(XMMRegister dst, AddressLiteral src) { 6622 if (reachable(src)) { 6623 addss(dst, as_Address(src)); 6624 } else { 6625 lea(rscratch1, src); 6626 addss(dst, Address(rscratch1, 0)); 6627 } 6628 } 6629 6630 
void MacroAssembler::align(int modulus) { 6631 if (offset() % modulus != 0) { 6632 nop(modulus - (offset() % modulus)); 6633 } 6634 } 6635 6636 void MacroAssembler::andpd(XMMRegister dst, AddressLiteral src) { 6637 // Used in sign-masking with aligned address. 6638 assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes"); 6639 if (reachable(src)) { 6640 Assembler::andpd(dst, as_Address(src)); 6641 } else { 6642 lea(rscratch1, src); 6643 Assembler::andpd(dst, Address(rscratch1, 0)); 6644 } 6645 } 6646 6647 void MacroAssembler::andps(XMMRegister dst, AddressLiteral src) { 6648 // Used in sign-masking with aligned address. 6649 assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes"); 6650 if (reachable(src)) { 6651 Assembler::andps(dst, as_Address(src)); 6652 } else { 6653 lea(rscratch1, src); 6654 Assembler::andps(dst, Address(rscratch1, 0)); 6655 } 6656 } 6657 6658 void MacroAssembler::andptr(Register dst, int32_t imm32) { 6659 LP64_ONLY(andq(dst, imm32)) NOT_LP64(andl(dst, imm32)); 6660 } 6661 6662 void MacroAssembler::atomic_incl(AddressLiteral counter_addr) { 6663 pushf(); 6664 if (os::is_MP()) 6665 lock(); 6666 incrementl(counter_addr); 6667 popf(); 6668 } 6669 6670 // Writes to stack successive pages until offset reached to check for 6671 // stack overflow + shadow pages. This clobbers tmp. 6672 void MacroAssembler::bang_stack_size(Register size, Register tmp) { 6673 movptr(tmp, rsp); 6674 // Bang stack for total size given plus shadow page size. 6675 // Bang one page at a time because large size can bang beyond yellow and 6676 // red zones. 6677 Label loop; 6678 bind(loop); 6679 movl(Address(tmp, (-os::vm_page_size())), size ); 6680 subptr(tmp, os::vm_page_size()); 6681 subl(size, os::vm_page_size()); 6682 jcc(Assembler::greater, loop); 6683 6684 // Bang down shadow pages too. 6685 // The -1 because we already subtracted 1 page. 
6686 for (int i = 0; i< StackShadowPages-1; i++) { 6687 // this could be any sized move but this is can be a debugging crumb 6688 // so the bigger the better. 6689 movptr(Address(tmp, (-i*os::vm_page_size())), size ); 6690 } 6691 } 6692 6693 void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) { 6694 assert(UseBiasedLocking, "why call this otherwise?"); 6695 6696 // Check for biased locking unlock case, which is a no-op 6697 // Note: we do not have to check the thread ID for two reasons. 6698 // First, the interpreter checks for IllegalMonitorStateException at 6699 // a higher level. Second, if the bias was revoked while we held the 6700 // lock, the object could not be rebiased toward another thread, so 6701 // the bias bit would be clear. 6702 movptr(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); 6703 andptr(temp_reg, markOopDesc::biased_lock_mask_in_place); 6704 cmpptr(temp_reg, markOopDesc::biased_lock_pattern); 6705 jcc(Assembler::equal, done); 6706 } 6707 6708 void MacroAssembler::c2bool(Register x) { 6709 // implements x == 0 ? 0 : 1 6710 // note: must only look at least-significant byte of x 6711 // since C-style booleans are stored in one byte 6712 // only! 
(was bug) 6713 andl(x, 0xFF); 6714 setb(Assembler::notZero, x); 6715 } 6716 6717 // Wouldn't need if AddressLiteral version had new name 6718 void MacroAssembler::call(Label& L, relocInfo::relocType rtype) { 6719 Assembler::call(L, rtype); 6720 } 6721 6722 void MacroAssembler::call(Register entry) { 6723 Assembler::call(entry); 6724 } 6725 6726 void MacroAssembler::call(AddressLiteral entry) { 6727 if (reachable(entry)) { 6728 Assembler::call_literal(entry.target(), entry.rspec()); 6729 } else { 6730 lea(rscratch1, entry); 6731 Assembler::call(rscratch1); 6732 } 6733 } 6734 6735 void MacroAssembler::ic_call(address entry) { 6736 RelocationHolder rh = virtual_call_Relocation::spec(pc()); 6737 movptr(rax, (intptr_t)Universe::non_oop_word()); 6738 call(AddressLiteral(entry, rh)); 6739 } 6740 6741 // Implementation of call_VM versions 6742 6743 void MacroAssembler::call_VM(Register oop_result, 6744 address entry_point, 6745 bool check_exceptions) { 6746 Label C, E; 6747 call(C, relocInfo::none); 6748 jmp(E); 6749 6750 bind(C); 6751 call_VM_helper(oop_result, entry_point, 0, check_exceptions); 6752 ret(0); 6753 6754 bind(E); 6755 } 6756 6757 void MacroAssembler::call_VM(Register oop_result, 6758 address entry_point, 6759 Register arg_1, 6760 bool check_exceptions) { 6761 Label C, E; 6762 call(C, relocInfo::none); 6763 jmp(E); 6764 6765 bind(C); 6766 pass_arg1(this, arg_1); 6767 call_VM_helper(oop_result, entry_point, 1, check_exceptions); 6768 ret(0); 6769 6770 bind(E); 6771 } 6772 6773 void MacroAssembler::call_VM(Register oop_result, 6774 address entry_point, 6775 Register arg_1, 6776 Register arg_2, 6777 bool check_exceptions) { 6778 Label C, E; 6779 call(C, relocInfo::none); 6780 jmp(E); 6781 6782 bind(C); 6783 6784 LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg")); 6785 6786 pass_arg2(this, arg_2); 6787 pass_arg1(this, arg_1); 6788 call_VM_helper(oop_result, entry_point, 2, check_exceptions); 6789 ret(0); 6790 6791 bind(E); 6792 } 6793 6794 void 
MacroAssembler::call_VM(Register oop_result, 6795 address entry_point, 6796 Register arg_1, 6797 Register arg_2, 6798 Register arg_3, 6799 bool check_exceptions) { 6800 Label C, E; 6801 call(C, relocInfo::none); 6802 jmp(E); 6803 6804 bind(C); 6805 6806 LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg")); 6807 LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg")); 6808 pass_arg3(this, arg_3); 6809 6810 LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg")); 6811 pass_arg2(this, arg_2); 6812 6813 pass_arg1(this, arg_1); 6814 call_VM_helper(oop_result, entry_point, 3, check_exceptions); 6815 ret(0); 6816 6817 bind(E); 6818 } 6819 6820 void MacroAssembler::call_VM(Register oop_result, 6821 Register last_java_sp, 6822 address entry_point, 6823 int number_of_arguments, 6824 bool check_exceptions) { 6825 Register thread = LP64_ONLY(r15_thread) NOT_LP64(noreg); 6826 call_VM_base(oop_result, thread, last_java_sp, entry_point, number_of_arguments, check_exceptions); 6827 } 6828 6829 void MacroAssembler::call_VM(Register oop_result, 6830 Register last_java_sp, 6831 address entry_point, 6832 Register arg_1, 6833 bool check_exceptions) { 6834 pass_arg1(this, arg_1); 6835 call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions); 6836 } 6837 6838 void MacroAssembler::call_VM(Register oop_result, 6839 Register last_java_sp, 6840 address entry_point, 6841 Register arg_1, 6842 Register arg_2, 6843 bool check_exceptions) { 6844 6845 LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg")); 6846 pass_arg2(this, arg_2); 6847 pass_arg1(this, arg_1); 6848 call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions); 6849 } 6850 6851 void MacroAssembler::call_VM(Register oop_result, 6852 Register last_java_sp, 6853 address entry_point, 6854 Register arg_1, 6855 Register arg_2, 6856 Register arg_3, 6857 bool check_exceptions) { 6858 LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg")); 6859 LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg")); 6860 pass_arg3(this, arg_3); 6861 
LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg")); 6862 pass_arg2(this, arg_2); 6863 pass_arg1(this, arg_1); 6864 call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions); 6865 } 6866 6867 void MacroAssembler::super_call_VM(Register oop_result, 6868 Register last_java_sp, 6869 address entry_point, 6870 int number_of_arguments, 6871 bool check_exceptions) { 6872 Register thread = LP64_ONLY(r15_thread) NOT_LP64(noreg); 6873 MacroAssembler::call_VM_base(oop_result, thread, last_java_sp, entry_point, number_of_arguments, check_exceptions); 6874 } 6875 6876 void MacroAssembler::super_call_VM(Register oop_result, 6877 Register last_java_sp, 6878 address entry_point, 6879 Register arg_1, 6880 bool check_exceptions) { 6881 pass_arg1(this, arg_1); 6882 super_call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions); 6883 } 6884 6885 void MacroAssembler::super_call_VM(Register oop_result, 6886 Register last_java_sp, 6887 address entry_point, 6888 Register arg_1, 6889 Register arg_2, 6890 bool check_exceptions) { 6891 6892 LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg")); 6893 pass_arg2(this, arg_2); 6894 pass_arg1(this, arg_1); 6895 super_call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions); 6896 } 6897 6898 void MacroAssembler::super_call_VM(Register oop_result, 6899 Register last_java_sp, 6900 address entry_point, 6901 Register arg_1, 6902 Register arg_2, 6903 Register arg_3, 6904 bool check_exceptions) { 6905 LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg")); 6906 LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg")); 6907 pass_arg3(this, arg_3); 6908 LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg")); 6909 pass_arg2(this, arg_2); 6910 pass_arg1(this, arg_1); 6911 super_call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions); 6912 } 6913 6914 void MacroAssembler::call_VM_base(Register oop_result, 6915 Register java_thread, 6916 Register last_java_sp, 6917 address entry_point, 6918 int number_of_arguments, 6919 bool 
check_exceptions) { 6920 // determine java_thread register 6921 if (!java_thread->is_valid()) { 6922 #ifdef _LP64 6923 java_thread = r15_thread; 6924 #else 6925 java_thread = rdi; 6926 get_thread(java_thread); 6927 #endif // LP64 6928 } 6929 // determine last_java_sp register 6930 if (!last_java_sp->is_valid()) { 6931 last_java_sp = rsp; 6932 } 6933 // debugging support 6934 assert(number_of_arguments >= 0 , "cannot have negative number of arguments"); 6935 LP64_ONLY(assert(java_thread == r15_thread, "unexpected register")); 6936 #ifdef ASSERT 6937 // TraceBytecodes does not use r12 but saves it over the call, so don't verify 6938 // r12 is the heapbase. 6939 LP64_ONLY(if (UseCompressedOops && !TraceBytecodes) verify_heapbase("call_VM_base");) 6940 #endif // ASSERT 6941 6942 assert(java_thread != oop_result , "cannot use the same register for java_thread & oop_result"); 6943 assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp"); 6944 6945 // push java thread (becomes first argument of C function) 6946 6947 NOT_LP64(push(java_thread); number_of_arguments++); 6948 LP64_ONLY(mov(c_rarg0, r15_thread)); 6949 6950 // set last Java frame before call 6951 assert(last_java_sp != rbp, "can't use ebp/rbp"); 6952 6953 // Only interpreter should have to set fp 6954 set_last_Java_frame(java_thread, last_java_sp, rbp, NULL); 6955 6956 // do the call, remove parameters 6957 MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments); 6958 6959 // restore the thread (cannot use the pushed argument since arguments 6960 // may be overwritten by C code generated by an optimizing compiler); 6961 // however can use the register value directly if it is callee saved. 
6962 if (LP64_ONLY(true ||) java_thread == rdi || java_thread == rsi) { 6963 // rdi & rsi (also r15) are callee saved -> nothing to do 6964 #ifdef ASSERT 6965 guarantee(java_thread != rax, "change this code"); 6966 push(rax); 6967 { Label L; 6968 get_thread(rax); 6969 cmpptr(java_thread, rax); 6970 jcc(Assembler::equal, L); 6971 STOP("MacroAssembler::call_VM_base: rdi not callee saved?"); 6972 bind(L); 6973 } 6974 pop(rax); 6975 #endif 6976 } else { 6977 get_thread(java_thread); 6978 } 6979 // reset last Java frame 6980 // Only interpreter should have to clear fp 6981 reset_last_Java_frame(java_thread, true, false); 6982 6983 #ifndef CC_INTERP 6984 // C++ interp handles this in the interpreter 6985 check_and_handle_popframe(java_thread); 6986 check_and_handle_earlyret(java_thread); 6987 #endif /* CC_INTERP */ 6988 6989 if (check_exceptions) { 6990 // check for pending exceptions (java_thread is set upon return) 6991 cmpptr(Address(java_thread, Thread::pending_exception_offset()), (int32_t) NULL_WORD); 6992 #ifndef _LP64 6993 jump_cc(Assembler::notEqual, 6994 RuntimeAddress(StubRoutines::forward_exception_entry())); 6995 #else 6996 // This used to conditionally jump to forward_exception however it is 6997 // possible if we relocate that the branch will not reach. So we must jump 6998 // around so we can always reach 6999 7000 Label ok; 7001 jcc(Assembler::equal, ok); 7002 jump(RuntimeAddress(StubRoutines::forward_exception_entry())); 7003 bind(ok); 7004 #endif // LP64 7005 } 7006 7007 // get oop result if there is one and reset the value in the thread 7008 if (oop_result->is_valid()) { 7009 get_vm_result(oop_result, java_thread); 7010 } 7011 } 7012 7013 void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) { 7014 7015 // Calculate the value for last_Java_sp 7016 // somewhat subtle. 
call_VM does an intermediate call
// which places a return address on the stack just under the
// stack pointer as the user finished with it. This allows
// us to retrieve last_Java_pc from last_Java_sp[-1].
// On 32bit we then have to push additional args on the stack to accomplish
// the actual requested call. On 64bit call_VM only can use register args
// so the only extra space is the return address that call_VM created.
// This hopefully explains the calculations here.

#ifdef _LP64
  // We've pushed one address, correct last_Java_sp
  lea(rax, Address(rsp, wordSize));
#else
  lea(rax, Address(rsp, (1 + number_of_arguments) * wordSize));
#endif // LP64

  call_VM_base(oop_result, noreg, rax, entry_point, number_of_arguments, check_exceptions);

}

// Leaf call into the VM: no last_Java_frame bookkeeping, no exception check.
void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) {
  call_VM_leaf_base(entry_point, number_of_arguments);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) {
  pass_arg0(this, arg_0);
  call_VM_leaf(entry_point, 1);
}

// Args are passed last-to-first so that placing a later arg into its
// c_rarg register (64bit) cannot clobber an earlier not-yet-passed arg;
// the asserts check the caller did not hand us an already-conflicting register.
void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {

  LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
  pass_arg1(this, arg_1);
  pass_arg0(this, arg_0);
  call_VM_leaf(entry_point, 2);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) {
  LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg"));
  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);
  LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
  pass_arg1(this, arg_1);
  pass_arg0(this, arg_0);
  call_VM_leaf(entry_point, 3);
}

// super_call_VM_leaf: like call_VM_leaf but goes straight to the base
// implementation, bypassing any virtual override of call_VM_leaf.
void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0) {
  pass_arg0(this, arg_0);
  MacroAssembler::call_VM_leaf_base(entry_point, 1);
}

void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {

  LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
  pass_arg1(this, arg_1);
  pass_arg0(this, arg_0);
  MacroAssembler::call_VM_leaf_base(entry_point, 2);
}

void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) {
  LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg"));
  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);
  LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
  pass_arg1(this, arg_1);
  pass_arg0(this, arg_0);
  MacroAssembler::call_VM_leaf_base(entry_point, 3);
}

void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3) {
  LP64_ONLY(assert(arg_0 != c_rarg3, "smashed arg"));
  LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"));
  LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"));
  pass_arg3(this, arg_3);
  LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg"));
  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);
  LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
  pass_arg1(this, arg_1);
  pass_arg0(this, arg_0);
  MacroAssembler::call_VM_leaf_base(entry_point, 4);
}

// Fetch the oop the VM left in JavaThread::vm_result and clear the field
// (NULL_WORD) so a stale oop is never seen by a later call.
void MacroAssembler::get_vm_result(Register oop_result, Register java_thread) {
  movptr(oop_result, Address(java_thread, JavaThread::vm_result_offset()));
  movptr(Address(java_thread, JavaThread::vm_result_offset()), NULL_WORD);
  verify_oop(oop_result, "broken oop in call_VM_base");
}

// Same for the secondary (metadata) result slot; not an oop, so no verify.
void MacroAssembler::get_vm_result_2(Register metadata_result, Register java_thread) {
  movptr(metadata_result, Address(java_thread, JavaThread::vm_result_2_offset()));
  movptr(Address(java_thread, JavaThread::vm_result_2_offset()), NULL_WORD);
}

void
MacroAssembler::check_and_handle_earlyret(Register java_thread) {
}

void MacroAssembler::check_and_handle_popframe(Register java_thread) {
}

// cmp32/cmp8 AddressLiteral variants: use the literal directly when it is
// rip-reachable, otherwise materialize the address in rscratch1 first.
void MacroAssembler::cmp32(AddressLiteral src1, int32_t imm) {
  if (reachable(src1)) {
    cmpl(as_Address(src1), imm);
  } else {
    lea(rscratch1, src1);
    cmpl(Address(rscratch1, 0), imm);
  }
}

void MacroAssembler::cmp32(Register src1, AddressLiteral src2) {
  assert(!src2.is_lval(), "use cmpptr");
  if (reachable(src2)) {
    cmpl(src1, as_Address(src2));
  } else {
    lea(rscratch1, src2);
    cmpl(src1, Address(rscratch1, 0));
  }
}

void MacroAssembler::cmp32(Register src1, int32_t imm) {
  Assembler::cmpl(src1, imm);
}

void MacroAssembler::cmp32(Register src1, Address src2) {
  Assembler::cmpl(src1, src2);
}

// Compare two doubles and produce -1/0/1 in dst (Java fcmpl/fcmpg style).
// ucomisd sets PF on an unordered (NaN) operand; unordered_is_less selects
// whether NaN compares as -1 or +1.
void MacroAssembler::cmpsd2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less) {
  ucomisd(opr1, opr2);

  Label L;
  if (unordered_is_less) {
    movl(dst, -1);
    jcc(Assembler::parity, L);
    jcc(Assembler::below , L);
    movl(dst, 0);
    jcc(Assembler::equal , L);
    increment(dst);
  } else { // unordered is greater
    movl(dst, 1);
    jcc(Assembler::parity, L);
    jcc(Assembler::above , L);
    movl(dst, 0);
    jcc(Assembler::equal , L);
    decrementl(dst);
  }
  bind(L);
}

// Float version of cmpsd2int (see above).
void MacroAssembler::cmpss2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less) {
  ucomiss(opr1, opr2);

  Label L;
  if (unordered_is_less) {
    movl(dst, -1);
    jcc(Assembler::parity, L);
    jcc(Assembler::below , L);
    movl(dst, 0);
    jcc(Assembler::equal , L);
    increment(dst);
  } else { // unordered is greater
    movl(dst, 1);
    jcc(Assembler::parity, L);
    jcc(Assembler::above , L);
    movl(dst, 0);
    jcc(Assembler::equal , L);
    decrementl(dst);
  }
  bind(L);
}


void MacroAssembler::cmp8(AddressLiteral src1, int imm) {
  if (reachable(src1)) {
    cmpb(as_Address(src1), imm);
  } else {
    lea(rscratch1, src1);
    cmpb(Address(rscratch1, 0), imm);
  }
}

// Pointer-width compare against an AddressLiteral; an is_lval literal means
// compare against the literal address itself, not the value stored there.
void MacroAssembler::cmpptr(Register src1, AddressLiteral src2) {
#ifdef _LP64
  if (src2.is_lval()) {
    movptr(rscratch1, src2);
    Assembler::cmpq(src1, rscratch1);
  } else if (reachable(src2)) {
    cmpq(src1, as_Address(src2));
  } else {
    lea(rscratch1, src2);
    Assembler::cmpq(src1, Address(rscratch1, 0));
  }
#else
  if (src2.is_lval()) {
    cmp_literal32(src1, (int32_t) src2.target(), src2.rspec());
  } else {
    cmpl(src1, as_Address(src2));
  }
#endif // _LP64
}

void MacroAssembler::cmpptr(Address src1, AddressLiteral src2) {
  assert(src2.is_lval(), "not a mem-mem compare");
#ifdef _LP64
  // moves src2's literal address
  movptr(rscratch1, src2);
  Assembler::cmpq(src1, rscratch1);
#else
  cmp_literal32(src1, (int32_t) src2.target(), src2.rspec());
#endif // _LP64
}

// Atomic compare-and-exchange of a pointer at a literal address.
// The lock prefix is only emitted on multi-processor machines.
void MacroAssembler::locked_cmpxchgptr(Register reg, AddressLiteral adr) {
  if (reachable(adr)) {
    if (os::is_MP())
      lock();
    cmpxchgptr(reg, as_Address(adr));
  } else {
    lea(rscratch1, adr);
    if (os::is_MP())
      lock();
    cmpxchgptr(reg, Address(rscratch1, 0));
  }
}

void MacroAssembler::cmpxchgptr(Register reg, Address adr) {
  LP64_ONLY(cmpxchgq(reg, adr)) NOT_LP64(cmpxchgl(reg, adr));
}

void MacroAssembler::comisd(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    Assembler::comisd(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::comisd(dst, Address(rscratch1, 0));
  }
}

void MacroAssembler::comiss(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
Assembler::comiss(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::comiss(dst, Address(rscratch1, 0));
  }
}


// Increment a 32-bit counter at counter_addr when 'cond' holds
// (jumps around the increment on the negated condition).
void MacroAssembler::cond_inc32(Condition cond, AddressLiteral counter_addr) {
  Condition negated_cond = negate_condition(cond);
  Label L;
  jcc(negated_cond, L);
  atomic_incl(counter_addr);
  bind(L);
}

int MacroAssembler::corrected_idivl(Register reg) {
  // Full implementation of Java idiv and irem; checks for
  // special case as described in JVM spec., p.243 & p.271.
  // The function returns the (pc) offset of the idivl
  // instruction - may be needed for implicit exceptions.
  //
  //         normal case                           special case
  //
  // input : rax,: dividend                         min_int
  //         reg: divisor   (may not be rax,/rdx)   -1
  //
  // output: rax,: quotient  (= rax, idiv reg)       min_int
  //         rdx: remainder (= rax, irem reg)       0
  assert(reg != rax && reg != rdx, "reg cannot be rax, or rdx register");
  const int min_int = 0x80000000;
  Label normal_case, special_case;

  // check for special case
  cmpl(rax, min_int);
  jcc(Assembler::notEqual, normal_case);
  xorl(rdx, rdx); // prepare rdx for possible special case (where remainder = 0)
  cmpl(reg, -1);
  jcc(Assembler::equal, special_case);

  // handle normal case
  bind(normal_case);
  cdql();
  int idivl_offset = offset();
  idivl(reg);

  // normal and special case exit
  bind(special_case);

  return idivl_offset;
}



// Subtract 'value' from reg, picking the shortest encoding.
// min_jint is handled first because -min_jint overflows.
void MacroAssembler::decrementl(Register reg, int value) {
  if (value == min_jint) {subl(reg, value) ; return; }
  if (value <  0) { incrementl(reg, -value); return; }
  if (value == 0) {                        ; return; }
  if (value == 1 && UseIncDec) { decl(reg) ; return; }
  /* else */      { subl(reg, value)       ; return; }
}

void MacroAssembler::decrementl(Address dst, int value) {
  if (value == min_jint) {subl(dst, value) ; return; }
  if (value <  0) { incrementl(dst, -value); return; }
  if (value == 0) {                        ; return; }
  if (value == 1 && UseIncDec) { decl(dst) ; return; }
  /* else */      { subl(dst, value)       ; return; }
}

// Signed division by 2^shift_value via arithmetic shift; negative
// dividends are biased by 2^shift - 1 first so the shift rounds toward zero.
void MacroAssembler::division_with_shift (Register reg, int shift_value) {
  assert (shift_value > 0, "illegal shift value");
  Label _is_positive;
  testl (reg, reg);
  jcc (Assembler::positive, _is_positive);
  int offset = (1 << shift_value) - 1 ;

  if (offset == 1) {
    incrementl(reg);
  } else {
    addl(reg, offset);
  }

  bind (_is_positive);
  sarl(reg, shift_value);
}

void MacroAssembler::divsd(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    Assembler::divsd(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::divsd(dst, Address(rscratch1, 0));
  }
}

void MacroAssembler::divss(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    Assembler::divss(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::divss(dst, Address(rscratch1, 0));
  }
}

// !defined(COMPILER2) is because of stupid core builds
#if !defined(_LP64) || defined(COMPILER1) || !defined(COMPILER2)
void MacroAssembler::empty_FPU_stack() {
  if (VM_Version::supports_mmx()) {
    emms();
  } else {
    for (int i = 8; i-- > 0; ) ffree(i);
  }
}
#endif // !LP64 || C1 || !C2


// Defines obj, preserves var_size_in_bytes
void MacroAssembler::eden_allocate(Register obj,
                                   Register var_size_in_bytes,
                                   int con_size_in_bytes,
                                   Register t1,
                                   Label& slow_case) {
  assert(obj == rax, "obj must be in rax, for cmpxchg");
  assert_different_registers(obj, var_size_in_bytes, t1);
  if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) {
    jmp(slow_case);
  } else {
    Register
end = t1;
    Label retry;
    bind(retry);
    // CAS-loop bump of the shared eden top pointer.
    ExternalAddress heap_top((address) Universe::heap()->top_addr());
    movptr(obj, heap_top);
    if (var_size_in_bytes == noreg) {
      lea(end, Address(obj, con_size_in_bytes));
    } else {
      lea(end, Address(obj, var_size_in_bytes, Address::times_1));
    }
    // if end < obj then we wrapped around => object too long => slow case
    cmpptr(end, obj);
    jcc(Assembler::below, slow_case);
    cmpptr(end, ExternalAddress((address) Universe::heap()->end_addr()));
    jcc(Assembler::above, slow_case);
    // Compare obj with the top addr, and if still equal, store the new top addr in
    // end at the address of the top addr pointer. Sets ZF if was equal, and clears
    // it otherwise. Use lock prefix for atomicity on MPs.
    locked_cmpxchgptr(end, heap_top);
    jcc(Assembler::notEqual, retry);
  }
}

// Standard frame prologue: save caller's rbp and establish the new frame pointer.
void MacroAssembler::enter() {
  push(rbp);
  mov(rbp, rsp);
}

// A 5 byte nop that is safe for patching (see patch_verified_entry)
void MacroAssembler::fat_nop() {
  if (UseAddressNop) {
    addr_nop_5();
  } else {
    // four segment-override prefixes + nop = 5 bytes
    emit_byte(0x26); // es:
    emit_byte(0x2e); // cs:
    emit_byte(0x64); // fs:
    emit_byte(0x65); // gs:
    emit_byte(0x90);
  }
}

void MacroAssembler::fcmp(Register tmp) {
  fcmp(tmp, 1, true, true);
}

// Compare ST0 with ST(index) and leave the result in eflags; optionally pops
// one or both operands. 'tmp' is only needed on pre-cmov hardware.
void MacroAssembler::fcmp(Register tmp, int index, bool pop_left, bool pop_right) {
  assert(!pop_right || pop_left, "usage error");
  if (VM_Version::supports_cmov()) {
    assert(tmp == noreg, "unneeded temp");
    if (pop_left) {
      fucomip(index);
    } else {
      fucomi(index);
    }
    if (pop_right) {
      fpop();
    }
  } else {
    assert(tmp != noreg, "need temp");
    if (pop_left) {
      if (pop_right) {
        fcompp();
      } else {
        fcomp(index);
      }
    } else {
      fcom(index);
    }
    // convert FPU condition into eflags condition via rax,
    save_rax(tmp);
    fwait(); fnstsw_ax();
    sahf();
    restore_rax(tmp);
  }
  // condition codes set as follows:
  //
  // CF (corresponds to C0) if x < y
  // PF (corresponds to C2) if unordered
  // ZF (corresponds to C3) if x = y
}

void MacroAssembler::fcmp2int(Register dst, bool unordered_is_less) {
  fcmp2int(dst, unordered_is_less, 1, true, true);
}

// x87 analogue of cmpsd2int: compare ST0 with ST(index) and materialize
// -1/0/1 in dst, with NaN mapped per unordered_is_less.
void MacroAssembler::fcmp2int(Register dst, bool unordered_is_less, int index, bool pop_left, bool pop_right) {
  fcmp(VM_Version::supports_cmov() ? noreg : dst, index, pop_left, pop_right);
  Label L;
  if (unordered_is_less) {
    movl(dst, -1);
    jcc(Assembler::parity, L);
    jcc(Assembler::below , L);
    movl(dst, 0);
    jcc(Assembler::equal , L);
    increment(dst);
  } else { // unordered is greater
    movl(dst, 1);
    jcc(Assembler::parity, L);
    jcc(Assembler::above , L);
    movl(dst, 0);
    jcc(Assembler::equal , L);
    decrementl(dst);
  }
  bind(L);
}

void MacroAssembler::fld_d(AddressLiteral src) {
  fld_d(as_Address(src));
}

void MacroAssembler::fld_s(AddressLiteral src) {
  fld_s(as_Address(src));
}

void MacroAssembler::fld_x(AddressLiteral src) {
  Assembler::fld_x(as_Address(src));
}

void MacroAssembler::fldcw(AddressLiteral src) {
  Assembler::fldcw(as_Address(src));
}

void MacroAssembler::pow_exp_core_encoding() {
  // kills rax, rcx, rdx
  subptr(rsp,sizeof(jdouble));
  // computes 2^X. Stack: X ...
  // f2xm1 computes 2^X-1 but only operates on -1<=X<=1. Get int(X) and
  // keep it on the thread's stack to compute 2^int(X) later
  // then compute 2^(X-int(X)) as (2^(X-int(X))-1)+1
  // final result is obtained with: 2^X = 2^int(X) * 2^(X-int(X))
  fld_s(0);                 // Stack: X X ...
  frndint();                // Stack: int(X) X ...
  fsuba(1);                 // Stack: int(X) X-int(X) ...
fistp_s(Address(rsp,0));  // move int(X) as integer to thread's stack. Stack: X-int(X) ...
  f2xm1();                  // Stack: 2^(X-int(X))-1 ...
  fld1();                   // Stack: 1 2^(X-int(X))-1 ...
  faddp(1);                 // Stack: 2^(X-int(X))
  // computes 2^(int(X)): add exponent bias (1023) to int(X), then
  // shift int(X)+1023 to exponent position.
  // Exponent is limited to 11 bits if int(X)+1023 does not fit in 11
  // bits, set result to NaN. 0x000 and 0x7FF are reserved exponent
  // values so detect them and set result to NaN.
  movl(rax,Address(rsp,0));
  movl(rcx, -2048); // 11 bit mask and valid NaN binary encoding
  addl(rax, 1023);
  movl(rdx,rax);
  shll(rax,20);
  // Check that 0 < int(X)+1023 < 2047. Otherwise set rax to NaN.
  addl(rdx,1);
  // Check that 1 < int(X)+1023+1 < 2048
  // in 3 steps:
  // 1- (int(X)+1023+1)&-2048 == 0 => 0 <= int(X)+1023+1 < 2048
  // 2- (int(X)+1023+1)&-2048 != 0
  // 3- (int(X)+1023+1)&-2048 != 1
  // Do 2- first because addl just updated the flags.
  cmov32(Assembler::equal,rax,rcx);
  cmpl(rdx,1);
  cmov32(Assembler::equal,rax,rcx);
  testl(rdx,rcx);
  cmov32(Assembler::notEqual,rax,rcx);
  // Assemble the double 2^int(X) in memory (high word = exponent bits,
  // low word = 0) and multiply it into the x87 result.
  movl(Address(rsp,4),rax);
  movl(Address(rsp,0),0);
  fmul_d(Address(rsp,0));   // Stack: 2^X ...
  addptr(rsp,sizeof(jdouble));
}

// Raise the x87 precision control field to extended (64-bit) precision;
// the old control word is left on the stack for restore_precision().
void MacroAssembler::increase_precision() {
  subptr(rsp, BytesPerWord);
  fnstcw(Address(rsp, 0));
  movl(rax, Address(rsp, 0));
  orl(rax, 0x300);
  push(rax);
  fldcw(Address(rsp, 0));
  pop(rax);
}

// Reload the control word saved by increase_precision().
void MacroAssembler::restore_precision() {
  fldcw(Address(rsp, 0));
  addptr(rsp, BytesPerWord);
}

void MacroAssembler::fast_pow() {
  // computes X^Y = 2^(Y * log2(X))
  // if fast computation is not possible, result is NaN. Requires
  // fallback from user of this macro.
  // increase precision for intermediate steps of the computation
  increase_precision();
  fyl2x();                 // Stack: (Y*log2(X)) ...
  pow_exp_core_encoding(); // Stack: exp(X) ...
  restore_precision();
}

void MacroAssembler::fast_exp() {
  // computes exp(X) = 2^(X * log2(e))
  // if fast computation is not possible, result is NaN. Requires
  // fallback from user of this macro.
  // increase precision for intermediate steps of the computation
  increase_precision();
  fldl2e();                // Stack: log2(e) X ...
  fmulp(1);                // Stack: (X*log2(e)) ...
  pow_exp_core_encoding(); // Stack: exp(X) ...
  restore_precision();
}

// Full Math.pow / Math.exp: try the fast x87 path, detect NaN results by
// comparing the result to itself, and fall back to the shared runtime.
void MacroAssembler::pow_or_exp(bool is_exp, int num_fpu_regs_in_use) {
  // kills rax, rcx, rdx
  // pow and exp needs 2 extra registers on the fpu stack.
  Label slow_case, done;
  Register tmp = noreg;
  if (!VM_Version::supports_cmov()) {
    // fcmp needs a temporary so preserve rdx,
    tmp = rdx;
  }
  Register tmp2 = rax;
  Register tmp3 = rcx;

  if (is_exp) {
    // Stack: X
    fld_s(0);                   // duplicate argument for runtime call. Stack: X X
    fast_exp();                 // Stack: exp(X) X
    fcmp(tmp, 0, false, false); // Stack: exp(X) X
    // exp(X) not equal to itself: exp(X) is NaN go to slow case.
    jcc(Assembler::parity, slow_case);
    // get rid of duplicate argument. Stack: exp(X)
    if (num_fpu_regs_in_use > 0) {
      fxch();
      fpop();
    } else {
      ffree(1);
    }
    jmp(done);
  } else {
    // Stack: X Y
    Label x_negative, y_odd;

    fldz();                     // Stack: 0 X Y
    fcmp(tmp, 1, true, false);  // Stack: X Y
    jcc(Assembler::above, x_negative);

    // X >= 0

    fld_s(1);                   // duplicate arguments for runtime call. Stack: Y X Y
    fld_s(1);                   // Stack: X Y X Y
    fast_pow();                 // Stack: X^Y X Y
    fcmp(tmp, 0, false, false); // Stack: X^Y X Y
    // X^Y not equal to itself: X^Y is NaN go to slow case.
jcc(Assembler::parity, slow_case);
    // get rid of duplicate arguments. Stack: X^Y
    if (num_fpu_regs_in_use > 0) {
      fxch(); fpop();
      fxch(); fpop();
    } else {
      ffree(2);
      ffree(1);
    }
    jmp(done);

    // X <= 0
    bind(x_negative);

    fld_s(1);                   // Stack: Y X Y
    frndint();                  // Stack: int(Y) X Y
    fcmp(tmp, 2, false, false); // Stack: int(Y) X Y
    jcc(Assembler::notEqual, slow_case);

    subptr(rsp, 8);

    // For X^Y, when X < 0, Y has to be an integer and the final
    // result depends on whether it's odd or even. We just checked
    // that int(Y) == Y. We move int(Y) to gp registers as a 64 bit
    // integer to test its parity. If int(Y) is huge and doesn't fit
    // in the 64 bit integer range, the integer indefinite value will
    // end up in the gp registers. Huge numbers are all even, the
    // integer indefinite number is even so it's fine.

#ifdef ASSERT
    // Let's check we don't end up with an integer indefinite number
    // when not expected. First test for huge numbers: check whether
    // int(Y)+1 == int(Y) which is true for very large numbers and
    // those are all even. A 64 bit integer is guaranteed to not
    // overflow for numbers where y+1 != y (when precision is set to
    // double precision).
    Label y_not_huge;

    fld1();                     // Stack: 1 int(Y) X Y
    fadd(1);                    // Stack: 1+int(Y) int(Y) X Y

#ifdef _LP64
    // trip to memory to force the precision down from double extended
    // precision
    fstp_d(Address(rsp, 0));
    fld_d(Address(rsp, 0));
#endif

    fcmp(tmp, 1, true, false);  // Stack: int(Y) X Y
#endif

    // move int(Y) as 64 bit integer to thread's stack
    fistp_d(Address(rsp,0));    // Stack: X Y

#ifdef ASSERT
    jcc(Assembler::notEqual, y_not_huge);

    // Y is huge so we know it's even. It may not fit in a 64 bit
    // integer and we don't want the debug code below to see the
    // integer indefinite value so overwrite int(Y) on the thread's
    // stack with 0.
    movl(Address(rsp, 0), 0);
    movl(Address(rsp, 4), 0);

    bind(y_not_huge);
#endif

    fld_s(1);                   // duplicate arguments for runtime call. Stack: Y X Y
    fld_s(1);                   // Stack: X Y X Y
    fabs();                     // Stack: abs(X) Y X Y
    fast_pow();                 // Stack: abs(X)^Y X Y
    fcmp(tmp, 0, false, false); // Stack: abs(X)^Y X Y
    // abs(X)^Y not equal to itself: abs(X)^Y is NaN go to slow case.

    pop(tmp2);
    NOT_LP64(pop(tmp3));
    jcc(Assembler::parity, slow_case);

#ifdef ASSERT
    // Check that int(Y) is not integer indefinite value (int
    // overflow). Shouldn't happen because for values that would
    // overflow, 1+int(Y)==Y which was tested earlier.
#ifndef _LP64
    {
      Label integer;
      testl(tmp2, tmp2);
      jcc(Assembler::notZero, integer);
      cmpl(tmp3, 0x80000000);
      jcc(Assembler::notZero, integer);
      STOP("integer indefinite value shouldn't be seen here");
      bind(integer);
    }
#else
    {
      Label integer;
      mov(tmp3, tmp2); // preserve tmp2 for parity check below
      shlq(tmp3, 1);
      jcc(Assembler::carryClear, integer);
      jcc(Assembler::notZero, integer);
      STOP("integer indefinite value shouldn't be seen here");
      bind(integer);
    }
#endif
#endif

    // get rid of duplicate arguments. Stack: X^Y
    if (num_fpu_regs_in_use > 0) {
      fxch(); fpop();
      fxch(); fpop();
    } else {
      ffree(2);
      ffree(1);
    }

    testl(tmp2, 1);
    jcc(Assembler::zero, done); // X <= 0, Y even: X^Y = abs(X)^Y
    // X <= 0, Y odd: X^Y = -abs(X)^Y
    fchs();                     // Stack: -abs(X)^Y Y
    jmp(done);
  }

  // slow case: runtime call
  bind(slow_case);

  fpop();                       // pop incorrect result or int(Y)

  fp_runtime_fallback(is_exp ? CAST_FROM_FN_PTR(address, SharedRuntime::dexp) : CAST_FROM_FN_PTR(address, SharedRuntime::dpow),
                      is_exp ? 1 : 2, num_fpu_regs_in_use);

  // Come here with result in F-TOS
  bind(done);
}

// Pop the x87 top-of-stack: free the register then rotate the stack.
void MacroAssembler::fpop() {
  ffree();
  fincstp();
}

// Java-semantics remainder: loop on fprem until the C2 "reduction
// incomplete" bit clears (64bit reads the status word directly;
// 32bit goes through sahf/parity).
void MacroAssembler::fremr(Register tmp) {
  save_rax(tmp);
  { Label L;
    bind(L);
    fprem();
    fwait(); fnstsw_ax();
#ifdef _LP64
    testl(rax, 0x400);
    jcc(Assembler::notEqual, L);
#else
    sahf();
    jcc(Assembler::parity, L);
#endif // _LP64
  }
  restore_rax(tmp);
  // Result is in ST0.
// Note: fxch & fpop to get rid of ST1
  // (otherwise FPU stack could overflow eventually)
  fxch(1);
  fpop();
}


void MacroAssembler::incrementl(AddressLiteral dst) {
  if (reachable(dst)) {
    incrementl(as_Address(dst));
  } else {
    lea(rscratch1, dst);
    incrementl(Address(rscratch1, 0));
  }
}

void MacroAssembler::incrementl(ArrayAddress dst) {
  incrementl(as_Address(dst));
}

// Add 'value' to reg, picking the shortest encoding.
// min_jint is handled first because -min_jint overflows.
void MacroAssembler::incrementl(Register reg, int value) {
  if (value == min_jint) {addl(reg, value) ; return; }
  if (value <  0) { decrementl(reg, -value); return; }
  if (value == 0) {                        ; return; }
  if (value == 1 && UseIncDec) { incl(reg) ; return; }
  /* else */      { addl(reg, value)       ; return; }
}

void MacroAssembler::incrementl(Address dst, int value) {
  if (value == min_jint) {addl(dst, value) ; return; }
  if (value <  0) { decrementl(dst, -value); return; }
  if (value == 0) {                        ; return; }
  if (value == 1 && UseIncDec) { incl(dst) ; return; }
  /* else */      { addl(dst, value)       ; return; }
}

void MacroAssembler::jump(AddressLiteral dst) {
  if (reachable(dst)) {
    jmp_literal(dst.target(), dst.rspec());
  } else {
    lea(rscratch1, dst);
    jmp(rscratch1);
  }
}

// Conditional jump to a literal target. When reachable, emit a short (2-byte)
// or near (6-byte) Jcc directly; otherwise synthesize it by reversing the
// condition around an indirect jump through rscratch1.
void MacroAssembler::jump_cc(Condition cc, AddressLiteral dst) {
  if (reachable(dst)) {
    InstructionMark im(this);
    relocate(dst.reloc());
    const int short_size = 2;
    const int long_size = 6;
    int offs = (intptr_t)dst.target() - ((intptr_t)_code_pos);
    if (dst.reloc() == relocInfo::none && is8bit(offs - short_size)) {
      // 0111 tttn #8-bit disp
      emit_byte(0x70 | cc);
      emit_byte((offs - short_size) & 0xFF);
    } else {
      // 0000 1111 1000 tttn #32-bit disp
      emit_byte(0x0F);
      emit_byte(0x80 | cc);
      emit_long(offs - long_size);
    }
  } else {
#ifdef ASSERT
    warning("reversing conditional branch");
#endif /* ASSERT */
    Label skip;
    jccb(reverse[cc], skip);
    lea(rscratch1, dst);
    Assembler::jmp(rscratch1);
    bind(skip);
  }
}

void MacroAssembler::ldmxcsr(AddressLiteral src) {
  if (reachable(src)) {
    Assembler::ldmxcsr(as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::ldmxcsr(Address(rscratch1, 0));
  }
}

// Sign-extending byte load; returns the code offset of the load instruction
// (used for implicit null-check bookkeeping).
int MacroAssembler::load_signed_byte(Register dst, Address src) {
  int off;
  if (LP64_ONLY(true ||) VM_Version::is_P6()) {
    off = offset();
    movsbl(dst, src); // movsxb
  } else {
    off = load_unsigned_byte(dst, src);
    shll(dst, 24);
    sarl(dst, 24);
  }
  return off;
}

// Note: load_signed_short used to be called load_signed_word.
// Although the 'w' in x86 opcodes refers to the term "word" in the assembler
// manual, which means 16 bits, that usage is found nowhere in HotSpot code.
// The term "word" in HotSpot means a 32- or 64-bit machine word.
int MacroAssembler::load_signed_short(Register dst, Address src) {
  int off;
  if (LP64_ONLY(true ||) VM_Version::is_P6()) {
    // This is dubious to me since it seems safe to do a signed 16 => 64 bit
    // version but this is what 64bit has always done. This seems to imply
    // that users are only using 32bits worth.
    off = offset();
    movswl(dst, src); // movsxw
  } else {
    off = load_unsigned_short(dst, src);
    shll(dst, 16);
    sarl(dst, 16);
  }
  return off;
}

int MacroAssembler::load_unsigned_byte(Register dst, Address src) {
  // According to Intel Doc. AP-526, "Zero-Extension of Short", p.16,
  // and "3.9 Partial Register Penalties", p. 22).
int off;
  if (LP64_ONLY(true || ) VM_Version::is_P6() || src.uses(dst)) {
    off = offset();
    movzbl(dst, src); // movzxb
  } else {
    // pre-P6: avoid partial-register stall by clearing dst first
    xorl(dst, dst);
    off = offset();
    movb(dst, src);
  }
  return off;
}

// Note: load_unsigned_short used to be called load_unsigned_word.
int MacroAssembler::load_unsigned_short(Register dst, Address src) {
  // According to Intel Doc. AP-526, "Zero-Extension of Short", p.16,
  // and "3.9 Partial Register Penalties", p. 22).
  int off;
  if (LP64_ONLY(true ||) VM_Version::is_P6() || src.uses(dst)) {
    off = offset();
    movzwl(dst, src); // movzxw
  } else {
    xorl(dst, dst);
    off = offset();
    movw(dst, src);
  }
  return off;
}

// Load a 1/2/4/8-byte value; on 32bit an 8-byte value needs dst2 for the
// high half.
void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2) {
  switch (size_in_bytes) {
#ifndef _LP64
  case  8:
    assert(dst2 != noreg, "second dest register required");
    movl(dst,  src);
    movl(dst2, src.plus_disp(BytesPerInt));
    break;
#else
  case  8:  movq(dst, src); break;
#endif
  case  4:  movl(dst, src); break;
  case  2:  is_signed ? load_signed_short(dst, src) : load_unsigned_short(dst, src); break;
  case  1:  is_signed ?
load_signed_byte( dst, src) : load_unsigned_byte( dst, src); break;
  default:  ShouldNotReachHere();
  }
}

// Store a 1/2/4/8-byte value; on 32bit an 8-byte value needs src2 for the
// high half.
void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2) {
  switch (size_in_bytes) {
#ifndef _LP64
  case  8:
    assert(src2 != noreg, "second source register required");
    movl(dst,                        src);
    movl(dst.plus_disp(BytesPerInt), src2);
    break;
#else
  case  8:  movq(dst, src); break;
#endif
  case  4:  movl(dst, src); break;
  case  2:  movw(dst, src); break;
  case  1:  movb(dst, src); break;
  default:  ShouldNotReachHere();
  }
}

void MacroAssembler::mov32(AddressLiteral dst, Register src) {
  if (reachable(dst)) {
    movl(as_Address(dst), src);
  } else {
    lea(rscratch1, dst);
    movl(Address(rscratch1, 0), src);
  }
}

void MacroAssembler::mov32(Register dst, AddressLiteral src) {
  if (reachable(src)) {
    movl(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    movl(dst, Address(rscratch1, 0));
  }
}

// C++ bool manipulation

// Size of C++ bool is compiler-dependent; pick the matching move width.
void MacroAssembler::movbool(Register dst, Address src) {
  if(sizeof(bool) == 1)
    movb(dst, src);
  else if(sizeof(bool) == 2)
    movw(dst, src);
  else if(sizeof(bool) == 4)
    movl(dst, src);
  else
    // unsupported
    ShouldNotReachHere();
}

void MacroAssembler::movbool(Address dst, bool boolconst) {
  if(sizeof(bool) == 1)
    movb(dst, (int) boolconst);
  else if(sizeof(bool) == 2)
    movw(dst, (int) boolconst);
  else if(sizeof(bool) == 4)
    movl(dst, (int) boolconst);
  else
    // unsupported
    ShouldNotReachHere();
}

void MacroAssembler::movbool(Address dst, Register src) {
  if(sizeof(bool) == 1)
    movb(dst, src);
  else if(sizeof(bool) == 2)
    movw(dst, src);
  else if(sizeof(bool) == 4)
    movl(dst, src);
  else
    // unsupported
    ShouldNotReachHere();
}

void MacroAssembler::movbyte(ArrayAddress dst, int src) {
  movb(as_Address(dst), src);
}

void MacroAssembler::movdl(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    movdl(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    movdl(dst, Address(rscratch1, 0));
  }
}

void MacroAssembler::movq(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    movq(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    movq(dst, Address(rscratch1, 0));
  }
}

// Double load into XMM; movsd clears the upper half, movlpd preserves it
// (selected by the UseXmmLoadAndClearUpper flag).
void MacroAssembler::movdbl(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    if (UseXmmLoadAndClearUpper) {
      movsd (dst, as_Address(src));
    } else {
      movlpd(dst, as_Address(src));
    }
  } else {
    lea(rscratch1, src);
    if (UseXmmLoadAndClearUpper) {
      movsd (dst, Address(rscratch1, 0));
    } else {
      movlpd(dst, Address(rscratch1, 0));
    }
  }
}

void MacroAssembler::movflt(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    movss(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    movss(dst, Address(rscratch1, 0));
  }
}

// movptr: pointer-width move (movq on 64bit, movl on 32bit).
void MacroAssembler::movptr(Register dst, Register src) {
  LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
}

void MacroAssembler::movptr(Register dst, Address src) {
  LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
}

// src should NEVER be a real pointer. Use AddressLiteral for true pointers
void MacroAssembler::movptr(Register dst, intptr_t src) {
  LP64_ONLY(mov64(dst, src)) NOT_LP64(movl(dst, src));
}

void MacroAssembler::movptr(Address dst, Register src) {
  LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
}

void MacroAssembler::movsd(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    Assembler::movsd(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::movsd(dst, Address(rscratch1, 0));
  }
}

void MacroAssembler::movss(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    Assembler::movss(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::movss(dst, Address(rscratch1, 0));
  }
}

void MacroAssembler::mulsd(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    Assembler::mulsd(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::mulsd(dst, Address(rscratch1, 0));
  }
}

void MacroAssembler::mulss(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    Assembler::mulss(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::mulss(dst, Address(rscratch1, 0));
  }
}

void MacroAssembler::null_check(Register reg, int offset) {
  if (needs_explicit_null_check(offset)) {
    // provoke OS NULL exception if reg = NULL by
    // accessing M[reg] w/o changing any (non-CC) registers
    // NOTE: cmpl is plenty here to provoke a segv
    cmpptr(rax, Address(reg, 0));
    // Note: should probably use testl(rax, Address(reg, 0));
    //       may be shorter code (however, this version of
    //       testl needs to be implemented first)
  } else {
    // nothing to do, (later) access of M[reg + offset]
    // will provoke OS NULL exception if reg = NULL
  }
}

void MacroAssembler::os_breakpoint() {
  // instead of directly
emitting a breakpoint, call os:breakpoint for better debugability 8143 // (e.g., MSVC can't call ps() otherwise) 8144 call(RuntimeAddress(CAST_FROM_FN_PTR(address, os::breakpoint))); 8145 } 8146 8147 void MacroAssembler::pop_CPU_state() { 8148 pop_FPU_state(); 8149 pop_IU_state(); 8150 } 8151 8152 void MacroAssembler::pop_FPU_state() { 8153 NOT_LP64(frstor(Address(rsp, 0));) 8154 LP64_ONLY(fxrstor(Address(rsp, 0));) 8155 addptr(rsp, FPUStateSizeInWords * wordSize); 8156 } 8157 8158 void MacroAssembler::pop_IU_state() { 8159 popa(); 8160 LP64_ONLY(addq(rsp, 8)); 8161 popf(); 8162 } 8163 8164 // Save Integer and Float state 8165 // Warning: Stack must be 16 byte aligned (64bit) 8166 void MacroAssembler::push_CPU_state() { 8167 push_IU_state(); 8168 push_FPU_state(); 8169 } 8170 8171 void MacroAssembler::push_FPU_state() { 8172 subptr(rsp, FPUStateSizeInWords * wordSize); 8173 #ifndef _LP64 8174 fnsave(Address(rsp, 0)); 8175 fwait(); 8176 #else 8177 fxsave(Address(rsp, 0)); 8178 #endif // LP64 8179 } 8180 8181 void MacroAssembler::push_IU_state() { 8182 // Push flags first because pusha kills them 8183 pushf(); 8184 // Make sure rsp stays 16-byte aligned 8185 LP64_ONLY(subq(rsp, 8)); 8186 pusha(); 8187 } 8188 8189 void MacroAssembler::reset_last_Java_frame(Register java_thread, bool clear_fp, bool clear_pc) { 8190 // determine java_thread register 8191 if (!java_thread->is_valid()) { 8192 java_thread = rdi; 8193 get_thread(java_thread); 8194 } 8195 // we must set sp to zero to clear frame 8196 movptr(Address(java_thread, JavaThread::last_Java_sp_offset()), NULL_WORD); 8197 if (clear_fp) { 8198 movptr(Address(java_thread, JavaThread::last_Java_fp_offset()), NULL_WORD); 8199 } 8200 8201 if (clear_pc) 8202 movptr(Address(java_thread, JavaThread::last_Java_pc_offset()), NULL_WORD); 8203 8204 } 8205 8206 void MacroAssembler::restore_rax(Register tmp) { 8207 if (tmp == noreg) pop(rax); 8208 else if (tmp != rax) mov(rax, tmp); 8209 } 8210 8211 void 
MacroAssembler::round_to(Register reg, int modulus) { 8212 addptr(reg, modulus - 1); 8213 andptr(reg, -modulus); 8214 } 8215 8216 void MacroAssembler::save_rax(Register tmp) { 8217 if (tmp == noreg) push(rax); 8218 else if (tmp != rax) mov(tmp, rax); 8219 } 8220 8221 // Write serialization page so VM thread can do a pseudo remote membar. 8222 // We use the current thread pointer to calculate a thread specific 8223 // offset to write to within the page. This minimizes bus traffic 8224 // due to cache line collision. 8225 void MacroAssembler::serialize_memory(Register thread, Register tmp) { 8226 movl(tmp, thread); 8227 shrl(tmp, os::get_serialize_page_shift_count()); 8228 andl(tmp, (os::vm_page_size() - sizeof(int))); 8229 8230 Address index(noreg, tmp, Address::times_1); 8231 ExternalAddress page(os::get_memory_serialize_page()); 8232 8233 // Size of store must match masking code above 8234 movl(as_Address(ArrayAddress(page, index)), tmp); 8235 } 8236 8237 // Calls to C land 8238 // 8239 // When entering C land, the rbp, & rsp of the last Java frame have to be recorded 8240 // in the (thread-local) JavaThread object. When leaving C land, the last Java fp 8241 // has to be reset to 0. This is required to allow proper stack traversal. 
// Record the last Java frame (sp/fp/pc) in the JavaThread anchor before
// calling out to C land. sp is stored last so the anchor only becomes
// "walkable" once all fields are set.
void MacroAssembler::set_last_Java_frame(Register java_thread,
                                         Register last_java_sp,
                                         Register last_java_fp,
                                         address last_java_pc) {
  // determine java_thread register
  if (!java_thread->is_valid()) {
    java_thread = rdi;
    get_thread(java_thread);
  }
  // determine last_java_sp register
  if (!last_java_sp->is_valid()) {
    last_java_sp = rsp;
  }

  // last_java_fp is optional

  if (last_java_fp->is_valid()) {
    movptr(Address(java_thread, JavaThread::last_Java_fp_offset()), last_java_fp);
  }

  // last_java_pc is optional

  if (last_java_pc != NULL) {
    lea(Address(java_thread,
                JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()),
        InternalAddress(last_java_pc));

  }
  movptr(Address(java_thread, JavaThread::last_Java_sp_offset()), last_java_sp);
}

// Pointer-width shift left by immediate.
void MacroAssembler::shlptr(Register dst, int imm8) {
  LP64_ONLY(shlq(dst, imm8)) NOT_LP64(shll(dst, imm8));
}

// Pointer-width (logical) shift right by immediate.
void MacroAssembler::shrptr(Register dst, int imm8) {
  LP64_ONLY(shrq(dst, imm8)) NOT_LP64(shrl(dst, imm8));
}

// Sign-extend the low byte of reg into the full 32 bits. Uses movsx where
// available; the shift pair is the fallback for pre-P6 or non-byte registers.
void MacroAssembler::sign_extend_byte(Register reg) {
  if (LP64_ONLY(true ||) (VM_Version::is_P6() && reg->has_byte_register())) {
    movsbl(reg, reg); // movsxb
  } else {
    shll(reg, 24);
    sarl(reg, 24);
  }
}

// Sign-extend the low 16 bits of reg into the full 32 bits.
void MacroAssembler::sign_extend_short(Register reg) {
  if (LP64_ONLY(true ||) VM_Version::is_P6()) {
    movswl(reg, reg); // movsxw
  } else {
    shll(reg, 16);
    sarl(reg, 16);
  }
}

// testl against a literal address; unlike the SSE wrappers below there is no
// scratch-register fallback, so the target must be reachable.
void MacroAssembler::testl(Register dst, AddressLiteral src) {
  assert(reachable(src), "Address should be reachable");
  testl(dst, as_Address(src));
}

// sqrtsd with a literal address source; uses rscratch1 when not reachable.
void MacroAssembler::sqrtsd(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    Assembler::sqrtsd(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::sqrtsd(dst, Address(rscratch1, 0));
  }
}

// sqrtss with a literal address source; uses rscratch1 when not reachable.
void MacroAssembler::sqrtss(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    Assembler::sqrtss(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::sqrtss(dst, Address(rscratch1, 0));
  }
}

// subsd with a literal address source; uses rscratch1 when not reachable.
void MacroAssembler::subsd(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    Assembler::subsd(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::subsd(dst, Address(rscratch1, 0));
  }
}

// subss with a literal address source; uses rscratch1 when not reachable.
void MacroAssembler::subss(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    Assembler::subss(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::subss(dst, Address(rscratch1, 0));
  }
}

// ucomisd with a literal address source; uses rscratch1 when not reachable.
void MacroAssembler::ucomisd(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    Assembler::ucomisd(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::ucomisd(dst, Address(rscratch1, 0));
  }
}

// ucomiss with a literal address source; uses rscratch1 when not reachable.
void MacroAssembler::ucomiss(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    Assembler::ucomiss(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::ucomiss(dst, Address(rscratch1, 0));
  }
}

void MacroAssembler::xorpd(XMMRegister dst, AddressLiteral src) {
  // Used in sign-bit flipping with aligned address.
  assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");
  if (reachable(src)) {
    Assembler::xorpd(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::xorpd(dst, Address(rscratch1, 0));
  }
}

void MacroAssembler::xorps(XMMRegister dst, AddressLiteral src) {
  // Used in sign-bit flipping with aligned address.
  assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");
  if (reachable(src)) {
    Assembler::xorps(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::xorps(dst, Address(rscratch1, 0));
  }
}

// AVX 3-operands instructions

// vaddsd with a literal address source; uses rscratch1 when not reachable.
void MacroAssembler::vaddsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
  if (reachable(src)) {
    vaddsd(dst, nds, as_Address(src));
  } else {
    lea(rscratch1, src);
    vaddsd(dst, nds, Address(rscratch1, 0));
  }
}

// vaddss with a literal address source; uses rscratch1 when not reachable.
void MacroAssembler::vaddss(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
  if (reachable(src)) {
    vaddss(dst, nds, as_Address(src));
  } else {
    lea(rscratch1, src);
    vaddss(dst, nds, Address(rscratch1, 0));
  }
}

// vandpd with a literal address source; uses rscratch1 when not reachable.
void MacroAssembler::vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256) {
  if (reachable(src)) {
    vandpd(dst, nds, as_Address(src), vector256);
  } else {
    lea(rscratch1, src);
    vandpd(dst, nds, Address(rscratch1, 0), vector256);
  }
}

// vandps with a literal address source; uses rscratch1 when not reachable.
void MacroAssembler::vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256) {
  if (reachable(src)) {
    vandps(dst, nds, as_Address(src), vector256);
  } else {
    lea(rscratch1, src);
    vandps(dst, nds, Address(rscratch1, 0), vector256);
  }
}

// vdivsd with a literal address source; uses rscratch1 when not reachable.
void MacroAssembler::vdivsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
  if (reachable(src)) {
    vdivsd(dst, nds, as_Address(src));
  } else {
    lea(rscratch1, src);
    vdivsd(dst, nds, Address(rscratch1, 0));
  }
}

// vdivss with a literal address source; uses rscratch1 when not reachable.
void MacroAssembler::vdivss(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
  if (reachable(src)) {
    vdivss(dst, nds, as_Address(src));
  } else {
    lea(rscratch1, src);
    vdivss(dst, nds, Address(rscratch1, 0));
  }
}

// vmulsd with a literal address source; uses rscratch1 when not reachable.
void MacroAssembler::vmulsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
  if (reachable(src)) {
    vmulsd(dst, nds, as_Address(src));
  } else {
    lea(rscratch1, src);
    vmulsd(dst, nds, Address(rscratch1, 0));
  }
}

// vmulss with a literal address source; uses rscratch1 when not reachable.
void MacroAssembler::vmulss(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
  if (reachable(src)) {
    vmulss(dst, nds, as_Address(src));
  } else {
    lea(rscratch1, src);
    vmulss(dst, nds, Address(rscratch1, 0));
  }
}

// vsubsd with a literal address source; uses rscratch1 when not reachable.
void MacroAssembler::vsubsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
  if (reachable(src)) {
    vsubsd(dst, nds, as_Address(src));
  } else {
    lea(rscratch1, src);
    vsubsd(dst, nds, Address(rscratch1, 0));
  }
}

// vsubss with a literal address source; uses rscratch1 when not reachable.
void MacroAssembler::vsubss(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
  if (reachable(src)) {
    vsubss(dst, nds, as_Address(src));
  } else {
    lea(rscratch1, src);
    vsubss(dst, nds, Address(rscratch1, 0));
  }
}

// vxorpd with a literal address source; uses rscratch1 when not reachable.
void MacroAssembler::vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256) {
  if (reachable(src)) {
    vxorpd(dst, nds, as_Address(src), vector256);
  } else {
    lea(rscratch1, src);
    vxorpd(dst, nds, Address(rscratch1, 0), vector256);
  }
}

// vxorps with a literal address source; uses rscratch1 when not reachable.
void MacroAssembler::vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256) {
  if (reachable(src)) {
    vxorps(dst, nds, as_Address(src), vector256);
  } else {
    lea(rscratch1, src);
    vxorps(dst, nds, Address(rscratch1, 0), vector256);
  }
}


//////////////////////////////////////////////////////////////////////////////////
#ifndef SERIALGC

// G1 SATB pre-barrier: if concurrent marking is active, record the previous
// value of the field being written into the thread-local SATB queue, calling
// into the runtime when the queue is full.
void MacroAssembler::g1_write_barrier_pre(Register obj,
                                          Register pre_val,
                                          Register thread,
                                          Register tmp,
                                          bool tosca_live,
                                          bool expand_call) {

  // If expand_call is true then we expand the call_VM_leaf macro
  // directly to skip generating the check by
  // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp.

#ifdef _LP64
  assert(thread == r15_thread, "must be");
#endif // _LP64

  Label done;
  Label runtime;

  assert(pre_val != noreg, "check this code");

  if (obj != noreg) {
    assert_different_registers(obj, pre_val, tmp);
    assert(pre_val != rax, "check this code");
  }

  Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
                                       PtrQueue::byte_offset_of_active()));
  Address index(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
                                       PtrQueue::byte_offset_of_index()));
  Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
                                       PtrQueue::byte_offset_of_buf()));


  // Is marking active?
  if (in_bytes(PtrQueue::byte_width_of_active()) == 4) {
    cmpl(in_progress, 0);
  } else {
    assert(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption");
    cmpb(in_progress, 0);
  }
  jcc(Assembler::equal, done);

  // Do we need to load the previous value?
  if (obj != noreg) {
    load_heap_oop(pre_val, Address(obj, 0));
  }

  // Is the previous value null?
  cmpptr(pre_val, (int32_t) NULL_WORD);
  jcc(Assembler::equal, done);

  // Can we store original value in the thread's buffer?
  // Is index == 0?
  // (The index field is typed as size_t.)

  movptr(tmp, index);                   // tmp := *index_adr
  cmpptr(tmp, 0);                       // tmp == 0?
  jcc(Assembler::equal, runtime);       // If yes, goto runtime

  subptr(tmp, wordSize);                // tmp := tmp - wordSize
  movptr(index, tmp);                   // *index_adr := tmp
  addptr(tmp, buffer);                  // tmp := tmp + *buffer_adr

  // Record the previous value
  movptr(Address(tmp, 0), pre_val);
  jmp(done);

  bind(runtime);
  // save the live input values
  if(tosca_live) push(rax);

  if (obj != noreg && obj != rax)
    push(obj);

  if (pre_val != rax)
    push(pre_val);

  // Calling the runtime using the regular call_VM_leaf mechanism generates
  // code (generated by InterpreterMacroAssember::call_VM_leaf_base)
  // that checks that the *(ebp+frame::interpreter_frame_last_sp) == NULL.
  //
  // If we care generating the pre-barrier without a frame (e.g. in the
  // intrinsified Reference.get() routine) then ebp might be pointing to
  // the caller frame and so this check will most likely fail at runtime.
  //
  // Expanding the call directly bypasses the generation of the check.
  // So when we do not have have a full interpreter frame on the stack
  // expand_call should be passed true.

  NOT_LP64( push(thread); )

  if (expand_call) {
    LP64_ONLY( assert(pre_val != c_rarg1, "smashed arg"); )
    pass_arg1(this, thread);
    pass_arg0(this, pre_val);
    MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), 2);
  } else {
    call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), pre_val, thread);
  }

  NOT_LP64( pop(thread); )

  // save the live input values
  if (pre_val != rax)
    pop(pre_val);

  if (obj != noreg && obj != rax)
    pop(obj);

  if(tosca_live) pop(rax);

  bind(done);
}

// G1 post-barrier: for a region-crossing, non-NULL store, dirty the card for
// store_addr and enqueue it on the thread-local dirty-card queue, calling into
// the runtime when the queue is full.
void MacroAssembler::g1_write_barrier_post(Register store_addr,
                                           Register new_val,
                                           Register thread,
                                           Register tmp,
                                           Register tmp2) {
#ifdef _LP64
  assert(thread == r15_thread, "must be");
#endif // _LP64

  Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
                                       PtrQueue::byte_offset_of_index()));
  Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
                                       PtrQueue::byte_offset_of_buf()));

  BarrierSet* bs = Universe::heap()->barrier_set();
  CardTableModRefBS* ct = (CardTableModRefBS*)bs;
  Label done;
  Label runtime;

  // Does store cross heap regions?

  movptr(tmp, store_addr);
  xorptr(tmp, new_val);
  shrptr(tmp, HeapRegion::LogOfHRGrainBytes);
  jcc(Assembler::equal, done);

  // crosses regions, storing NULL?

  cmpptr(new_val, (int32_t) NULL_WORD);
  jcc(Assembler::equal, done);

  // storing region crossing non-NULL, is card already dirty?

  ExternalAddress cardtable((address) ct->byte_map_base);
  assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
#ifdef _LP64
  const Register card_addr = tmp;

  movq(card_addr, store_addr);
  shrq(card_addr, CardTableModRefBS::card_shift);

  lea(tmp2, cardtable);

  // get the address of the card
  addq(card_addr, tmp2);
#else
  const Register card_index = tmp;

  movl(card_index, store_addr);
  shrl(card_index, CardTableModRefBS::card_shift);

  Address index(noreg, card_index, Address::times_1);
  const Register card_addr = tmp;
  lea(card_addr, as_Address(ArrayAddress(cardtable, index)));
#endif
  cmpb(Address(card_addr, 0), 0);
  jcc(Assembler::equal, done);

  // storing a region crossing, non-NULL oop, card is clean.
  // dirty card and log.

  movb(Address(card_addr, 0), 0);

  cmpl(queue_index, 0);
  jcc(Assembler::equal, runtime);
  subl(queue_index, wordSize);
  movptr(tmp2, buffer);
#ifdef _LP64
  movslq(rscratch1, queue_index);
  addq(tmp2, rscratch1);
  movq(Address(tmp2, 0), card_addr);
#else
  addl(tmp2, queue_index);
  movl(Address(tmp2, 0), card_index);
#endif
  jmp(done);

  bind(runtime);
  // save the live input values
  push(store_addr);
  push(new_val);
#ifdef _LP64
  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, r15_thread);
#else
  push(thread);
  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, thread);
  pop(thread);
#endif
  pop(new_val);
  pop(store_addr);

  bind(done);
}

#endif // SERIALGC
//////////////////////////////////////////////////////////////////////////////////


void MacroAssembler::store_check(Register obj) {
  // Does a store check for the oop in register obj. The content of
  // register obj is destroyed afterwards.
  store_check_part_1(obj);
  store_check_part_2(obj);
}

// Card-table store check for a store into dst; dst itself is unused here
// (the card is derived from obj alone).
void MacroAssembler::store_check(Register obj, Address dst) {
  store_check(obj);
}


// split the store check operation so that other instructions can be scheduled inbetween
void MacroAssembler::store_check_part_1(Register obj) {
  BarrierSet* bs = Universe::heap()->barrier_set();
  assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
  shrptr(obj, CardTableModRefBS::card_shift);
}

// Second half of the split store check: obj now holds the card index;
// dirty the corresponding card-table byte.
void MacroAssembler::store_check_part_2(Register obj) {
  BarrierSet* bs = Universe::heap()->barrier_set();
  assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
  CardTableModRefBS* ct = (CardTableModRefBS*)bs;
  assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");

  // The calculation for byte_map_base is as follows:
  // byte_map_base = _byte_map - (uintptr_t(low_bound) >> card_shift);
  // So this essentially converts an address to a displacement and
  // it will never need to be relocated. On 64bit however the value may be too
  // large for a 32bit displacement

  intptr_t disp = (intptr_t) ct->byte_map_base;
  if (is_simm32(disp)) {
    Address cardtable(noreg, obj, Address::times_1, disp);
    movb(cardtable, 0);
  } else {
    // By doing it as an ExternalAddress disp could be converted to a rip-relative
    // displacement and done in a single instruction given favorable mapping and
    // a smarter version of as_Address. Worst case it is two instructions which
    // is no worse off then loading disp into a register and doing as a simple
    // Address() as above.
    // We can't do as ExternalAddress as the only style since if disp == 0 we'll
    // assert since NULL isn't acceptable in a reloci (see 6644928). In any case
    // in some cases we'll get a single instruction version.

    ExternalAddress cardtable((address)disp);
    Address index(noreg, obj, Address::times_1);
    movb(as_Address(ArrayAddress(cardtable, index)), 0);
  }
}

// Pointer-width subtract of an immediate.
void MacroAssembler::subptr(Register dst, int32_t imm32) {
  LP64_ONLY(subq(dst, imm32)) NOT_LP64(subl(dst, imm32));
}

// Force generation of a 4 byte immediate value even if it fits into 8bit
void MacroAssembler::subptr_imm32(Register dst, int32_t imm32) {
  LP64_ONLY(subq_imm32(dst, imm32)) NOT_LP64(subl_imm32(dst, imm32));
}

// Pointer-width register-register subtract.
void MacroAssembler::subptr(Register dst, Register src) {
  LP64_ONLY(subq(dst, src)) NOT_LP64(subl(dst, src));
}

// C++ bool manipulation
void MacroAssembler::testbool(Register dst) {
  if(sizeof(bool) == 1)
    testb(dst, 0xff);
  else if(sizeof(bool) == 2) {
    // testw implementation needed for two byte bools
    ShouldNotReachHere();
  } else if(sizeof(bool) == 4)
    testl(dst, dst);
  else
    // unsupported
    ShouldNotReachHere();
}

// Pointer-width register-register test (sets flags only).
void MacroAssembler::testptr(Register dst, Register src) {
  LP64_ONLY(testq(dst, src)) NOT_LP64(testl(dst, src));
}

// Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes.
8789 void MacroAssembler::tlab_allocate(Register obj, 8790 Register var_size_in_bytes, 8791 int con_size_in_bytes, 8792 Register t1, 8793 Register t2, 8794 Label& slow_case) { 8795 assert_different_registers(obj, t1, t2); 8796 assert_different_registers(obj, var_size_in_bytes, t1); 8797 Register end = t2; 8798 Register thread = NOT_LP64(t1) LP64_ONLY(r15_thread); 8799 8800 verify_tlab(); 8801 8802 NOT_LP64(get_thread(thread)); 8803 8804 movptr(obj, Address(thread, JavaThread::tlab_top_offset())); 8805 if (var_size_in_bytes == noreg) { 8806 lea(end, Address(obj, con_size_in_bytes)); 8807 } else { 8808 lea(end, Address(obj, var_size_in_bytes, Address::times_1)); 8809 } 8810 cmpptr(end, Address(thread, JavaThread::tlab_end_offset())); 8811 jcc(Assembler::above, slow_case); 8812 8813 // update the tlab top pointer 8814 movptr(Address(thread, JavaThread::tlab_top_offset()), end); 8815 8816 // recover var_size_in_bytes if necessary 8817 if (var_size_in_bytes == end) { 8818 subptr(var_size_in_bytes, obj); 8819 } 8820 verify_tlab(); 8821 } 8822 8823 // Preserves rbx, and rdx. 8824 Register MacroAssembler::tlab_refill(Label& retry, 8825 Label& try_eden, 8826 Label& slow_case) { 8827 Register top = rax; 8828 Register t1 = rcx; 8829 Register t2 = rsi; 8830 Register thread_reg = NOT_LP64(rdi) LP64_ONLY(r15_thread); 8831 assert_different_registers(top, thread_reg, t1, t2, /* preserve: */ rbx, rdx); 8832 Label do_refill, discard_tlab; 8833 8834 if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) { 8835 // No allocation in the shared eden. 
8836 jmp(slow_case); 8837 } 8838 8839 NOT_LP64(get_thread(thread_reg)); 8840 8841 movptr(top, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset()))); 8842 movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_end_offset()))); 8843 8844 // calculate amount of free space 8845 subptr(t1, top); 8846 shrptr(t1, LogHeapWordSize); 8847 8848 // Retain tlab and allocate object in shared space if 8849 // the amount free in the tlab is too large to discard. 8850 cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset()))); 8851 jcc(Assembler::lessEqual, discard_tlab); 8852 8853 // Retain 8854 // %%% yuck as movptr... 8855 movptr(t2, (int32_t) ThreadLocalAllocBuffer::refill_waste_limit_increment()); 8856 addptr(Address(thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset())), t2); 8857 if (TLABStats) { 8858 // increment number of slow_allocations 8859 addl(Address(thread_reg, in_bytes(JavaThread::tlab_slow_allocations_offset())), 1); 8860 } 8861 jmp(try_eden); 8862 8863 bind(discard_tlab); 8864 if (TLABStats) { 8865 // increment number of refills 8866 addl(Address(thread_reg, in_bytes(JavaThread::tlab_number_of_refills_offset())), 1); 8867 // accumulate wastage -- t1 is amount free in tlab 8868 addl(Address(thread_reg, in_bytes(JavaThread::tlab_fast_refill_waste_offset())), t1); 8869 } 8870 8871 // if tlab is currently allocated (top or end != null) then 8872 // fill [top, end + alignment_reserve) with array object 8873 testptr(top, top); 8874 jcc(Assembler::zero, do_refill); 8875 8876 // set up the mark word 8877 movptr(Address(top, oopDesc::mark_offset_in_bytes()), (intptr_t)markOopDesc::prototype()->copy_set_hash(0x2)); 8878 // set the length to the remaining space 8879 subptr(t1, typeArrayOopDesc::header_size(T_INT)); 8880 addptr(t1, (int32_t)ThreadLocalAllocBuffer::alignment_reserve()); 8881 shlptr(t1, log2_intptr(HeapWordSize/sizeof(jint))); 8882 movl(Address(top, arrayOopDesc::length_offset_in_bytes()), t1); 8883 // set 
klass to intArrayKlass 8884 // dubious reloc why not an oop reloc? 8885 movptr(t1, ExternalAddress((address)Universe::intArrayKlassObj_addr())); 8886 // store klass last. concurrent gcs assumes klass length is valid if 8887 // klass field is not null. 8888 store_klass(top, t1); 8889 8890 movptr(t1, top); 8891 subptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_start_offset()))); 8892 incr_allocated_bytes(thread_reg, t1, 0); 8893 8894 // refill the tlab with an eden allocation 8895 bind(do_refill); 8896 movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_size_offset()))); 8897 shlptr(t1, LogHeapWordSize); 8898 // allocate new tlab, address returned in top 8899 eden_allocate(top, t1, 0, t2, slow_case); 8900 8901 // Check that t1 was preserved in eden_allocate. 8902 #ifdef ASSERT 8903 if (UseTLAB) { 8904 Label ok; 8905 Register tsize = rsi; 8906 assert_different_registers(tsize, thread_reg, t1); 8907 push(tsize); 8908 movptr(tsize, Address(thread_reg, in_bytes(JavaThread::tlab_size_offset()))); 8909 shlptr(tsize, LogHeapWordSize); 8910 cmpptr(t1, tsize); 8911 jcc(Assembler::equal, ok); 8912 STOP("assert(t1 != tlab size)"); 8913 should_not_reach_here(); 8914 8915 bind(ok); 8916 pop(tsize); 8917 } 8918 #endif 8919 movptr(Address(thread_reg, in_bytes(JavaThread::tlab_start_offset())), top); 8920 movptr(Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())), top); 8921 addptr(top, t1); 8922 subptr(top, (int32_t)ThreadLocalAllocBuffer::alignment_reserve_in_bytes()); 8923 movptr(Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())), top); 8924 verify_tlab(); 8925 jmp(retry); 8926 8927 return thread_reg; // for use by caller 8928 } 8929 8930 void MacroAssembler::incr_allocated_bytes(Register thread, 8931 Register var_size_in_bytes, 8932 int con_size_in_bytes, 8933 Register t1) { 8934 if (!thread->is_valid()) { 8935 #ifdef _LP64 8936 thread = r15_thread; 8937 #else 8938 assert(t1->is_valid(), "need temp reg"); 8939 thread = t1; 8940 get_thread(thread); 
8941 #endif 8942 } 8943 8944 #ifdef _LP64 8945 if (var_size_in_bytes->is_valid()) { 8946 addq(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), var_size_in_bytes); 8947 } else { 8948 addq(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), con_size_in_bytes); 8949 } 8950 #else 8951 if (var_size_in_bytes->is_valid()) { 8952 addl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), var_size_in_bytes); 8953 } else { 8954 addl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), con_size_in_bytes); 8955 } 8956 adcl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())+4), 0); 8957 #endif 8958 } 8959 8960 void MacroAssembler::fp_runtime_fallback(address runtime_entry, int nb_args, int num_fpu_regs_in_use) { 8961 pusha(); 8962 8963 // if we are coming from c1, xmm registers may be live 8964 int off = 0; 8965 if (UseSSE == 1) { 8966 subptr(rsp, sizeof(jdouble)*8); 8967 movflt(Address(rsp,off++*sizeof(jdouble)),xmm0); 8968 movflt(Address(rsp,off++*sizeof(jdouble)),xmm1); 8969 movflt(Address(rsp,off++*sizeof(jdouble)),xmm2); 8970 movflt(Address(rsp,off++*sizeof(jdouble)),xmm3); 8971 movflt(Address(rsp,off++*sizeof(jdouble)),xmm4); 8972 movflt(Address(rsp,off++*sizeof(jdouble)),xmm5); 8973 movflt(Address(rsp,off++*sizeof(jdouble)),xmm6); 8974 movflt(Address(rsp,off++*sizeof(jdouble)),xmm7); 8975 } else if (UseSSE >= 2) { 8976 #ifdef COMPILER2 8977 if (MaxVectorSize > 16) { 8978 assert(UseAVX > 0, "256bit vectors are supported only with AVX"); 8979 // Save upper half of YMM registes 8980 subptr(rsp, 16 * LP64_ONLY(16) NOT_LP64(8)); 8981 vextractf128h(Address(rsp, 0),xmm0); 8982 vextractf128h(Address(rsp, 16),xmm1); 8983 vextractf128h(Address(rsp, 32),xmm2); 8984 vextractf128h(Address(rsp, 48),xmm3); 8985 vextractf128h(Address(rsp, 64),xmm4); 8986 vextractf128h(Address(rsp, 80),xmm5); 8987 vextractf128h(Address(rsp, 96),xmm6); 8988 vextractf128h(Address(rsp,112),xmm7); 8989 #ifdef _LP64 8990 
vextractf128h(Address(rsp,128),xmm8); 8991 vextractf128h(Address(rsp,144),xmm9); 8992 vextractf128h(Address(rsp,160),xmm10); 8993 vextractf128h(Address(rsp,176),xmm11); 8994 vextractf128h(Address(rsp,192),xmm12); 8995 vextractf128h(Address(rsp,208),xmm13); 8996 vextractf128h(Address(rsp,224),xmm14); 8997 vextractf128h(Address(rsp,240),xmm15); 8998 #endif 8999 } 9000 #endif 9001 // Save whole 128bit (16 bytes) XMM regiters 9002 subptr(rsp, 16 * LP64_ONLY(16) NOT_LP64(8)); 9003 movdqu(Address(rsp,off++*16),xmm0); 9004 movdqu(Address(rsp,off++*16),xmm1); 9005 movdqu(Address(rsp,off++*16),xmm2); 9006 movdqu(Address(rsp,off++*16),xmm3); 9007 movdqu(Address(rsp,off++*16),xmm4); 9008 movdqu(Address(rsp,off++*16),xmm5); 9009 movdqu(Address(rsp,off++*16),xmm6); 9010 movdqu(Address(rsp,off++*16),xmm7); 9011 #ifdef _LP64 9012 movdqu(Address(rsp,off++*16),xmm8); 9013 movdqu(Address(rsp,off++*16),xmm9); 9014 movdqu(Address(rsp,off++*16),xmm10); 9015 movdqu(Address(rsp,off++*16),xmm11); 9016 movdqu(Address(rsp,off++*16),xmm12); 9017 movdqu(Address(rsp,off++*16),xmm13); 9018 movdqu(Address(rsp,off++*16),xmm14); 9019 movdqu(Address(rsp,off++*16),xmm15); 9020 #endif 9021 } 9022 9023 // Preserve registers across runtime call 9024 int incoming_argument_and_return_value_offset = -1; 9025 if (num_fpu_regs_in_use > 1) { 9026 // Must preserve all other FPU regs (could alternatively convert 9027 // SharedRuntime::dsin, dcos etc. into assembly routines known not to trash 9028 // FPU state, but can not trust C compiler) 9029 NEEDS_CLEANUP; 9030 // NOTE that in this case we also push the incoming argument(s) to 9031 // the stack and restore it later; we also use this stack slot to 9032 // hold the return value from dsin, dcos etc. 
9033 for (int i = 0; i < num_fpu_regs_in_use; i++) { 9034 subptr(rsp, sizeof(jdouble)); 9035 fstp_d(Address(rsp, 0)); 9036 } 9037 incoming_argument_and_return_value_offset = sizeof(jdouble)*(num_fpu_regs_in_use-1); 9038 for (int i = nb_args-1; i >= 0; i--) { 9039 fld_d(Address(rsp, incoming_argument_and_return_value_offset-i*sizeof(jdouble))); 9040 } 9041 } 9042 9043 subptr(rsp, nb_args*sizeof(jdouble)); 9044 for (int i = 0; i < nb_args; i++) { 9045 fstp_d(Address(rsp, i*sizeof(jdouble))); 9046 } 9047 9048 #ifdef _LP64 9049 if (nb_args > 0) { 9050 movdbl(xmm0, Address(rsp, 0)); 9051 } 9052 if (nb_args > 1) { 9053 movdbl(xmm1, Address(rsp, sizeof(jdouble))); 9054 } 9055 assert(nb_args <= 2, "unsupported number of args"); 9056 #endif // _LP64 9057 9058 // NOTE: we must not use call_VM_leaf here because that requires a 9059 // complete interpreter frame in debug mode -- same bug as 4387334 9060 // MacroAssembler::call_VM_leaf_base is perfectly safe and will 9061 // do proper 64bit abi 9062 9063 NEEDS_CLEANUP; 9064 // Need to add stack banging before this runtime call if it needs to 9065 // be taken; however, there is no generic stack banging routine at 9066 // the MacroAssembler level 9067 9068 MacroAssembler::call_VM_leaf_base(runtime_entry, 0); 9069 9070 #ifdef _LP64 9071 movsd(Address(rsp, 0), xmm0); 9072 fld_d(Address(rsp, 0)); 9073 #endif // _LP64 9074 addptr(rsp, sizeof(jdouble) * nb_args); 9075 if (num_fpu_regs_in_use > 1) { 9076 // Must save return value to stack and then restore entire FPU 9077 // stack except incoming arguments 9078 fstp_d(Address(rsp, incoming_argument_and_return_value_offset)); 9079 for (int i = 0; i < num_fpu_regs_in_use - nb_args; i++) { 9080 fld_d(Address(rsp, 0)); 9081 addptr(rsp, sizeof(jdouble)); 9082 } 9083 fld_d(Address(rsp, (nb_args-1)*sizeof(jdouble))); 9084 addptr(rsp, sizeof(jdouble) * nb_args); 9085 } 9086 9087 off = 0; 9088 if (UseSSE == 1) { 9089 movflt(xmm0, Address(rsp,off++*sizeof(jdouble))); 9090 movflt(xmm1, 
Address(rsp,off++*sizeof(jdouble))); 9091 movflt(xmm2, Address(rsp,off++*sizeof(jdouble))); 9092 movflt(xmm3, Address(rsp,off++*sizeof(jdouble))); 9093 movflt(xmm4, Address(rsp,off++*sizeof(jdouble))); 9094 movflt(xmm5, Address(rsp,off++*sizeof(jdouble))); 9095 movflt(xmm6, Address(rsp,off++*sizeof(jdouble))); 9096 movflt(xmm7, Address(rsp,off++*sizeof(jdouble))); 9097 addptr(rsp, sizeof(jdouble)*8); 9098 } else if (UseSSE >= 2) { 9099 // Restore whole 128bit (16 bytes) XMM regiters 9100 movdqu(xmm0, Address(rsp,off++*16)); 9101 movdqu(xmm1, Address(rsp,off++*16)); 9102 movdqu(xmm2, Address(rsp,off++*16)); 9103 movdqu(xmm3, Address(rsp,off++*16)); 9104 movdqu(xmm4, Address(rsp,off++*16)); 9105 movdqu(xmm5, Address(rsp,off++*16)); 9106 movdqu(xmm6, Address(rsp,off++*16)); 9107 movdqu(xmm7, Address(rsp,off++*16)); 9108 #ifdef _LP64 9109 movdqu(xmm8, Address(rsp,off++*16)); 9110 movdqu(xmm9, Address(rsp,off++*16)); 9111 movdqu(xmm10, Address(rsp,off++*16)); 9112 movdqu(xmm11, Address(rsp,off++*16)); 9113 movdqu(xmm12, Address(rsp,off++*16)); 9114 movdqu(xmm13, Address(rsp,off++*16)); 9115 movdqu(xmm14, Address(rsp,off++*16)); 9116 movdqu(xmm15, Address(rsp,off++*16)); 9117 #endif 9118 addptr(rsp, 16 * LP64_ONLY(16) NOT_LP64(8)); 9119 #ifdef COMPILER2 9120 if (MaxVectorSize > 16) { 9121 // Restore upper half of YMM registes. 
    // (Tail of MacroAssembler::fp_runtime_fallback, continued from above.)
    // Restore the upper 128-bit halves of the YMM registers that were
    // spilled to the stack before the runtime call.
    vinsertf128h(xmm0, Address(rsp,  0));
    vinsertf128h(xmm1, Address(rsp, 16));
    vinsertf128h(xmm2, Address(rsp, 32));
    vinsertf128h(xmm3, Address(rsp, 48));
    vinsertf128h(xmm4, Address(rsp, 64));
    vinsertf128h(xmm5, Address(rsp, 80));
    vinsertf128h(xmm6, Address(rsp, 96));
    vinsertf128h(xmm7, Address(rsp,112));
#ifdef _LP64
    vinsertf128h(xmm8, Address(rsp,128));
    vinsertf128h(xmm9, Address(rsp,144));
    vinsertf128h(xmm10, Address(rsp,160));
    vinsertf128h(xmm11, Address(rsp,176));
    vinsertf128h(xmm12, Address(rsp,192));
    vinsertf128h(xmm13, Address(rsp,208));
    vinsertf128h(xmm14, Address(rsp,224));
    vinsertf128h(xmm15, Address(rsp,240));
#endif
    // Pop the spill area: 16 regs on LP64, 8 on 32-bit, 16 bytes each.
    addptr(rsp, 16 * LP64_ONLY(16) NOT_LP64(8));
  }
#endif
  }
  popa();
}

// pi/4, used by trigfunc() below for the fast-path range check.
static const double pi_4 = 0.7853981633974483;

// Emit code for a trigonometric intrinsic, selected by 'trig'
// ('s' = sin, 'c' = cos, 't' = tan). The argument and the result live
// in the x87 top-of-stack (F-TOS). Arguments with |x| <= pi/4 use the
// raw fsin/fcos/ftan instruction; anything larger falls back to the
// SharedRuntime C entry points via fp_runtime_fallback(), preserving
// num_fpu_regs_in_use live x87 registers across the call.
void MacroAssembler::trigfunc(char trig, int num_fpu_regs_in_use) {
  // A hand-coded argument reduction for values in fabs(pi/4, pi/2)
  // was attempted in this code; unfortunately it appears that the
  // switch to 80-bit precision and back causes this to be
  // unprofitable compared with simply performing a runtime call if
  // the argument is out of the (-pi/4, pi/4) range.

  Register tmp = noreg;
  if (!VM_Version::supports_cmov()) {
    // fcmp needs a temporary so preserve rbx,
    tmp = rbx;
    push(tmp);
  }

  Label slow_case, done;

  ExternalAddress pi4_adr = (address)&pi_4;
  if (reachable(pi4_adr)) {
    // x ?<= pi/4
    fld_d(pi4_adr);
    fld_s(1);                // Stack:  X  PI/4  X
    fabs();                  // Stack: |X| PI/4  X
    fcmp(tmp);
    jcc(Assembler::above, slow_case);

    // fastest case: -pi/4 <= x <= pi/4
    switch(trig) {
      case 's':
        fsin();
        break;
      case 'c':
        fcos();
        break;
      case 't':
        ftan();
        break;
      default:
        assert(false, "bad intrinsic");
        break;
    }
    jmp(done);
  }

  // slow case: runtime call
  bind(slow_case);

  switch(trig) {
  case 's':
    {
      fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dsin), 1, num_fpu_regs_in_use);
    }
    break;
  case 'c':
    {
      fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dcos), 1, num_fpu_regs_in_use);
    }
    break;
  case 't':
    {
      fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dtan), 1, num_fpu_regs_in_use);
    }
    break;
  default:
    assert(false, "bad intrinsic");
    break;
  }

  // Come here with result in F-TOS
  bind(done);

  if (tmp != noreg) {
    pop(tmp);
  }
}


// Look up the method for a megamorphic invokeinterface call.
// The target method is determined by <intf_klass, itable_index>.
// The receiver klass is in recv_klass.
// On success, the result will be in method_result, and execution falls through.
// On failure, execution transfers to the given label.
void MacroAssembler::lookup_interface_method(Register recv_klass,
                                             Register intf_klass,
                                             RegisterOrConstant itable_index,
                                             Register method_result,
                                             Register scan_temp,
                                             Label& L_no_such_interface) {
  assert_different_registers(recv_klass, intf_klass, method_result, scan_temp);
  assert(itable_index.is_constant() || itable_index.as_register() == method_result,
         "caller must use same register for non-constant itable index as for method");

  // Compute start of first itableOffsetEntry (which is at the end of the vtable)
  int vtable_base = InstanceKlass::vtable_start_offset() * wordSize;
  int itentry_off = itableMethodEntry::method_offset_in_bytes();
  int scan_step = itableOffsetEntry::size() * wordSize;
  int vte_size = vtableEntry::size() * wordSize;
  Address::ScaleFactor times_vte_scale = Address::times_ptr;
  assert(vte_size == wordSize, "else adjust times_vte_scale");

  // vtable length is stored as an int at a fixed offset in the klass.
  movl(scan_temp, Address(recv_klass, InstanceKlass::vtable_length_offset() * wordSize));

  // %%% Could store the aligned, prescaled offset in the klassoop.
  lea(scan_temp, Address(recv_klass, scan_temp, times_vte_scale, vtable_base));
  if (HeapWordsPerLong > 1) {
    // Round up to align_object_offset boundary
    // see code for InstanceKlass::start_of_itable!
    round_to(scan_temp, BytesPerLong);
  }

  // Adjust recv_klass by scaled itable_index, so we can free itable_index.
  assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below");
  lea(recv_klass, Address(recv_klass, itable_index, Address::times_ptr, itentry_off));

  // The generated loop is equivalent to:
  // for (scan = klass->itable(); scan->interface() != NULL; scan += scan_step) {
  //   if (scan->interface() == intf) {
  //     result = (klass + scan->offset() + itable_index);
  //   }
  // }
  Label search, found_method;

  // peel == 1 emits the first (peeled) iteration; peel == 0 emits the loop body.
  for (int peel = 1; peel >= 0; peel--) {
    movptr(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes()));
    cmpptr(intf_klass, method_result);

    if (peel) {
      jccb(Assembler::equal, found_method);
    } else {
      jccb(Assembler::notEqual, search);
      // (invert the test to fall through to found_method...)
    }

    if (!peel) break;

    bind(search);

    // Check that the previous entry is non-null. A null entry means that
    // the receiver class doesn't implement the interface, and wasn't the
    // same as when the caller was compiled.
    testptr(method_result, method_result);
    jcc(Assembler::zero, L_no_such_interface);
    addptr(scan_temp, scan_step);
  }

  bind(found_method);

  // Got a hit.
  movl(scan_temp, Address(scan_temp, itableOffsetEntry::offset_offset_in_bytes()));
  movptr(method_result, Address(recv_klass, scan_temp, Address::times_1));
}


// virtual method calling: load the Method* for a vtable slot into method_result.
void MacroAssembler::lookup_virtual_method(Register recv_klass,
                                           RegisterOrConstant vtable_index,
                                           Register method_result) {
  const int base = InstanceKlass::vtable_start_offset() * wordSize;
  assert(vtableEntry::size() * wordSize == wordSize, "else adjust the scaling in the code below");
  Address vtable_entry_addr(recv_klass,
                            vtable_index, Address::times_ptr,
                            base + vtableEntry::method_offset_in_bytes());
  movptr(method_result, vtable_entry_addr);
}


// Full subtype check: fast path plus slow path; branches to L_success on
// success, falls through on failure.
void MacroAssembler::check_klass_subtype(Register sub_klass,
                                         Register super_klass,
                                         Register temp_reg,
                                         Label& L_success) {
  Label L_failure;
  check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg, &L_success, &L_failure, NULL);
  check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, noreg, &L_success, NULL);
  bind(L_failure);
}


// Fast-path subtype check: identity compare plus the super_check_offset
// display probe. Each of L_success/L_failure/L_slow_path may be NULL,
// meaning "fall through" for that outcome (at most one may be NULL).
void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
                                                   Register super_klass,
                                                   Register temp_reg,
                                                   Label* L_success,
                                                   Label* L_failure,
                                                   Label* L_slow_path,
                                                   RegisterOrConstant super_check_offset) {
  assert_different_registers(sub_klass, super_klass, temp_reg);
  // -1 (the constant_or_zero sentinel) means the offset must be loaded here.
  bool must_load_sco = (super_check_offset.constant_or_zero() == -1);
  if (super_check_offset.is_register()) {
    assert_different_registers(sub_klass, super_klass,
                               super_check_offset.as_register());
  } else if (must_load_sco) {
    assert(temp_reg != noreg, "supply either a temp or a register offset");
  }

  Label L_fallthrough;
  int label_nulls = 0;
  if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
  if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
  if (L_slow_path == NULL)
                           { L_slow_path = &L_fallthrough; label_nulls++; }
  assert(label_nulls <= 1, "at most one NULL in the batch");

  int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
  int sco_offset = in_bytes(Klass::super_check_offset_offset());
  Address super_check_offset_addr(super_klass, sco_offset);

  // Hacked jcc, which "knows" that L_fallthrough, at least, is in
  // range of a jccb. If this routine grows larger, reconsider at
  // least some of these.
#define local_jcc(assembler_cond, label)                                \
  if (&(label) == &L_fallthrough)  jccb(assembler_cond, label);         \
  else                             jcc( assembler_cond, label) /*omit semi*/

  // Hacked jmp, which may only be used just before L_fallthrough.
#define final_jmp(label)                                                \
  if (&(label) == &L_fallthrough) { /*do nothing*/ }                    \
  else                            jmp(label)                /*omit semi*/

  // If the pointers are equal, we are done (e.g., String[] elements).
  // This self-check enables sharing of secondary supertype arrays among
  // non-primary types such as array-of-interface. Otherwise, each such
  // type would need its own customized SSA.
  // We move this check to the front of the fast path because many
  // type checks are in fact trivially successful in this manner,
  // so we get a nicely predicted branch right at the start of the check.
  cmpptr(sub_klass, super_klass);
  local_jcc(Assembler::equal, *L_success);

  // Check the supertype display:
  if (must_load_sco) {
    // Positive movl does right thing on LP64.
    movl(temp_reg, super_check_offset_addr);
    super_check_offset = RegisterOrConstant(temp_reg);
  }
  Address super_check_addr(sub_klass, super_check_offset, Address::times_1, 0);
  cmpptr(super_klass, super_check_addr); // load displayed supertype

  // This check has worked decisively for primary supers.
  // Secondary supers are sought in the super_cache ('super_cache_addr').
  // (Secondary supers are interfaces and very deeply nested subtypes.)
  // This works in the same check above because of a tricky aliasing
  // between the super_cache and the primary super display elements.
  // (The 'super_check_addr' can address either, as the case requires.)
  // Note that the cache is updated below if it does not help us find
  // what we need immediately.
  // So if it was a primary super, we can just fail immediately.
  // Otherwise, it's the slow path for us (no success at this point).

  if (super_check_offset.is_register()) {
    local_jcc(Assembler::equal, *L_success);
    cmpl(super_check_offset.as_register(), sc_offset);
    if (L_failure == &L_fallthrough) {
      local_jcc(Assembler::equal, *L_slow_path);
    } else {
      local_jcc(Assembler::notEqual, *L_failure);
      final_jmp(*L_slow_path);
    }
  } else if (super_check_offset.as_constant() == sc_offset) {
    // Need a slow path; fast failure is impossible.
    if (L_slow_path == &L_fallthrough) {
      local_jcc(Assembler::equal, *L_success);
    } else {
      local_jcc(Assembler::notEqual, *L_slow_path);
      final_jmp(*L_success);
    }
  } else {
    // No slow path; it's a fast decision.
    if (L_failure == &L_fallthrough) {
      local_jcc(Assembler::equal, *L_success);
    } else {
      local_jcc(Assembler::notEqual, *L_failure);
      final_jmp(*L_success);
    }
  }

  bind(L_fallthrough);

#undef local_jcc
#undef final_jmp
}


// Slow-path subtype check: linear scan of the secondary-supers array using
// repne_scan, updating the secondary_super_cache on a hit. temp2_reg may be
// noreg. If set_cond_codes, the Z flag reports success/failure to the caller.
void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
                                                   Register super_klass,
                                                   Register temp_reg,
                                                   Register temp2_reg,
                                                   Label* L_success,
                                                   Label* L_failure,
                                                   bool set_cond_codes) {
  assert_different_registers(sub_klass, super_klass, temp_reg);
  if (temp2_reg != noreg)
    assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg);
#define IS_A_TEMP(reg) ((reg) == temp_reg || (reg) == temp2_reg)

  Label L_fallthrough;
  int label_nulls = 0;
  if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
  if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
  assert(label_nulls <= 1, "at most one NULL in the batch");

  // a couple of useful fields in sub_klass:
  int ss_offset = in_bytes(Klass::secondary_supers_offset());
  int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
  Address secondary_supers_addr(sub_klass, ss_offset);
  Address super_cache_addr(     sub_klass, sc_offset);

  // Do a linear scan of the secondary super-klass chain.
  // This code is rarely used, so simplicity is a virtue here.
  // The repne_scan instruction uses fixed registers, which we must spill.
  // Don't worry too much about pre-existing connections with the input regs.

  assert(sub_klass != rax, "killed reg"); // killed by mov(rax, super)
  assert(sub_klass != rcx, "killed reg"); // killed by lea(rcx, &pst_counter)

  // Get super_klass value into rax (even if it was in rdi or rcx).
  bool pushed_rax = false, pushed_rcx = false, pushed_rdi = false;
  if (super_klass != rax || UseCompressedOops) {
    if (!IS_A_TEMP(rax)) { push(rax); pushed_rax = true; }
    mov(rax, super_klass);
  }
  if (!IS_A_TEMP(rcx)) { push(rcx); pushed_rcx = true; }
  if (!IS_A_TEMP(rdi)) { push(rdi); pushed_rdi = true; }

#ifndef PRODUCT
  // Bump the partial-subtype-check statistics counter (non-product only).
  int* pst_counter = &SharedRuntime::_partial_subtype_ctr;
  ExternalAddress pst_counter_addr((address) pst_counter);
  NOT_LP64(  incrementl(pst_counter_addr) );
  LP64_ONLY( lea(rcx, pst_counter_addr) );
  LP64_ONLY( incrementl(Address(rcx, 0)) );
#endif //PRODUCT

  // We will consult the secondary-super array.
  movptr(rdi, secondary_supers_addr);
  // Load the array length. (Positive movl does right thing on LP64.)
  movl(rcx, Address(rdi, Array<Klass*>::length_offset_in_bytes()));
  // Skip to start of data.
  addptr(rdi, Array<Klass*>::base_offset_in_bytes());

  // Scan RCX words at [RDI] for an occurrence of RAX.
  // Set NZ/Z based on last compare.
  // Z flag value will not be set by 'repne' if RCX == 0 since 'repne' does
  // not change flags (only scas instruction which is repeated sets flags).
  // Set Z = 0 (not equal) before 'repne' to indicate that class was not found.

  testptr(rax,rax); // Set Z = 0
  repne_scan();

  // Unspill the temp. registers:
  if (pushed_rdi)  pop(rdi);
  if (pushed_rcx)  pop(rcx);
  if (pushed_rax)  pop(rax);

  if (set_cond_codes) {
    // Special hack for the AD files: rdi is guaranteed non-zero.
    assert(!pushed_rdi, "rdi must be left non-NULL");
    // Also, the condition codes are properly set Z/NZ on succeed/failure.
  }

  if (L_failure == &L_fallthrough)
        jccb(Assembler::notEqual, *L_failure);
  else  jcc(Assembler::notEqual, *L_failure);

  // Success. Cache the super we found and proceed in triumph.
  movptr(super_cache_addr, super_klass);

  if (L_success != &L_fallthrough) {
    jmp(*L_success);
  }

#undef IS_A_TEMP

  bind(L_fallthrough);
}


// Conditional 32-bit move from memory; on CPUs without CMOV, emulate with
// a short branch around a plain movl.
void MacroAssembler::cmov32(Condition cc, Register dst, Address src) {
  if (VM_Version::supports_cmov()) {
    cmovl(cc, dst, src);
  } else {
    Label L;
    jccb(negate_condition(cc), L);
    movl(dst, src);
    bind(L);
  }
}

// Conditional 32-bit register-to-register move; same CMOV fallback as above.
void MacroAssembler::cmov32(Condition cc, Register dst, Register src) {
  if (VM_Version::supports_cmov()) {
    cmovl(cc, dst, src);
  } else {
    Label L;
    jccb(negate_condition(cc), L);
    movl(dst, src);
    bind(L);
  }
}

// Emit a debug-only oop verification of 'reg' (no-op unless VerifyOops).
// The message buffer 'b' is deliberately never freed: its address is baked
// into the generated code (via ExternalAddress) and read at runtime.
void MacroAssembler::verify_oop(Register reg, const char* s) {
  if (!VerifyOops) return;

  // Pass register number to verify_oop_subroutine
  char* b = new char[strlen(s) + 50];
  sprintf(b, "verify_oop: %s: %s", reg->name(), s);
  BLOCK_COMMENT("verify_oop {");
#ifdef _LP64
  push(rscratch1);                    // save r10, trashed by movptr()
#endif
  push(rax);                          // save rax,
  push(reg);                          // pass register argument
  ExternalAddress buffer((address) b);
  // avoid using pushptr, as it modifies scratch registers
  // and our contract is not to modify anything
  movptr(rax, buffer.addr());
  push(rax);
  // call indirectly to solve generation ordering problem
  movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));
  call(rax);
  // Caller pops the arguments (oop, message) and restores rax, r10
  BLOCK_COMMENT("} verify_oop");
}


// Returns the delayed value as a constant if it has already been computed;
// otherwise emits code to load it indirectly at runtime into 'tmp'.
RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr,
                                                      Register tmp,
                                                      int offset) {
  intptr_t value = *delayed_value_addr;
  if (value != 0)
    return RegisterOrConstant(value + offset);

  // load indirectly to solve generation ordering problem
  movptr(tmp, ExternalAddress((address)
                              delayed_value_addr));

#ifdef ASSERT
  // Debug builds verify that the delayed value has actually been filled in.
  { Label L;
    testptr(tmp, tmp);
    if (WizardMode) {
      jcc(Assembler::notZero, L);
      char* buf = new char[40];
      sprintf(buf, "DelayedValue="INTPTR_FORMAT, delayed_value_addr[1]);
      STOP(buf);
    } else {
      jccb(Assembler::notZero, L);
      hlt();
    }
    bind(L);
  }
#endif

  if (offset != 0)
    addptr(tmp, offset);

  return RegisterOrConstant(tmp);
}


// Compute the Address of an interpreter expression-stack argument slot,
// accounting for the return PC that sits on the stack above the arguments.
Address MacroAssembler::argument_address(RegisterOrConstant arg_slot,
                                         int extra_slot_offset) {
  // cf. TemplateTable::prepare_invoke(), if (load_receiver).
  int stackElementSize = Interpreter::stackElementSize;
  int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0);
#ifdef ASSERT
  int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1);
  assert(offset1 - offset == stackElementSize, "correct arithmetic");
#endif
  Register             scale_reg    = noreg;
  Address::ScaleFactor scale_factor = Address::no_scale;
  if (arg_slot.is_constant()) {
    offset += arg_slot.as_constant() * stackElementSize;
  } else {
    scale_reg    = arg_slot.as_register();
    scale_factor = Address::times(stackElementSize);
  }
  offset += wordSize;           // return PC is on stack
  return Address(rsp, scale_reg, scale_factor, offset);
}


// Like verify_oop(), but for an oop held in memory at 'addr' rather than in
// a register (no-op unless VerifyOops). As above, 'b' is intentionally
// leaked because the generated code embeds and reads its address.
void MacroAssembler::verify_oop_addr(Address addr, const char* s) {
  if (!VerifyOops) return;

  // Address adjust(addr.base(), addr.index(), addr.scale(), addr.disp() + BytesPerWord);
  // Pass register number to verify_oop_subroutine
  char* b = new char[strlen(s) + 50];
  sprintf(b, "verify_oop_addr: %s", s);

#ifdef _LP64
  push(rscratch1);                    // save r10, trashed by movptr()
#endif
  push(rax);                          // save rax,
  // addr may contain rsp so we will have to adjust it based on the push
  // we just did (and on 64 bit we do two pushes)
  // NOTE: 64bit seemed to have had a bug in that it did movq(addr, rax); which
  // stores rax into addr which is backwards of what was intended.
  if (addr.uses(rsp)) {
    lea(rax, addr);
    pushptr(Address(rax, LP64_ONLY(2 *) BytesPerWord));
  } else {
    pushptr(addr);
  }

  ExternalAddress buffer((address) b);
  // pass msg argument
  // avoid using pushptr, as it modifies scratch registers
  // and our contract is not to modify anything
  movptr(rax, buffer.addr());
  push(rax);

  // call indirectly to solve generation ordering problem
  movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));
  call(rax);
  // Caller pops the arguments (addr, message) and restores rax, r10.
}


// Debug-only sanity check of the current thread's TLAB invariants
// (start <= top <= end); active only when UseTLAB and VerifyOops.
void MacroAssembler::verify_tlab() {
#ifdef ASSERT
  if (UseTLAB && VerifyOops) {
    Label next, ok;
    Register t1 = rsi;
    Register thread_reg = NOT_LP64(rbx) LP64_ONLY(r15_thread);

    push(t1);
    NOT_LP64(push(thread_reg));
    NOT_LP64(get_thread(thread_reg));

    movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())));
    cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_start_offset())));
    jcc(Assembler::aboveEqual, next);
    STOP("assert(top >= start)");
    should_not_reach_here();

    bind(next);
    movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())));
    cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())));
    jcc(Assembler::aboveEqual, ok);
    STOP("assert(top <= end)");
    should_not_reach_here();

    bind(ok);
    NOT_LP64(pop(thread_reg));
    pop(t1);
  }
#endif
}

// Debug-print helper: decoded view of the x87 FPU control word
// (rounding/precision control bits and the six exception mask bits).
class ControlWord {
 public:
  int32_t _value;

  int  rounding_control() const        { return  (_value >> 10) & 3      ; }
  int  precision_control() const       { return  (_value >>  8) & 3      ; }
  bool precision() const               { return ((_value >>  5) & 1) != 0; }
  bool underflow() const               { return
                                                ((_value >>  4) & 1) != 0; }
  bool overflow() const                { return ((_value >>  3) & 1) != 0; }
  bool zero_divide() const             { return ((_value >>  2) & 1) != 0; }
  bool denormalized() const            { return ((_value >>  1) & 1) != 0; }
  bool invalid() const                 { return ((_value >>  0) & 1) != 0; }

  void print() const {
    // rounding control
    const char* rc;
    switch (rounding_control()) {
      case 0: rc = "round near"; break;
      case 1: rc = "round down"; break;
      case 2: rc = "round up "; break;
      case 3: rc = "chop "; break;
    };
    // precision control
    const char* pc;
    switch (precision_control()) {
      case 0: pc = "24 bits "; break;
      case 1: pc = "reserved"; break;
      case 2: pc = "53 bits "; break;
      case 3: pc = "64 bits "; break;
    };
    // flags: upper-case letter = mask bit set, lower-case = clear
    char f[9];
    f[0] = ' ';
    f[1] = ' ';
    f[2] = (precision   ()) ? 'P' : 'p';
    f[3] = (underflow   ()) ? 'U' : 'u';
    f[4] = (overflow    ()) ? 'O' : 'o';
    f[5] = (zero_divide ()) ? 'Z' : 'z';
    f[6] = (denormalized()) ? 'D' : 'd';
    f[7] = (invalid     ()) ? 'I' : 'i';
    f[8] = '\x0';
    // output
    printf("%04x masks = %s, %s, %s", _value & 0xFFFF, f, rc, pc);
  }

};

// Debug-print helper: decoded view of the x87 FPU status word
// (busy bit, condition codes C0-C3, top-of-stack index, exception flags).
class StatusWord {
 public:
  int32_t _value;

  bool busy() const                    { return ((_value >> 15) & 1) != 0; }
  bool C3() const                      { return ((_value >> 14) & 1) != 0; }
  bool C2() const                      { return ((_value >> 10) & 1) != 0; }
  bool C1() const                      { return ((_value >>  9) & 1) != 0; }
  bool C0() const                      { return ((_value >>  8) & 1) != 0; }
  int  top() const                     { return  (_value >> 11) & 7      ; }
  bool error_status() const            { return ((_value >>  7) & 1) != 0; }
  bool stack_fault() const             { return ((_value >>  6) & 1) != 0; }
  bool precision() const               { return ((_value >>  5) & 1) != 0; }
  bool underflow() const               { return ((_value >>  4) & 1) != 0; }
  bool overflow() const                { return ((_value >>  3) & 1) != 0; }
  bool zero_divide() const             { return ((_value >>  2) & 1) != 0; }
  bool denormalized() const            { return ((_value >>  1) & 1) != 0; }
  bool invalid() const                 { return ((_value >>  0) & 1) != 0; }

  void print() const {
    // condition codes
    char c[5];
    c[0] = (C3()) ? '3' : '-';
    c[1] = (C2()) ? '2' : '-';
    c[2] = (C1()) ? '1' : '-';
    c[3] = (C0()) ? '0' : '-';
    c[4] = '\x0';
    // flags: letter = exception flag set, '-' = clear
    char f[9];
    f[0] = (error_status()) ? 'E' : '-';
    f[1] = (stack_fault ()) ? 'S' : '-';
    f[2] = (precision   ()) ? 'P' : '-';
    f[3] = (underflow   ()) ? 'U' : '-';
    f[4] = (overflow    ()) ? 'O' : '-';
    f[5] = (zero_divide ()) ? 'Z' : '-';
    f[6] = (denormalized()) ? 'D' : '-';
    f[7] = (invalid     ()) ? 'I' : '-';
    f[8] = '\x0';
    // output
    printf("%04x flags = %s, cc = %s, top = %d", _value & 0xFFFF, f, c, top());
  }

};

// Debug-print helper: the x87 tag word, two bits per register slot.
class TagWord {
 public:
  int32_t _value;

  int tag_at(int i) const              { return (_value >> (i*2)) & 3; }

  void print() const {
    printf("%04x", _value & 0xFFFF);
  }

};

// Debug-print helper: one 80-bit x87 register image (64-bit mantissa in
// _m1:_m0 plus 16-bit sign/exponent in _ex).
class FPU_Register {
 public:
  int32_t _m0;
  int32_t _m1;
  int16_t _ex;

  bool is_indefinite() const           {
    return _ex == -1 && _m1 == (int32_t)0xC0000000 && _m0 == 0;
  }

  void print() const {
    char  sign = (_ex < 0) ? '-' : '+';
    const char* kind = (_ex == 0x7FFF || _ex == (int16_t)-1) ? "NaN" : "   ";
    printf("%c%04hx.%08x%08x  %s", sign, _ex, _m1, _m0, kind);
  };

};

// Debug-print helper: full x87 FPU environment image as laid out by the
// save-state code (control/status/tag words, error/data pointers, and the
// eight 10-byte register slots).
class FPU_State {
 public:
  enum {
    register_size       = 10,
    number_of_registers =  8,
    register_mask       =  7
  };

  ControlWord  _control_word;
  StatusWord   _status_word;
  TagWord      _tag_word;
  int32_t      _error_offset;
  int32_t      _error_selector;
  int32_t      _data_offset;
  int32_t      _data_selector;
  int8_t       _register[register_size * number_of_registers];

  // Map stack-relative index ST(i) to its physical register tag/slot.
  int tag_for_st(int i) const          { return _tag_word.tag_at((_status_word.top() + i) & register_mask); }
  FPU_Register* st(int i) const        { return (FPU_Register*)&_register[register_size * i]; }

  const char* tag_as_string(int tag) const {
    switch (tag) {
      case 0: return "valid";
      case 1: return "zero";
      case 2: return "special";
      case 3: return "empty";
    }
    ShouldNotReachHere();
    return NULL;
  }

  void print() const {
    // print computation registers
    { int t = _status_word.top();
      for (int i = 0; i < number_of_registers; i++) {
        int j = (i - t) & register_mask;
        printf("%c r%d = ST%d = ", (j == 0 ?
                                           '*' : ' '), i, j);
        st(j)->print();
        printf(" %s\n", tag_as_string(_tag_word.tag_at(i)));
      }
    }
    printf("\n");
    // print control registers
    printf("ctrl = "); _control_word.print(); printf("\n");
    printf("stat = "); _status_word .print(); printf("\n");
    printf("tags = "); _tag_word    .print(); printf("\n");
  }

};

// Debug-print helper: decoded view of the EFLAGS register.
class Flag_Register {
 public:
  int32_t _value;

  bool overflow() const                { return ((_value >> 11) & 1) != 0; }
  bool direction() const               { return ((_value >> 10) & 1) != 0; }
  bool sign() const                    { return ((_value >>  7) & 1) != 0; }
  bool zero() const                    { return ((_value >>  6) & 1) != 0; }
  bool auxiliary_carry() const         { return ((_value >>  4) & 1) != 0; }
  bool parity() const                  { return ((_value >>  2) & 1) != 0; }
  bool carry() const                   { return ((_value >>  0) & 1) != 0; }

  void print() const {
    // flags
    char f[8];
    f[0] = (overflow       ()) ? 'O' : '-';
    f[1] = (direction      ()) ? 'D' : '-';
    f[2] = (sign           ()) ? 'S' : '-';
    f[3] = (zero           ()) ? 'Z' : '-';
    f[4] = (auxiliary_carry()) ? 'A' : '-';
    f[5] = (parity         ()) ? 'P' : '-';
    f[6] = (carry          ()) ? 'C' : '-';
    f[7] = '\x0';
    // output
    printf("%08x flags = %s", _value, f);
  }

};

// Debug-print helper: one 32-bit integer register, shown in hex and decimal.
class IU_Register {
 public:
  int32_t _value;

  void print() const {
    printf("%08x %11d", _value, _value);
  }

};

// Debug-print helper: integer-unit state in pusha order (plus EFLAGS).
class IU_State {
 public:
  Flag_Register _eflags;
  IU_Register   _rdi;
  IU_Register   _rsi;
  IU_Register   _rbp;
  IU_Register   _rsp;
  IU_Register   _rbx;
  IU_Register   _rdx;
  IU_Register   _rcx;
  IU_Register   _rax;

  void print() const {
    // computation registers
    printf("rax, = "); _rax.print(); printf("\n");
    printf("rbx, = "); _rbx.print(); printf("\n");
    printf("rcx = "); _rcx.print(); printf("\n");
    printf("rdx = "); _rdx.print(); printf("\n");
    printf("rdi = "); _rdi.print(); printf("\n");
    printf("rsi = "); _rsi.print(); printf("\n");
    printf("rbp, = "); _rbp.print(); printf("\n");
    printf("rsp = "); _rsp.print(); printf("\n");
    printf("\n");
    // control registers
    printf("flgs = "); _eflags.print(); printf("\n");
  }
};


// Debug-print helper: combined FPU + integer-unit state as pushed by
// push_CPU_state().
class CPU_State {
 public:
  FPU_State _fpu_state;
  IU_State  _iu_state;

  void print() const {
    printf("--------------------------------------------------\n");
    _iu_state .print();
    printf("\n");
    _fpu_state.print();
    printf("--------------------------------------------------\n");
  }

};


// C entry point called from generated code by print_CPU_state() below.
static void _print_CPU_state(CPU_State* state) {
  state->print();
};


// Emit code that dumps the complete CPU state (as saved by push_CPU_state)
// via the _print_CPU_state C helper, then restores the state.
void MacroAssembler::print_CPU_state() {
  push_CPU_state();
  push(rsp);                // pass CPU state
  call(RuntimeAddress(CAST_FROM_FN_PTR(address, _print_CPU_state)));
  addptr(rsp, wordSize);    // discard argument
  pop_CPU_state();
}


// C entry point called from generated code by verify_FPU() below.
// Checks that the x87 stack has the expected depth (or, for negative
// stack_depth, at most that many elements) and that the tag word describes
// a contiguous stack. Returns false (after printing) on any violation.
static bool _verify_FPU(int stack_depth, char* s, CPU_State* state) {
  static int counter = 0;
  FPU_State* fs = &state->_fpu_state;
  counter++;
  // For leaf calls, only verify that the top few elements remain empty.
  // We only need 1 empty at the top for C2 code.
  if( stack_depth < 0 ) {
    if( fs->tag_for_st(7) != 3 ) {
      printf("FPR7 not empty\n");
      state->print();
      assert(false, "error");
      return false;
    }
    return true;            // All other stack states do not matter
  }

  assert((fs->_control_word._value & 0xffff) == StubRoutines::_fpu_cntrl_wrd_std,
         "bad FPU control word");

  // compute stack depth
  int i = 0;
  while (i < FPU_State::number_of_registers && fs->tag_for_st(i)  < 3) i++;
  int d = i;
  while (i < FPU_State::number_of_registers && fs->tag_for_st(i) == 3) i++;
  // verify findings
  if (i != FPU_State::number_of_registers) {
    // stack not contiguous
    printf("%s: stack not contiguous at ST%d\n", s, i);
    state->print();
    assert(false, "error");
    return false;
  }
  // check if computed stack depth corresponds to expected stack depth
  if (stack_depth < 0) {
    // NOTE(review): this branch appears unreachable -- a negative
    // stack_depth already returned from the leaf-call check above.
    // expected stack depth is -stack_depth or less
    if (d > -stack_depth) {
      // too many elements on the stack
      printf("%s: <= %d stack elements expected but found %d\n", s, -stack_depth, d);
      state->print();
      assert(false, "error");
      return false;
    }
  } else {
    // expected stack depth is stack_depth
    if (d != stack_depth) {
      // wrong stack depth
      printf("%s: %d stack elements expected but found %d\n", s, stack_depth, d);
      state->print();
      assert(false, "error");
      return false;
    }
  }
  // everything is cool
  return true;
}


// Emit a debug-only FPU stack-depth check (no-op unless VerifyFPU):
// saves CPU state, calls _verify_FPU with (stack_depth, s, state), and
// breaks into the debugger (int3) if the check fails.
void MacroAssembler::verify_FPU(int stack_depth, const char* s) {
  if (!VerifyFPU) return;
  push_CPU_state();
  push(rsp);                // pass CPU state
  ExternalAddress msg((address) s);
  // pass message string s
  pushptr(msg.addr());
  push(stack_depth);        // pass stack depth
  call(RuntimeAddress(CAST_FROM_FN_PTR(address, _verify_FPU)));
  addptr(rsp, 3 * wordSize);   // discard arguments
  // check for error
  { Label L;
    testl(rax, rax);
    jcc(Assembler::notZero, L);
    int3();                  // break if error condition
    bind(L);
  }
  pop_CPU_state();
}

// Load the klass pointer of 'src' into 'dst', decoding it on LP64 when
// compressed klass pointers are in use.
void MacroAssembler::load_klass(Register dst, Register src) {
#ifdef _LP64
  if (UseCompressedKlassPointers) {
    movl(dst, Address(src, oopDesc::klass_offset_in_bytes()));
    decode_heap_oop_not_null(dst);
  } else
#endif
    movptr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
}

// Load the prototype mark-word header from the klass of 'src' into 'dst'
// (used by biased locking). On LP64 with compressed klass pointers the
// narrow klass is decoded inline via r12_heapbase to save a register move.
void MacroAssembler::load_prototype_header(Register dst, Register src) {
#ifdef _LP64
  if (UseCompressedKlassPointers) {
    assert (Universe::heap() != NULL, "java heap should be initialized");
    movl(dst, Address(src, oopDesc::klass_offset_in_bytes()));
    if (Universe::narrow_oop_shift() != 0) {
      assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
      if (LogMinObjAlignmentInBytes == Address::times_8) {
        movq(dst, Address(r12_heapbase, dst, Address::times_8, Klass::prototype_header_offset()));
      } else {
        // OK to use shift since we don't need to preserve flags.
        shlq(dst, LogMinObjAlignmentInBytes);
        movq(dst, Address(r12_heapbase, dst, Address::times_1, Klass::prototype_header_offset()));
      }
    } else {
      // Zero-based compressed oops: narrow klass is already the address.
      movq(dst, Address(dst, Klass::prototype_header_offset()));
    }
  } else
#endif
  {
    movptr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
    movptr(dst, Address(dst, Klass::prototype_header_offset()));
  }
}

// Stores the klass pointer 'src' into the object 'dst'.
// NOTE: with compressed klass pointers 'src' is encoded in place and
// is therefore clobbered.
void MacroAssembler::store_klass(Register dst, Register src) {
#ifdef _LP64
  if (UseCompressedKlassPointers) {
    encode_heap_oop_not_null(src);
    movl(Address(dst, oopDesc::klass_offset_in_bytes()), src);
  } else
#endif
    movptr(Address(dst, oopDesc::klass_offset_in_bytes()), src);
}

// Loads a (possibly NULL) heap oop from 'src' into 'dst', decoding it
// when compressed oops are in use.
void MacroAssembler::load_heap_oop(Register dst, Address src) {
#ifdef _LP64
  // FIXME: Must change all places where we try to load the klass.
  if (UseCompressedOops) {
    movl(dst, src);
    decode_heap_oop(dst);
  } else
#endif
    movptr(dst, src);
}

// Doesn't do verification, generates fixed size code
void MacroAssembler::load_heap_oop_not_null(Register dst, Address src) {
#ifdef _LP64
  if (UseCompressedOops) {
    movl(dst, src);
    decode_heap_oop_not_null(dst);
  } else
#endif
    movptr(dst, src);
}

// Stores the heap oop in 'src' to 'dst'.
// NOTE: with compressed oops 'src' is encoded in place and is therefore
// clobbered; it must not also be a register used by the 'dst' address.
void MacroAssembler::store_heap_oop(Address dst, Register src) {
#ifdef _LP64
  if (UseCompressedOops) {
    assert(!dst.uses(src), "not enough registers");
    encode_heap_oop(src);
    movl(dst, src);
  } else
#endif
    movptr(dst, src);
}

// Compares the oop in 'src1' with the heap oop at 'src2'.  With
// compressed oops the memory operand must be decoded first, which needs
// a temp register; if 'tmp' is noreg, rax is saved and used instead.
void MacroAssembler::cmp_heap_oop(Register src1, Address src2, Register tmp) {
  assert_different_registers(src1, tmp);
#ifdef _LP64
  if (UseCompressedOops) {
    bool did_push = false;
    if (tmp == noreg) {
      tmp = rax;
      push(tmp);
      did_push = true;
      // src2 must not be rsp-relative: the push above just moved rsp.
      assert(!src2.uses(rsp), "can't push");
    }
    load_heap_oop(tmp, src2);
    cmpptr(src1, tmp);
    if (did_push)  pop(tmp);
  } else
#endif
    cmpptr(src1, src2);
}

// Used for storing NULLs.
void MacroAssembler::store_heap_oop_null(Address dst) {
#ifdef _LP64
  if (UseCompressedOops) {
    movl(dst, (int32_t)NULL_WORD);    // narrow NULL is 32 bits
  } else {
    movslq(dst, (int32_t)NULL_WORD);  // full-width NULL
  }
#else
  movl(dst, (int32_t)NULL_WORD);
#endif
}

#ifdef _LP64
void MacroAssembler::store_klass_gap(Register dst, Register src) {
  if (UseCompressedKlassPointers) {
    // Store to klass gap in destination
    movl(Address(dst, oopDesc::klass_gap_offset_in_bytes()), src);
  }
}

#ifdef ASSERT
// Debug check that r12_heapbase still holds the narrow oop base;
// used by the encode/decode routines below before trusting r12.
void MacroAssembler::verify_heapbase(const char* msg) {
  assert (UseCompressedOops, "should be compressed");
  assert (Universe::heap() != NULL, "java heap should be initialized");
  if (CheckCompressedOops) {
    Label ok;
    push(rscratch1); // cmpptr trashes rscratch1
    cmpptr(r12_heapbase, ExternalAddress((address)Universe::narrow_oop_base_addr()));
    jcc(Assembler::equal, ok);
    STOP(msg);
    bind(ok);
    pop(rscratch1);
  }
}
#endif

// Algorithm must match oop.inline.hpp encode_heap_oop.
// Encodes the (possibly NULL) oop in 'r' into its narrow form, in place.
void MacroAssembler::encode_heap_oop(Register r) {
#ifdef ASSERT
  verify_heapbase("MacroAssembler::encode_heap_oop: heap base corrupted?");
#endif
  verify_oop(r, "broken oop in encode_heap_oop");
  if (Universe::narrow_oop_base() == NULL) {
    // Zero-based: only the alignment shift (if any) is needed.
    if (Universe::narrow_oop_shift() != 0) {
      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
      shrq(r, LogMinObjAlignmentInBytes);
    }
    return;
  }
  // NULL maps to NULL: substitute the heap base so the subtraction
  // below yields zero without a branch.
  testq(r, r);
  cmovq(Assembler::equal, r, r12_heapbase);
  subq(r, r12_heapbase);
  shrq(r, LogMinObjAlignmentInBytes);
}

// Encodes the oop in 'r' in place; 'r' must not be NULL (checked in
// debug builds), which allows skipping the NULL-preserving cmov.
void MacroAssembler::encode_heap_oop_not_null(Register r) {
#ifdef ASSERT
  verify_heapbase("MacroAssembler::encode_heap_oop_not_null: heap base corrupted?");
  if (CheckCompressedOops) {
    Label ok;
    testq(r, r);
    jcc(Assembler::notEqual, ok);
    STOP("null oop passed to encode_heap_oop_not_null");
    bind(ok);
  }
#endif
  verify_oop(r, "broken oop in encode_heap_oop_not_null");
  if (Universe::narrow_oop_base() != NULL) {
    subq(r, r12_heapbase);
  }
  if (Universe::narrow_oop_shift() != 0) {
    assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
    shrq(r, LogMinObjAlignmentInBytes);
  }
}

// Two-register variant: encodes the non-NULL oop in 'src' into 'dst'.
void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) {
#ifdef ASSERT
  verify_heapbase("MacroAssembler::encode_heap_oop_not_null2: heap base corrupted?");
  if (CheckCompressedOops) {
    Label ok;
    testq(src, src);
    jcc(Assembler::notEqual, ok);
    STOP("null oop passed to encode_heap_oop_not_null2");
    bind(ok);
  }
#endif
  verify_oop(src, "broken oop in encode_heap_oop_not_null2");
  if (dst != src) {
    movq(dst, src);
  }
  if (Universe::narrow_oop_base() != NULL) {
    subq(dst, r12_heapbase);
  }
  if (Universe::narrow_oop_shift() != 0) {
    assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
    shrq(dst, LogMinObjAlignmentInBytes);
  }
}

// Decodes the (possibly NULL) narrow oop in 'r' in place.
void MacroAssembler::decode_heap_oop(Register r) {
#ifdef ASSERT
  verify_heapbase("MacroAssembler::decode_heap_oop: heap base corrupted?");
#endif
  if (Universe::narrow_oop_base() == NULL) {
    if (Universe::narrow_oop_shift() != 0) {
      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
      shlq(r, LogMinObjAlignmentInBytes);
    }
  } else {
    Label done;
    // shlq sets ZF: a narrow NULL stays NULL (skip adding the base).
    shlq(r, LogMinObjAlignmentInBytes);
    jccb(Assembler::equal, done);
    addq(r, r12_heapbase);
    bind(done);
  }
  verify_oop(r, "broken oop in decode_heap_oop");
}

// Decodes the narrow oop in 'r' in place; 'r' must not be NULL.
void MacroAssembler::decode_heap_oop_not_null(Register r) {
  // Note: it will change flags
  assert (UseCompressedOops, "should only be used for compressed headers");
  assert (Universe::heap() != NULL, "java heap should be initialized");
  // Cannot assert, unverified entry point counts instructions (see .ad file)
  // vtableStubs also counts instructions in pd_code_size_limit.
  // Also do not verify_oop as this is called by verify_oop.
  if (Universe::narrow_oop_shift() != 0) {
    assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
    shlq(r, LogMinObjAlignmentInBytes);
    if (Universe::narrow_oop_base() != NULL) {
      addq(r, r12_heapbase);
    }
  } else {
    assert (Universe::narrow_oop_base() == NULL, "sanity");
  }
}

// Two-register variant: decodes the non-NULL narrow oop in 'src' into
// 'dst'.  When the oop shift matches the hardware times_8 scale the
// whole decode collapses to a single lea.
void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) {
  // Note: it will change flags
  assert (UseCompressedOops, "should only be used for compressed headers");
  assert (Universe::heap() != NULL, "java heap should be initialized");
  // Cannot assert, unverified entry point counts instructions (see .ad file)
  // vtableStubs also counts instructions in pd_code_size_limit.
  // Also do not verify_oop as this is called by verify_oop.
  if (Universe::narrow_oop_shift() != 0) {
    assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
    if (LogMinObjAlignmentInBytes == Address::times_8) {
      leaq(dst, Address(r12_heapbase, src, Address::times_8, 0));
    } else {
      if (dst != src) {
        movq(dst, src);
      }
      shlq(dst, LogMinObjAlignmentInBytes);
      if (Universe::narrow_oop_base() != NULL) {
        addq(dst, r12_heapbase);
      }
    }
  } else {
    assert (Universe::narrow_oop_base() == NULL, "sanity");
    if (dst != src) {
      movq(dst, src);
    }
  }
}

// Loads the narrow-oop constant for 'obj' into register 'dst',
// recording an oop relocation so the GC can patch it.
void MacroAssembler::set_narrow_oop(Register dst, jobject obj) {
  assert (UseCompressedOops, "should only be used for compressed headers");
  assert (Universe::heap() != NULL, "java heap should be initialized");
  assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
  int oop_index = oop_recorder()->find_index(obj);
  RelocationHolder rspec = oop_Relocation::spec(oop_index);
  mov_narrow_oop(dst, oop_index, rspec);
}

// Stores the narrow-oop constant for 'obj' to memory at 'dst',
// recording an oop relocation so the GC can patch it.
void MacroAssembler::set_narrow_oop(Address dst, jobject obj) {
  assert (UseCompressedOops, "should only be used for compressed headers");
  assert (Universe::heap() != NULL, "java heap should be initialized");
  assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
  int oop_index = oop_recorder()->find_index(obj);
  RelocationHolder rspec = oop_Relocation::spec(oop_index);
  mov_narrow_oop(dst, oop_index, rspec);
}

// Compares register 'dst' against the narrow-oop constant for 'obj'.
void MacroAssembler::cmp_narrow_oop(Register dst, jobject obj) {
  assert (UseCompressedOops, "should only be used for compressed headers");
  assert (Universe::heap() != NULL, "java heap should be initialized");
  assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
  int oop_index = oop_recorder()->find_index(obj);
  RelocationHolder rspec = oop_Relocation::spec(oop_index);
  Assembler::cmp_narrow_oop(dst, oop_index, rspec);
}

// Compares memory at 'dst' against the narrow-oop constant for 'obj'.
void MacroAssembler::cmp_narrow_oop(Address dst, jobject obj) {
  assert (UseCompressedOops, "should only be used for compressed headers");
  assert (Universe::heap() != NULL, "java heap should be initialized");
  assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
  int oop_index = oop_recorder()->find_index(obj);
  RelocationHolder rspec = oop_Relocation::spec(oop_index);
  Assembler::cmp_narrow_oop(dst, oop_index, rspec);
}

// Reloads r12_heapbase from the VM-global narrow oop base.
void MacroAssembler::reinit_heapbase() {
  if (UseCompressedOops) {
    movptr(r12_heapbase, ExternalAddress((address)Universe::narrow_oop_base_addr()));
  }
}
#endif // _LP64


// C2 compiled method's prolog code.
void MacroAssembler::verified_entry(int framesize, bool stack_bang, bool fp_mode_24b) {

  // WARNING: Initial instruction MUST be 5 bytes or longer so that
  // NativeJump::patch_verified_entry will be able to patch out the entry
  // code safely.
  // The push to verify stack depth is ok at 5 bytes,
  // the frame allocation can be either 3 or 6 bytes. So if we don't do
  // stack bang then we must use the 6 byte frame allocation even if
  // we have no frame. :-(

  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove word for return addr
  framesize -= wordSize;

  // Calls to C2R adapters often do not accept exceptional returns.
  // We require that their callers must bang for them.  But be careful, because
  // some VM calls (such as call site linkage) can use several kilobytes of
  // stack.  But the stack safety zone should account for that.
  // See bugs 4446381, 4468289, 4497237.
  if (stack_bang) {
    generate_stack_overflow_check(framesize);

    // We always push rbp, so that on return to interpreter rbp, will be
    // restored correctly and we can correct the stack.
    push(rbp);
    // Remove word for ebp
    framesize -= wordSize;

    // Create frame
    if (framesize) {
      subptr(rsp, framesize);
    }
  } else {
    // Create frame (force generation of a 4 byte immediate value)
    subptr_imm32(rsp, framesize);

    // Save RBP register now.
    framesize -= wordSize;
    movptr(Address(rsp, framesize), rbp);
  }

  if (VerifyStackAtCalls) { // Majik cookie to verify stack depth
    framesize -= wordSize;
    movptr(Address(rsp, framesize), (int32_t)0xbadb100d);
  }

#ifndef _LP64
  // If method sets FPU control word do it now
  if (fp_mode_24b) {
    fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
  }
  if (UseSSE >= 2 && VerifyFPU) {
    verify_FPU(0, "FPU stack must be clean on entry");
  }
#endif

#ifdef ASSERT
  if (VerifyStackAtCalls) {
    // Check that rsp is StackAlignmentInBytes-aligned modulo the word
    // occupied by the return address.
    Label L;
    push(rax);
    mov(rax, rsp);
    andptr(rax, StackAlignmentInBytes-1);
    cmpptr(rax, StackAlignmentInBytes-wordSize);
    pop(rax);
    jcc(Assembler::equal, L);
    STOP("Stack is not properly aligned!");
    bind(L);
  }
#endif

}


// IndexOf for constant substrings with size >= 8 chars
// which don't need to be loaded through stack.
// Emits an SSE4.2 (pcmpestri) substring search for a constant-length
// substring of int_cnt2 (>= 8) chars.  On exit 'result' holds the
// match index in chars, or -1 if not found.
void MacroAssembler::string_indexofC8(Register str1, Register str2,
                                      Register cnt1, Register cnt2,
                                      int int_cnt2,  Register result,
                                      XMMRegister vec, Register tmp) {
  ShortBranchVerifier sbv(this);
  assert(UseSSE42Intrinsics, "SSE4.2 is required");

  // This method uses pcmpestri instruction with bound registers
  //   inputs:
  //     xmm - substring
  //     rax - substring length (elements count)
  //     mem - scanned string
  //     rdx - string length (elements count)
  //     0xd - mode: 1100 (substring search) + 01 (unsigned shorts)
  //   outputs:
  //     rcx - matched index in string
  assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri");

  Label RELOAD_SUBSTR, SCAN_TO_SUBSTR, SCAN_SUBSTR,
        RET_FOUND, RET_NOT_FOUND, EXIT, FOUND_SUBSTR,
        MATCH_SUBSTR_HEAD, RELOAD_STR, FOUND_CANDIDATE;

  // Note, inline_string_indexOf() generates checks:
  // if (substr.count > string.count) return -1;
  // if (substr.count == 0) return 0;
  assert(int_cnt2 >= 8, "this code isused only for cnt2 >= 8 chars");

  // Load substring.
  movdqu(vec, Address(str2, 0));
  movl(cnt2, int_cnt2);
  movptr(result, str1); // string addr

  if (int_cnt2 > 8) {
    jmpb(SCAN_TO_SUBSTR);

    // Reload substr for rescan, this code
    // is executed only for large substrings (> 8 chars)
    bind(RELOAD_SUBSTR);
    movdqu(vec, Address(str2, 0));
    negptr(cnt2); // Jumped here with negative cnt2, convert to positive

    bind(RELOAD_STR);
    // We came here after the beginning of the substring was
    // matched but the rest of it was not so we need to search
    // again. Start from the next element after the previous match.

    // cnt2 is number of substring remaining elements and
    // cnt1 is number of string remaining elements when cmp failed.
    // Restored cnt1 = cnt1 - cnt2 + int_cnt2
    subl(cnt1, cnt2);
    addl(cnt1, int_cnt2);
    movl(cnt2, int_cnt2); // Now restore cnt2

    decrementl(cnt1);     // Shift to next element
    cmpl(cnt1, cnt2);
    jccb(Assembler::negative, RET_NOT_FOUND);  // Left less than substring

    addptr(result, 2);

  } // (int_cnt2 > 8)

  // Scan string for start of substr in 16-byte vectors
  bind(SCAN_TO_SUBSTR);
  pcmpestri(vec, Address(result, 0), 0x0d);
  jccb(Assembler::below, FOUND_CANDIDATE);   // CF == 1
  subl(cnt1, 8);
  jccb(Assembler::lessEqual, RET_NOT_FOUND); // Scanned full string
  cmpl(cnt1, cnt2);
  jccb(Assembler::negative, RET_NOT_FOUND);  // Left less than substring
  addptr(result, 16);
  jmpb(SCAN_TO_SUBSTR);

  // Found a potential substr
  bind(FOUND_CANDIDATE);
  // Matched whole vector if first element matched (tmp(rcx) == 0).
  if (int_cnt2 == 8) {
    jccb(Assembler::overflow, RET_FOUND);    // OF == 1
  } else { // int_cnt2 > 8
    jccb(Assembler::overflow, FOUND_SUBSTR);
  }
  // After pcmpestri tmp(rcx) contains matched element index
  // Compute start addr of substr
  lea(result, Address(result, tmp, Address::times_2));

  // Make sure string is still long enough
  subl(cnt1, tmp);
  cmpl(cnt1, cnt2);
  if (int_cnt2 == 8) {
    jccb(Assembler::greaterEqual, SCAN_TO_SUBSTR);
  } else { // int_cnt2 > 8
    jccb(Assembler::greaterEqual, MATCH_SUBSTR_HEAD);
  }
  // Left less than substring.

  bind(RET_NOT_FOUND);
  movl(result, -1);
  jmpb(EXIT);

  if (int_cnt2 > 8) {
    // This code is optimized for the case when whole substring
    // is matched if its head is matched.
    bind(MATCH_SUBSTR_HEAD);
    pcmpestri(vec, Address(result, 0), 0x0d);
    // Reload only string if does not match
    jccb(Assembler::noOverflow, RELOAD_STR); // OF == 0

    Label CONT_SCAN_SUBSTR;
    // Compare the rest of substring (> 8 chars).
    bind(FOUND_SUBSTR);
    // First 8 chars are already matched.
    negptr(cnt2);
    addptr(cnt2, 8);

    bind(SCAN_SUBSTR);
    subl(cnt1, 8);
    cmpl(cnt2, -8); // Do not read beyond substring
    jccb(Assembler::lessEqual, CONT_SCAN_SUBSTR);
    // Back-up strings to avoid reading beyond substring:
    // cnt1 = cnt1 - cnt2 + 8
    addl(cnt1, cnt2); // cnt2 is negative
    addl(cnt1, 8);
    movl(cnt2, 8);  negptr(cnt2);
    bind(CONT_SCAN_SUBSTR);
    if (int_cnt2 < (int)G) {
      movdqu(vec, Address(str2, cnt2, Address::times_2, int_cnt2*2));
      pcmpestri(vec, Address(result, cnt2, Address::times_2, int_cnt2*2), 0x0d);
    } else {
      // calculate index in register to avoid integer overflow (int_cnt2*2)
      movl(tmp, int_cnt2);
      addptr(tmp, cnt2);
      movdqu(vec, Address(str2, tmp, Address::times_2, 0));
      pcmpestri(vec, Address(result, tmp, Address::times_2, 0), 0x0d);
    }
    // Need to reload strings pointers if not matched whole vector
    jcc(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0
    addptr(cnt2, 8);
    jcc(Assembler::negative, SCAN_SUBSTR);
    // Fall through if found full substring

  } // (int_cnt2 > 8)

  bind(RET_FOUND);
  // Found result if we matched full small substring.
  // Compute substr offset
  subptr(result, str1);
  shrl(result, 1); // index (bytes -> chars)
  bind(EXIT);

} // string_indexofC8

// Small strings are loaded through stack if they cross page boundary.
// General SSE4.2 substring search: handles a small (< 8 chars) constant
// substring (int_cnt2 > 0) or a variable-length substring (int_cnt2 == -1,
// length in cnt2).  On exit 'result' holds the match index in chars, or -1.
void MacroAssembler::string_indexof(Register str1, Register str2,
                                    Register cnt1, Register cnt2,
                                    int int_cnt2,  Register result,
                                    XMMRegister vec, Register tmp) {
  ShortBranchVerifier sbv(this);
  assert(UseSSE42Intrinsics, "SSE4.2 is required");
  //
  // int_cnt2 is length of small (< 8 chars) constant substring
  // or (-1) for non constant substring in which case its length
  // is in cnt2 register.
  //
  // Note, inline_string_indexOf() generates checks:
  // if (substr.count > string.count) return -1;
  // if (substr.count == 0) return 0;
  //
  assert(int_cnt2 == -1 || (0 < int_cnt2 && int_cnt2 < 8), "should be != 0");

  // This method uses pcmpestri instruction with bound registers
  //   inputs:
  //     xmm - substring
  //     rax - substring length (elements count)
  //     mem - scanned string
  //     rdx - string length (elements count)
  //     0xd - mode: 1100 (substring search) + 01 (unsigned shorts)
  //   outputs:
  //     rcx - matched index in string
  assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri");

  Label RELOAD_SUBSTR, SCAN_TO_SUBSTR, SCAN_SUBSTR, ADJUST_STR,
        RET_FOUND, RET_NOT_FOUND, CLEANUP, FOUND_SUBSTR,
        FOUND_CANDIDATE;

  { //========================================================
    // We don't know where these strings are located
    // and we can't read beyond them. Load them through stack.
    Label BIG_STRINGS, CHECK_STR, COPY_SUBSTR, COPY_STR;

    movptr(tmp, rsp); // save old SP

    if (int_cnt2 > 0) {     // small (< 8 chars) constant substring
      if (int_cnt2 == 1) {  // One char
        load_unsigned_short(result, Address(str2, 0));
        movdl(vec, result); // move 32 bits
      } else if (int_cnt2 == 2) { // Two chars
        movdl(vec, Address(str2, 0)); // move 32 bits
      } else if (int_cnt2 == 4) { // Four chars
        movq(vec, Address(str2, 0));  // move 64 bits
      } else { // cnt2 = { 3, 5, 6, 7 }
        // Array header size is 12 bytes in 32-bit VM
        // + 6 bytes for 3 chars == 18 bytes,
        // enough space to load vec and shift.
        assert(HeapWordSize*typeArrayKlass::header_size() >= 12,"sanity");
        movdqu(vec, Address(str2, (int_cnt2*2)-16));
        psrldq(vec, 16-(int_cnt2*2));
      }
    } else { // not constant substring
      cmpl(cnt2, 8);
      jccb(Assembler::aboveEqual, BIG_STRINGS); // Both strings are big enough

      // We can read beyond string if str+16 does not cross page boundary
      // since heaps are aligned and mapped by pages.
      assert(os::vm_page_size() < (int)G, "default page should be small");
      movl(result, str2); // We need only low 32 bits
      andl(result, (os::vm_page_size()-1));
      cmpl(result, (os::vm_page_size()-16));
      jccb(Assembler::belowEqual, CHECK_STR);

      // Move small strings to stack to allow load 16 bytes into vec.
      subptr(rsp, 16);
      int stk_offset = wordSize-2;
      push(cnt2);

      bind(COPY_SUBSTR);
      load_unsigned_short(result, Address(str2, cnt2, Address::times_2, -2));
      movw(Address(rsp, cnt2, Address::times_2, stk_offset), result);
      decrement(cnt2);
      jccb(Assembler::notZero, COPY_SUBSTR);

      pop(cnt2);
      movptr(str2, rsp);  // New substring address
    } // non constant

    bind(CHECK_STR);
    cmpl(cnt1, 8);
    jccb(Assembler::aboveEqual, BIG_STRINGS);

    // Check cross page boundary.
    movl(result, str1); // We need only low 32 bits
    andl(result, (os::vm_page_size()-1));
    cmpl(result, (os::vm_page_size()-16));
    jccb(Assembler::belowEqual, BIG_STRINGS);

    subptr(rsp, 16);
    int stk_offset = -2;
    if (int_cnt2 < 0) { // not constant
      push(cnt2);
      stk_offset += wordSize;
    }
    movl(cnt2, cnt1);

    bind(COPY_STR);
    load_unsigned_short(result, Address(str1, cnt2, Address::times_2, -2));
    movw(Address(rsp, cnt2, Address::times_2, stk_offset), result);
    decrement(cnt2);
    jccb(Assembler::notZero, COPY_STR);

    if (int_cnt2 < 0) { // not constant
      pop(cnt2);
    }
    movptr(str1, rsp);  // New string address

    bind(BIG_STRINGS);
    // Load substring.
    if (int_cnt2 < 0) { // -1
      movdqu(vec, Address(str2, 0));
      push(cnt2); // substr count
      push(str2); // substr addr
      push(str1); // string addr
    } else {
      // Small (< 8 chars) constant substrings are loaded already.
      movl(cnt2, int_cnt2);
    }
    push(tmp);  // original SP

  } // Finished loading

  //========================================================
  // Start search
  //

  movptr(result, str1); // string addr

  if (int_cnt2  < 0) {  // Only for non constant substring
    jmpb(SCAN_TO_SUBSTR);

    // SP saved at sp+0
    // String saved at sp+1*wordSize
    // Substr saved at sp+2*wordSize
    // Substr count saved at sp+3*wordSize

    // Reload substr for rescan, this code
    // is executed only for large substrings (> 8 chars)
    bind(RELOAD_SUBSTR);
    movptr(str2, Address(rsp, 2*wordSize));
    movl(cnt2, Address(rsp, 3*wordSize));
    movdqu(vec, Address(str2, 0));
    // We came here after the beginning of the substring was
    // matched but the rest of it was not so we need to search
    // again. Start from the next element after the previous match.
    subptr(str1, result); // Restore counter
    shrl(str1, 1);
    addl(cnt1, str1);
    decrementl(cnt1);   // Shift to next element
    cmpl(cnt1, cnt2);
    jccb(Assembler::negative, RET_NOT_FOUND);  // Left less than substring

    addptr(result, 2);
  } // non constant

  // Scan string for start of substr in 16-byte vectors
  bind(SCAN_TO_SUBSTR);
  assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri");
  pcmpestri(vec, Address(result, 0), 0x0d);
  jccb(Assembler::below, FOUND_CANDIDATE);   // CF == 1
  subl(cnt1, 8);
  jccb(Assembler::lessEqual, RET_NOT_FOUND); // Scanned full string
  cmpl(cnt1, cnt2);
  jccb(Assembler::negative, RET_NOT_FOUND);  // Left less than substring
  addptr(result, 16);

  bind(ADJUST_STR);
  cmpl(cnt1, 8); // Do not read beyond string
  jccb(Assembler::greaterEqual, SCAN_TO_SUBSTR);
  // Back-up string to avoid reading beyond string.
  lea(result, Address(result, cnt1, Address::times_2, -16));
  movl(cnt1, 8);
  jmpb(SCAN_TO_SUBSTR);

  // Found a potential substr
  bind(FOUND_CANDIDATE);
  // After pcmpestri tmp(rcx) contains matched element index

  // Make sure string is still long enough
  subl(cnt1, tmp);
  cmpl(cnt1, cnt2);
  jccb(Assembler::greaterEqual, FOUND_SUBSTR);
  // Left less than substring.

  bind(RET_NOT_FOUND);
  movl(result, -1);
  jmpb(CLEANUP);

  bind(FOUND_SUBSTR);
  // Compute start addr of substr
  lea(result, Address(result, tmp, Address::times_2));

  if (int_cnt2 > 0) { // Constant substring
    // Repeat search for small substring (< 8 chars)
    // from new point without reloading substring.
    // Have to check that we don't read beyond string.
    cmpl(tmp, 8-int_cnt2);
    jccb(Assembler::greater, ADJUST_STR);
    // Fall through if matched whole substring.
  } else { // non constant
    assert(int_cnt2 == -1, "should be != 0");

    addl(tmp, cnt2);
    // Found result if we matched whole substring.
    cmpl(tmp, 8);
    jccb(Assembler::lessEqual, RET_FOUND);

    // Repeat search for small substring (<= 8 chars)
    // from new point 'str1' without reloading substring.
    cmpl(cnt2, 8);
    // Have to check that we don't read beyond string.
    jccb(Assembler::lessEqual, ADJUST_STR);

    Label CHECK_NEXT, CONT_SCAN_SUBSTR, RET_FOUND_LONG;
    // Compare the rest of substring (> 8 chars).
    movptr(str1, result);

    cmpl(tmp, cnt2);
    // First 8 chars are already matched.
    jccb(Assembler::equal, CHECK_NEXT);

    bind(SCAN_SUBSTR);
    pcmpestri(vec, Address(str1, 0), 0x0d);
    // Need to reload strings pointers if not matched whole vector
    jcc(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0

    bind(CHECK_NEXT);
    subl(cnt2, 8);
    jccb(Assembler::lessEqual, RET_FOUND_LONG); // Found full substring
    addptr(str1, 16);
    addptr(str2, 16);
    subl(cnt1, 8);
    cmpl(cnt2, 8); // Do not read beyond substring
    jccb(Assembler::greaterEqual, CONT_SCAN_SUBSTR);
    // Back-up strings to avoid reading beyond substring.
    lea(str2, Address(str2, cnt2, Address::times_2, -16));
    lea(str1, Address(str1, cnt2, Address::times_2, -16));
    subl(cnt1, cnt2);
    movl(cnt2, 8);
    addl(cnt1, 8);
    bind(CONT_SCAN_SUBSTR);
    movdqu(vec, Address(str2, 0));
    jmpb(SCAN_SUBSTR);

    bind(RET_FOUND_LONG);
    movptr(str1, Address(rsp, wordSize)); // restore original string addr
  } // non constant

  bind(RET_FOUND);
  // Compute substr offset
  subptr(result, str1);
  shrl(result, 1); // index (bytes -> chars)

  bind(CLEANUP);
  pop(rsp); // restore SP

} // string_indexof

// Compare strings.  On exit 'result' holds a negative, zero or positive
// value: the difference of the first mismatching chars, or the length
// difference if one string is a prefix of the other.
void MacroAssembler::string_compare(Register str1, Register str2,
                                    Register cnt1, Register cnt2, Register result,
                                    XMMRegister vec1) {
  ShortBranchVerifier sbv(this);
  Label LENGTH_DIFF_LABEL, POP_LABEL, DONE_LABEL, WHILE_HEAD_LABEL;

  // Compute the minimum of the string lengths and the
  // difference of the string lengths (stack).
  // Do the conditional move stuff
  movl(result, cnt1);
  subl(cnt1, cnt2);
  push(cnt1);              // length difference, popped at the end
  cmov32(Assembler::lessEqual, cnt2, result);

  // Is the minimum length zero?
10846 testl(cnt2, cnt2); 10847 jcc(Assembler::zero, LENGTH_DIFF_LABEL); 10848 10849 // Load first characters 10850 load_unsigned_short(result, Address(str1, 0)); 10851 load_unsigned_short(cnt1, Address(str2, 0)); 10852 10853 // Compare first characters 10854 subl(result, cnt1); 10855 jcc(Assembler::notZero, POP_LABEL); 10856 decrementl(cnt2); 10857 jcc(Assembler::zero, LENGTH_DIFF_LABEL); 10858 10859 { 10860 // Check after comparing first character to see if strings are equivalent 10861 Label LSkip2; 10862 // Check if the strings start at same location 10863 cmpptr(str1, str2); 10864 jccb(Assembler::notEqual, LSkip2); 10865 10866 // Check if the length difference is zero (from stack) 10867 cmpl(Address(rsp, 0), 0x0); 10868 jcc(Assembler::equal, LENGTH_DIFF_LABEL); 10869 10870 // Strings might not be equivalent 10871 bind(LSkip2); 10872 } 10873 10874 Address::ScaleFactor scale = Address::times_2; 10875 int stride = 8; 10876 10877 // Advance to next element 10878 addptr(str1, 16/stride); 10879 addptr(str2, 16/stride); 10880 10881 if (UseSSE42Intrinsics) { 10882 Label COMPARE_WIDE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_TAIL; 10883 int pcmpmask = 0x19; 10884 // Setup to compare 16-byte vectors 10885 movl(result, cnt2); 10886 andl(cnt2, ~(stride - 1)); // cnt2 holds the vector count 10887 jccb(Assembler::zero, COMPARE_TAIL); 10888 10889 lea(str1, Address(str1, result, scale)); 10890 lea(str2, Address(str2, result, scale)); 10891 negptr(result); 10892 10893 // pcmpestri 10894 // inputs: 10895 // vec1- substring 10896 // rax - negative string length (elements count) 10897 // mem - scaned string 10898 // rdx - string length (elements count) 10899 // pcmpmask - cmp mode: 11000 (string compare with negated result) 10900 // + 00 (unsigned bytes) or + 01 (unsigned shorts) 10901 // outputs: 10902 // rcx - first mismatched element index 10903 assert(result == rax && cnt2 == rdx && cnt1 == rcx, "pcmpestri"); 10904 10905 bind(COMPARE_WIDE_VECTORS); 10906 movdqu(vec1, Address(str1, 
result, scale)); 10907 pcmpestri(vec1, Address(str2, result, scale), pcmpmask); 10908 // After pcmpestri cnt1(rcx) contains mismatched element index 10909 10910 jccb(Assembler::below, VECTOR_NOT_EQUAL); // CF==1 10911 addptr(result, stride); 10912 subptr(cnt2, stride); 10913 jccb(Assembler::notZero, COMPARE_WIDE_VECTORS); 10914 10915 // compare wide vectors tail 10916 testl(result, result); 10917 jccb(Assembler::zero, LENGTH_DIFF_LABEL); 10918 10919 movl(cnt2, stride); 10920 movl(result, stride); 10921 negptr(result); 10922 movdqu(vec1, Address(str1, result, scale)); 10923 pcmpestri(vec1, Address(str2, result, scale), pcmpmask); 10924 jccb(Assembler::aboveEqual, LENGTH_DIFF_LABEL); 10925 10926 // Mismatched characters in the vectors 10927 bind(VECTOR_NOT_EQUAL); 10928 addptr(result, cnt1); 10929 movptr(cnt2, result); 10930 load_unsigned_short(result, Address(str1, cnt2, scale)); 10931 load_unsigned_short(cnt1, Address(str2, cnt2, scale)); 10932 subl(result, cnt1); 10933 jmpb(POP_LABEL); 10934 10935 bind(COMPARE_TAIL); // limit is zero 10936 movl(cnt2, result); 10937 // Fallthru to tail compare 10938 } 10939 10940 // Shift str2 and str1 to the end of the arrays, negate min 10941 lea(str1, Address(str1, cnt2, scale, 0)); 10942 lea(str2, Address(str2, cnt2, scale, 0)); 10943 negptr(cnt2); 10944 10945 // Compare the rest of the elements 10946 bind(WHILE_HEAD_LABEL); 10947 load_unsigned_short(result, Address(str1, cnt2, scale, 0)); 10948 load_unsigned_short(cnt1, Address(str2, cnt2, scale, 0)); 10949 subl(result, cnt1); 10950 jccb(Assembler::notZero, POP_LABEL); 10951 increment(cnt2); 10952 jccb(Assembler::notZero, WHILE_HEAD_LABEL); 10953 10954 // Strings are equal up to min length. Return the length difference. 
// (Tail of string_compare, continued from above.)
// Strings matched up to the shorter length: the length difference saved on
// the stack earlier becomes the result.
bind(LENGTH_DIFF_LABEL);
pop(result);
jmpb(DONE_LABEL);

// A character mismatch was found: 'result' already holds the signed
// difference of the mismatched chars.
// Discard the stored length difference
bind(POP_LABEL);
pop(cnt1);

// That's it
bind(DONE_LABEL);
}

// Compare char[] arrays aligned to 4 bytes or substrings.
//
// Emits code that sets 'result' to 1 if the two char sequences are equal and
// to 0 otherwise.
//   is_array_equ - true:  ary1/ary2 are array oops; emit null checks, compare
//                         the array lengths, and step over the array headers.
//                  false: ary1/ary2 already point at the first char and the
//                         caller has preloaded 'limit' with the element count.
//   limit/result/chr     - clobbered temporaries ('limit' also carries the
//                          incoming count in the substring case).
//   vec1/vec2            - XMM temporaries, used only on SSE4.2 hardware.
// ShortBranchVerifier presumably validates that every jccb target is within
// short-branch (8-bit displacement) range — keep the emission order intact.
void MacroAssembler::char_arrays_equals(bool is_array_equ, Register ary1, Register ary2,
                                        Register limit, Register result, Register chr,
                                        XMMRegister vec1, XMMRegister vec2) {
  ShortBranchVerifier sbv(this);
  Label TRUE_LABEL, FALSE_LABEL, DONE, COMPARE_VECTORS, COMPARE_CHAR;

  int length_offset = arrayOopDesc::length_offset_in_bytes();
  int base_offset   = arrayOopDesc::base_offset_in_bytes(T_CHAR);

  // Check the input args
  // Identical references are trivially equal.
  cmpptr(ary1, ary2);
  jcc(Assembler::equal, TRUE_LABEL);

  if (is_array_equ) {
    // Need additional checks for arrays_equals.
    // Either reference null => not equal (the equal-pointer case, including
    // null==null, was handled above).
    testptr(ary1, ary1);
    jcc(Assembler::zero, FALSE_LABEL);
    testptr(ary2, ary2);
    jcc(Assembler::zero, FALSE_LABEL);

    // Check the lengths
    movl(limit, Address(ary1, length_offset));
    cmpl(limit, Address(ary2, length_offset));
    jcc(Assembler::notEqual, FALSE_LABEL);
  }

  // count == 0  => equal (nothing to compare)
  testl(limit, limit);
  jcc(Assembler::zero, TRUE_LABEL);

  if (is_array_equ) {
    // Load array address
    // Skip the object header so ary1/ary2 point at element 0.
    lea(ary1, Address(ary1, base_offset));
    lea(ary2, Address(ary2, base_offset));
  }

  // Chars are 2 bytes: convert the element count to a byte count.
  shll(limit, 1);      // byte count != 0
  movl(result, limit); // copy

  if (UseSSE42Intrinsics) {
    // With SSE4.2, use double quad vector compare
    Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;

    // Compare 16-byte vectors
    // Split the byte count: 'limit' = 16-byte-aligned part (main loop),
    // 'result' = remaining tail bytes (at most 14, always even).
    andl(result, 0x0000000e);  //   tail count (in bytes)
    andl(limit, 0xfffffff0);   // vector count (in bytes)
    jccb(Assembler::zero, COMPARE_TAIL);

    // Point past the vector region and iterate with a negative index so a
    // single add/jcc terminates the loop at zero.
    lea(ary1, Address(ary1, limit, Address::times_1));
    lea(ary2, Address(ary2, limit, Address::times_1));
    negptr(limit);

    bind(COMPARE_WIDE_VECTORS);
    movdqu(vec1, Address(ary1, limit, Address::times_1));
    movdqu(vec2, Address(ary2, limit, Address::times_1));
    pxor(vec1, vec2);          // vec1 == 0 iff the 16-byte chunks are equal

    ptest(vec1, vec1);         // ZF set iff vec1 is all zero
    jccb(Assembler::notZero, FALSE_LABEL);
    addptr(limit, 16);
    jcc(Assembler::notZero, COMPARE_WIDE_VECTORS);

    // Main loop done; any tail bytes left?
    testl(result, result);
    jccb(Assembler::zero, TRUE_LABEL);

    // Compare the last 16 bytes ending at the array end; this overlaps the
    // already-verified region, which is harmless and avoids a scalar tail.
    movdqu(vec1, Address(ary1, result, Address::times_1, -16));
    movdqu(vec2, Address(ary2, result, Address::times_1, -16));
    pxor(vec1, vec2);

    ptest(vec1, vec1);
    jccb(Assembler::notZero, FALSE_LABEL);
    jmpb(TRUE_LABEL);

    bind(COMPARE_TAIL); // limit is zero
    movl(limit, result);
    // Fallthru to tail compare
  }

  // Compare 4-byte vectors
  // Scalar path (no SSE4.2, or SSE4.2 tail of < 16 bytes): compare 4 bytes
  // (two chars) per iteration. 'limit' = 4-byte-aligned byte count.
  andl(limit, 0xfffffffc); // vector count (in bytes)
  jccb(Assembler::zero, COMPARE_CHAR);

  lea(ary1, Address(ary1, limit, Address::times_1));
  lea(ary2, Address(ary2, limit, Address::times_1));
  negptr(limit);

  bind(COMPARE_VECTORS);
  movl(chr, Address(ary1, limit, Address::times_1));
  cmpl(chr, Address(ary2, limit, Address::times_1));
  jccb(Assembler::notEqual, FALSE_LABEL);
  addptr(limit, 4);
  jcc(Assembler::notZero, COMPARE_VECTORS);

  // Compare trailing char (final 2 bytes), if any
  // 'result' still holds the total byte count; bit 1 says whether one odd
  // char remains after the 4-byte loop. ary1/ary2 now point at that char.
  bind(COMPARE_CHAR);
  testl(result, 0x2);   // tail  char
  jccb(Assembler::zero, TRUE_LABEL);
  load_unsigned_short(chr, Address(ary1, 0));
  load_unsigned_short(limit, Address(ary2, 0));
  cmpl(chr, limit);
  jccb(Assembler::notEqual, FALSE_LABEL);

  bind(TRUE_LABEL);
  movl(result, 1);   // return true
  jmpb(DONE);

  bind(FALSE_LABEL);
  xorl(result, result); // return false

  // That's it
  bind(DONE);
}

void
// Fill 'count' elements of type 't' (T_BYTE, T_SHORT or T_INT) starting at
// 'to' with 'value'. 'count' is an element count; 'rtmp' and 'xtmp' are
// clobbered temporaries. 'aligned' means the caller guarantees 'to' is
// already suitably aligned, so the byte/word alignment prologue is skipped.
// (Return type 'void' precedes this line in the file.)
MacroAssembler::generate_fill(BasicType t, bool aligned,
                              Register to, Register value, Register count,
                              Register rtmp, XMMRegister xtmp) {
  ShortBranchVerifier sbv(this);
  assert_different_registers(to, value, count, rtmp);
  Label L_exit, L_skip_align1, L_skip_align2, L_fill_byte;
  Label L_fill_2_bytes, L_fill_4_bytes;

  // shift = log2(elements per 4 bytes): T_BYTE->2, T_SHORT->1, T_INT->0.
  // Used below to translate between element counts and byte counts.
  int shift = -1;
  switch (t) {
    case T_BYTE:
      shift = 2;
      break;
    case T_SHORT:
      shift = 1;
      break;
    case T_INT:
      shift = 0;
      break;
    default: ShouldNotReachHere();
  }

  // Replicate 'value' across all 32 bits so one movl stores several elements:
  // byte -> 8 bits duplicated to 16, short -> 16 bits, then both doubled to 32.
  if (t == T_BYTE) {
    andl(value, 0xff);
    movl(rtmp, value);
    shll(rtmp, 8);
    orl(value, rtmp);
  }
  if (t == T_SHORT) {
    andl(value, 0xffff);
  }
  if (t == T_BYTE || t == T_SHORT) {
    movl(rtmp, value);
    shll(rtmp, 16);
    orl(value, rtmp);
  }

  cmpl(count, 2<<shift); // Short arrays (< 8 bytes) fill by element
  jcc(Assembler::below, L_fill_4_bytes); // use unsigned cmp
  if (!UseUnalignedLoadStores && !aligned && (t == T_BYTE || t == T_SHORT)) {
    // align source address at 4 bytes address boundary
    if (t == T_BYTE) {
      // One byte misalignment happens only for byte arrays
      testptr(to, 1);
      jccb(Assembler::zero, L_skip_align1);
      movb(Address(to, 0), value);
      increment(to);
      decrement(count);
      BIND(L_skip_align1);
    }
    // Two bytes misalignment happens only for byte and short (char) arrays
    testptr(to, 2);
    jccb(Assembler::zero, L_skip_align2);
    movw(Address(to, 0), value);
    addptr(to, 2);
    subl(count, 1<<(shift-1));   // 2 bytes = 2 byte-elements or 1 short-element
    BIND(L_skip_align2);
  }
  if (UseSSE < 2) {
    // Pre-SSE2 path: 32-bit integer stores only.
    Label L_fill_32_bytes_loop, L_check_fill_8_bytes, L_fill_8_bytes_loop, L_fill_8_bytes;
    // Fill 32-byte chunks
    subl(count, 8 << shift);     // 8 << shift elements == 32 bytes
    jcc(Assembler::less, L_check_fill_8_bytes);
    align(16);

    BIND(L_fill_32_bytes_loop);

    // Eight 4-byte stores per iteration (unrolled at assembly time).
    for (int i = 0; i < 32; i += 4) {
      movl(Address(to, i), value);
    }

    addptr(to, 32);
    subl(count, 8 << shift);
    jcc(Assembler::greaterEqual, L_fill_32_bytes_loop);
    BIND(L_check_fill_8_bytes);
    addl(count, 8 << shift);     // undo the bias; count = remaining elements
    jccb(Assembler::zero, L_exit);
    jmpb(L_fill_8_bytes);

    //
    // length is too short, just fill qwords
    //
    BIND(L_fill_8_bytes_loop);
    movl(Address(to, 0), value);
    movl(Address(to, 4), value);
    addptr(to, 8);
    BIND(L_fill_8_bytes);
    subl(count, 1 << (shift + 1));  // one 8-byte step in elements
    jcc(Assembler::greaterEqual, L_fill_8_bytes_loop);
    // fall through to fill 4 bytes
  } else {
    Label L_fill_32_bytes;
    if (!UseUnalignedLoadStores) {
      // align to 8 bytes, we know we are 4 byte aligned to start
      testptr(to, 4);
      jccb(Assembler::zero, L_fill_32_bytes);
      movl(Address(to, 0), value);
      addptr(to, 4);
      subl(count, 1<<shift);
    }
    BIND(L_fill_32_bytes);
    {
      assert( UseSSE >= 2, "supported cpu only" );
      Label L_fill_32_bytes_loop, L_check_fill_8_bytes, L_fill_8_bytes_loop, L_fill_8_bytes;
      // Fill 32-byte chunks
      // Broadcast the 32-bit pattern into all four dwords of xtmp.
      movdl(xtmp, value);
      pshufd(xtmp, xtmp, 0);

      subl(count, 8 << shift);
      jcc(Assembler::less, L_check_fill_8_bytes);
      align(16);

      BIND(L_fill_32_bytes_loop);

      if (UseUnalignedLoadStores) {
        // Two unaligned 16-byte stores per iteration.
        movdqu(Address(to, 0), xtmp);
        movdqu(Address(to, 16), xtmp);
      } else {
        // Four aligned 8-byte stores per iteration ('to' is 8-byte aligned).
        movq(Address(to, 0), xtmp);
        movq(Address(to, 8), xtmp);
        movq(Address(to, 16), xtmp);
        movq(Address(to, 24), xtmp);
      }

      addptr(to, 32);
      subl(count, 8 << shift);
      jcc(Assembler::greaterEqual, L_fill_32_bytes_loop);
      BIND(L_check_fill_8_bytes);
      addl(count, 8 << shift);   // undo the bias; count = remaining elements
      jccb(Assembler::zero, L_exit);
      jmpb(L_fill_8_bytes);

      //
      // length is too short, just fill qwords
      //
      BIND(L_fill_8_bytes_loop);
      movq(Address(to, 0), xtmp);
      addptr(to, 8);
      BIND(L_fill_8_bytes);
      subl(count, 1 << (shift + 1));
      jcc(Assembler::greaterEqual, L_fill_8_bytes_loop);
    }
  }
  // fill trailing 4 bytes
  // From here on 'count' bits below (1 << (shift+1)) encode the remaining
  // tail: bit 'shift' => 4 bytes left, bit 'shift-1' => 2 bytes, bit 0 => 1.
  BIND(L_fill_4_bytes);
  testl(count, 1<<shift);
  jccb(Assembler::zero, L_fill_2_bytes);
  movl(Address(to, 0), value);
  if (t == T_BYTE || t == T_SHORT) {
    addptr(to, 4);
    BIND(L_fill_2_bytes);
    // fill trailing 2 bytes
    testl(count, 1<<(shift-1));
    jccb(Assembler::zero, L_fill_byte);
    movw(Address(to, 0), value);
    if (t == T_BYTE) {
      addptr(to, 2);
      BIND(L_fill_byte);
      // fill trailing byte
      testl(count, 1);
      jccb(Assembler::zero, L_exit);
      movb(Address(to, 0), value);
    } else {
      // T_SHORT: no sub-2-byte tail possible; label still needs binding.
      BIND(L_fill_byte);
    }
  } else {
    // T_INT: no sub-4-byte tail possible; label still needs binding.
    BIND(L_fill_2_bytes);
  }
  BIND(L_exit);
}
#undef BIND
#undef BLOCK_COMMENT


// Return the condition code with the opposite meaning, e.g. zero <-> notZero,
// below <-> aboveEqual. Every Condition enumerator is covered below.
Assembler::Condition MacroAssembler::negate_condition(Assembler::Condition cond) {
  switch (cond) {
    // Note some conditions are synonyms for others
    case Assembler::zero:         return Assembler::notZero;
    case Assembler::notZero:      return Assembler::zero;
    case Assembler::less:         return Assembler::greaterEqual;
    case Assembler::lessEqual:    return Assembler::greater;
    case Assembler::greater:      return Assembler::lessEqual;
    case Assembler::greaterEqual: return Assembler::less;
    case Assembler::below:        return Assembler::aboveEqual;
    case Assembler::belowEqual:   return Assembler::above;
    case Assembler::above:        return Assembler::belowEqual;
    case Assembler::aboveEqual:   return Assembler::below;
    case Assembler::overflow:     return Assembler::noOverflow;
    case Assembler::noOverflow:   return Assembler::overflow;
    case Assembler::negative:     return Assembler::positive;
    case Assembler::positive:     return Assembler::negative;
    case Assembler::parity:       return Assembler::noParity;
    case Assembler::noParity:     return Assembler::parity;
  }
  // Unreachable; the return value only silences compilers that cannot see it.
  ShouldNotReachHere(); return Assembler::overflow;
}

// RAII guard around a span of emitted code: the constructor emits a byte
// compare of '*flag_addr' against 'value' and a jump past the span when they
// are equal; the destructor binds the jump target. The code emitted between
// construction and destruction is therefore skipped at runtime whenever the
// flag equals 'value'.
SkipIfEqual::SkipIfEqual(
    MacroAssembler* masm, const bool* flag_addr, bool value) {
  _masm = masm;
  _masm->cmp8(ExternalAddress((address)flag_addr), value);
  _masm->jcc(Assembler::equal, _label);
}

SkipIfEqual::~SkipIfEqual() {
  // Close the guarded region started in the constructor.
  _masm->bind(_label);
}