1 /* 2 * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 
22 * 23 */ 24 25 #include "precompiled.hpp" 26 #include "asm/assembler.hpp" 27 #include "asm/assembler.inline.hpp" 28 #include "gc/shared/cardTableModRefBS.hpp" 29 #include "gc/shared/collectedHeap.inline.hpp" 30 #include "interpreter/interpreter.hpp" 31 #include "memory/resourceArea.hpp" 32 #include "prims/methodHandles.hpp" 33 #include "runtime/biasedLocking.hpp" 34 #include "runtime/interfaceSupport.hpp" 35 #include "runtime/objectMonitor.hpp" 36 #include "runtime/os.hpp" 37 #include "runtime/sharedRuntime.hpp" 38 #include "runtime/stubRoutines.hpp" 39 #include "utilities/macros.hpp" 40 #if INCLUDE_ALL_GCS 41 #include "gc/g1/g1CollectedHeap.inline.hpp" 42 #include "gc/g1/g1SATBCardTableModRefBS.hpp" 43 #include "gc/g1/heapRegion.hpp" 44 #endif // INCLUDE_ALL_GCS 45 46 #ifdef PRODUCT 47 #define BLOCK_COMMENT(str) /* nothing */ 48 #define STOP(error) stop(error) 49 #else 50 #define BLOCK_COMMENT(str) block_comment(str) 51 #define STOP(error) block_comment(error); stop(error) 52 #endif 53 54 #define BIND(label) bind(label); BLOCK_COMMENT(#label ":") 55 // Implementation of AddressLiteral 56 57 // A 2-D table for managing compressed displacement(disp8) on EVEX enabled platforms. 
58 unsigned char tuple_table[Assembler::EVEX_ETUP + 1][Assembler::AVX_512bit + 1] = { 59 // -----------------Table 4.5 -------------------- // 60 16, 32, 64, // EVEX_FV(0) 61 4, 4, 4, // EVEX_FV(1) - with Evex.b 62 16, 32, 64, // EVEX_FV(2) - with Evex.w 63 8, 8, 8, // EVEX_FV(3) - with Evex.w and Evex.b 64 8, 16, 32, // EVEX_HV(0) 65 4, 4, 4, // EVEX_HV(1) - with Evex.b 66 // -----------------Table 4.6 -------------------- // 67 16, 32, 64, // EVEX_FVM(0) 68 1, 1, 1, // EVEX_T1S(0) 69 2, 2, 2, // EVEX_T1S(1) 70 4, 4, 4, // EVEX_T1S(2) 71 8, 8, 8, // EVEX_T1S(3) 72 4, 4, 4, // EVEX_T1F(0) 73 8, 8, 8, // EVEX_T1F(1) 74 8, 8, 8, // EVEX_T2(0) 75 0, 16, 16, // EVEX_T2(1) 76 0, 16, 16, // EVEX_T4(0) 77 0, 0, 32, // EVEX_T4(1) 78 0, 0, 32, // EVEX_T8(0) 79 8, 16, 32, // EVEX_HVM(0) 80 4, 8, 16, // EVEX_QVM(0) 81 2, 4, 8, // EVEX_OVM(0) 82 16, 16, 16, // EVEX_M128(0) 83 8, 32, 64, // EVEX_DUP(0) 84 0, 0, 0 // EVEX_NTUP 85 }; 86 87 AddressLiteral::AddressLiteral(address target, relocInfo::relocType rtype) { 88 _is_lval = false; 89 _target = target; 90 switch (rtype) { 91 case relocInfo::oop_type: 92 case relocInfo::metadata_type: 93 // Oops are a special case. Normally they would be their own section 94 // but in cases like icBuffer they are literals in the code stream that 95 // we don't have a section for. We use none so that we get a literal address 96 // which is always patchable. 
97 break; 98 case relocInfo::external_word_type: 99 _rspec = external_word_Relocation::spec(target); 100 break; 101 case relocInfo::internal_word_type: 102 _rspec = internal_word_Relocation::spec(target); 103 break; 104 case relocInfo::opt_virtual_call_type: 105 _rspec = opt_virtual_call_Relocation::spec(); 106 break; 107 case relocInfo::static_call_type: 108 _rspec = static_call_Relocation::spec(); 109 break; 110 case relocInfo::runtime_call_type: 111 _rspec = runtime_call_Relocation::spec(); 112 break; 113 case relocInfo::poll_type: 114 case relocInfo::poll_return_type: 115 _rspec = Relocation::spec_simple(rtype); 116 break; 117 case relocInfo::none: 118 break; 119 default: 120 ShouldNotReachHere(); 121 break; 122 } 123 } 124 125 // Implementation of Address 126 127 #ifdef _LP64 128 129 Address Address::make_array(ArrayAddress adr) { 130 // Not implementable on 64bit machines 131 // Should have been handled higher up the call chain. 132 ShouldNotReachHere(); 133 return Address(); 134 } 135 136 // exceedingly dangerous constructor 137 Address::Address(int disp, address loc, relocInfo::relocType rtype) { 138 _base = noreg; 139 _index = noreg; 140 _scale = no_scale; 141 _disp = disp; 142 switch (rtype) { 143 case relocInfo::external_word_type: 144 _rspec = external_word_Relocation::spec(loc); 145 break; 146 case relocInfo::internal_word_type: 147 _rspec = internal_word_Relocation::spec(loc); 148 break; 149 case relocInfo::runtime_call_type: 150 // HMM 151 _rspec = runtime_call_Relocation::spec(); 152 break; 153 case relocInfo::poll_type: 154 case relocInfo::poll_return_type: 155 _rspec = Relocation::spec_simple(rtype); 156 break; 157 case relocInfo::none: 158 break; 159 default: 160 ShouldNotReachHere(); 161 } 162 } 163 #else // LP64 164 165 Address Address::make_array(ArrayAddress adr) { 166 AddressLiteral base = adr.base(); 167 Address index = adr.index(); 168 assert(index._disp == 0, "must not have disp"); // maybe it can? 
169 Address array(index._base, index._index, index._scale, (intptr_t) base.target()); 170 array._rspec = base._rspec; 171 return array; 172 } 173 174 // exceedingly dangerous constructor 175 Address::Address(address loc, RelocationHolder spec) { 176 _base = noreg; 177 _index = noreg; 178 _scale = no_scale; 179 _disp = (intptr_t) loc; 180 _rspec = spec; 181 } 182 183 #endif // _LP64 184 185 186 187 // Convert the raw encoding form into the form expected by the constructor for 188 // Address. An index of 4 (rsp) corresponds to having no index, so convert 189 // that to noreg for the Address constructor. 190 Address Address::make_raw(int base, int index, int scale, int disp, relocInfo::relocType disp_reloc) { 191 RelocationHolder rspec; 192 if (disp_reloc != relocInfo::none) { 193 rspec = Relocation::spec_simple(disp_reloc); 194 } 195 bool valid_index = index != rsp->encoding(); 196 if (valid_index) { 197 Address madr(as_Register(base), as_Register(index), (Address::ScaleFactor)scale, in_ByteSize(disp)); 198 madr._rspec = rspec; 199 return madr; 200 } else { 201 Address madr(as_Register(base), noreg, Address::no_scale, in_ByteSize(disp)); 202 madr._rspec = rspec; 203 return madr; 204 } 205 } 206 207 // Implementation of Assembler 208 209 int AbstractAssembler::code_fill_byte() { 210 return (u_char)'\xF4'; // hlt 211 } 212 213 // make this go away someday 214 void Assembler::emit_data(jint data, relocInfo::relocType rtype, int format) { 215 if (rtype == relocInfo::none) 216 emit_int32(data); 217 else 218 emit_data(data, Relocation::spec_simple(rtype), format); 219 } 220 221 void Assembler::emit_data(jint data, RelocationHolder const& rspec, int format) { 222 assert(imm_operand == 0, "default format must be immediate in this file"); 223 assert(inst_mark() != NULL, "must be inside InstructionMark"); 224 if (rspec.type() != relocInfo::none) { 225 #ifdef ASSERT 226 check_relocation(rspec, format); 227 #endif 228 // Do not use AbstractAssembler::relocate, which is not 
intended for 229 // embedded words. Instead, relocate to the enclosing instruction. 230 231 // hack. call32 is too wide for mask so use disp32 232 if (format == call32_operand) 233 code_section()->relocate(inst_mark(), rspec, disp32_operand); 234 else 235 code_section()->relocate(inst_mark(), rspec, format); 236 } 237 emit_int32(data); 238 } 239 240 static int encode(Register r) { 241 int enc = r->encoding(); 242 if (enc >= 8) { 243 enc -= 8; 244 } 245 return enc; 246 } 247 248 void Assembler::emit_arith_b(int op1, int op2, Register dst, int imm8) { 249 assert(dst->has_byte_register(), "must have byte register"); 250 assert(isByte(op1) && isByte(op2), "wrong opcode"); 251 assert(isByte(imm8), "not a byte"); 252 assert((op1 & 0x01) == 0, "should be 8bit operation"); 253 emit_int8(op1); 254 emit_int8(op2 | encode(dst)); 255 emit_int8(imm8); 256 } 257 258 259 void Assembler::emit_arith(int op1, int op2, Register dst, int32_t imm32) { 260 assert(isByte(op1) && isByte(op2), "wrong opcode"); 261 assert((op1 & 0x01) == 1, "should be 32bit operation"); 262 assert((op1 & 0x02) == 0, "sign-extension bit should not be set"); 263 if (is8bit(imm32)) { 264 emit_int8(op1 | 0x02); // set sign bit 265 emit_int8(op2 | encode(dst)); 266 emit_int8(imm32 & 0xFF); 267 } else { 268 emit_int8(op1); 269 emit_int8(op2 | encode(dst)); 270 emit_int32(imm32); 271 } 272 } 273 274 // Force generation of a 4 byte immediate value even if it fits into 8bit 275 void Assembler::emit_arith_imm32(int op1, int op2, Register dst, int32_t imm32) { 276 assert(isByte(op1) && isByte(op2), "wrong opcode"); 277 assert((op1 & 0x01) == 1, "should be 32bit operation"); 278 assert((op1 & 0x02) == 0, "sign-extension bit should not be set"); 279 emit_int8(op1); 280 emit_int8(op2 | encode(dst)); 281 emit_int32(imm32); 282 } 283 284 // immediate-to-memory forms 285 void Assembler::emit_arith_operand(int op1, Register rm, Address adr, int32_t imm32) { 286 assert((op1 & 0x01) == 1, "should be 32bit operation"); 287 
assert((op1 & 0x02) == 0, "sign-extension bit should not be set"); 288 if (is8bit(imm32)) { 289 emit_int8(op1 | 0x02); // set sign bit 290 emit_operand(rm, adr, 1); 291 emit_int8(imm32 & 0xFF); 292 } else { 293 emit_int8(op1); 294 emit_operand(rm, adr, 4); 295 emit_int32(imm32); 296 } 297 } 298 299 300 void Assembler::emit_arith(int op1, int op2, Register dst, Register src) { 301 assert(isByte(op1) && isByte(op2), "wrong opcode"); 302 emit_int8(op1); 303 emit_int8(op2 | encode(dst) << 3 | encode(src)); 304 } 305 306 307 bool Assembler::query_compressed_disp_byte(int disp, bool is_evex_inst, int vector_len, 308 int cur_tuple_type, int in_size_in_bits, int cur_encoding) { 309 int mod_idx = 0; 310 // We will test if the displacement fits the compressed format and if so 311 // apply the compression to the displacment iff the result is8bit. 312 if (VM_Version::supports_evex() && is_evex_inst) { 313 switch (cur_tuple_type) { 314 case EVEX_FV: 315 if ((cur_encoding & VEX_W) == VEX_W) { 316 mod_idx += 2 + ((cur_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0; 317 } else { 318 mod_idx = ((cur_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0; 319 } 320 break; 321 322 case EVEX_HV: 323 mod_idx = ((cur_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0; 324 break; 325 326 case EVEX_FVM: 327 break; 328 329 case EVEX_T1S: 330 switch (in_size_in_bits) { 331 case EVEX_8bit: 332 break; 333 334 case EVEX_16bit: 335 mod_idx = 1; 336 break; 337 338 case EVEX_32bit: 339 mod_idx = 2; 340 break; 341 342 case EVEX_64bit: 343 mod_idx = 3; 344 break; 345 } 346 break; 347 348 case EVEX_T1F: 349 case EVEX_T2: 350 case EVEX_T4: 351 mod_idx = (in_size_in_bits == EVEX_64bit) ? 
1 : 0; 352 break; 353 354 case EVEX_T8: 355 break; 356 357 case EVEX_HVM: 358 break; 359 360 case EVEX_QVM: 361 break; 362 363 case EVEX_OVM: 364 break; 365 366 case EVEX_M128: 367 break; 368 369 case EVEX_DUP: 370 break; 371 372 default: 373 assert(0, "no valid evex tuple_table entry"); 374 break; 375 } 376 377 if (vector_len >= AVX_128bit && vector_len <= AVX_512bit) { 378 int disp_factor = tuple_table[cur_tuple_type + mod_idx][vector_len]; 379 if ((disp % disp_factor) == 0) { 380 int new_disp = disp / disp_factor; 381 if ((-0x80 <= new_disp && new_disp < 0x80)) { 382 disp = new_disp; 383 } 384 } else { 385 return false; 386 } 387 } 388 } 389 return (-0x80 <= disp && disp < 0x80); 390 } 391 392 393 bool Assembler::emit_compressed_disp_byte(int &disp) { 394 int mod_idx = 0; 395 // We will test if the displacement fits the compressed format and if so 396 // apply the compression to the displacment iff the result is8bit. 397 if (VM_Version::supports_evex() && _is_evex_instruction) { 398 switch (_tuple_type) { 399 case EVEX_FV: 400 if ((_evex_encoding & VEX_W) == VEX_W) { 401 mod_idx += 2 + ((_evex_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0; 402 } else { 403 mod_idx = ((_evex_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0; 404 } 405 break; 406 407 case EVEX_HV: 408 mod_idx = ((_evex_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0; 409 break; 410 411 case EVEX_FVM: 412 break; 413 414 case EVEX_T1S: 415 switch (_input_size_in_bits) { 416 case EVEX_8bit: 417 break; 418 419 case EVEX_16bit: 420 mod_idx = 1; 421 break; 422 423 case EVEX_32bit: 424 mod_idx = 2; 425 break; 426 427 case EVEX_64bit: 428 mod_idx = 3; 429 break; 430 } 431 break; 432 433 case EVEX_T1F: 434 case EVEX_T2: 435 case EVEX_T4: 436 mod_idx = (_input_size_in_bits == EVEX_64bit) ? 
1 : 0; 437 break; 438 439 case EVEX_T8: 440 break; 441 442 case EVEX_HVM: 443 break; 444 445 case EVEX_QVM: 446 break; 447 448 case EVEX_OVM: 449 break; 450 451 case EVEX_M128: 452 break; 453 454 case EVEX_DUP: 455 break; 456 457 default: 458 assert(0, "no valid evex tuple_table entry"); 459 break; 460 } 461 462 if (_avx_vector_len >= AVX_128bit && _avx_vector_len <= AVX_512bit) { 463 int disp_factor = tuple_table[_tuple_type + mod_idx][_avx_vector_len]; 464 if ((disp % disp_factor) == 0) { 465 int new_disp = disp / disp_factor; 466 if (is8bit(new_disp)) { 467 disp = new_disp; 468 } 469 } else { 470 return false; 471 } 472 } 473 } 474 return is8bit(disp); 475 } 476 477 478 void Assembler::emit_operand(Register reg, Register base, Register index, 479 Address::ScaleFactor scale, int disp, 480 RelocationHolder const& rspec, 481 int rip_relative_correction) { 482 relocInfo::relocType rtype = (relocInfo::relocType) rspec.type(); 483 484 // Encode the registers as needed in the fields they are used in 485 486 int regenc = encode(reg) << 3; 487 int indexenc = index->is_valid() ? encode(index) << 3 : 0; 488 int baseenc = base->is_valid() ? 
encode(base) : 0; 489 490 if (base->is_valid()) { 491 if (index->is_valid()) { 492 assert(scale != Address::no_scale, "inconsistent address"); 493 // [base + index*scale + disp] 494 if (disp == 0 && rtype == relocInfo::none && 495 base != rbp LP64_ONLY(&& base != r13)) { 496 // [base + index*scale] 497 // [00 reg 100][ss index base] 498 assert(index != rsp, "illegal addressing mode"); 499 emit_int8(0x04 | regenc); 500 emit_int8(scale << 6 | indexenc | baseenc); 501 } else if (emit_compressed_disp_byte(disp) && rtype == relocInfo::none) { 502 // [base + index*scale + imm8] 503 // [01 reg 100][ss index base] imm8 504 assert(index != rsp, "illegal addressing mode"); 505 emit_int8(0x44 | regenc); 506 emit_int8(scale << 6 | indexenc | baseenc); 507 emit_int8(disp & 0xFF); 508 } else { 509 // [base + index*scale + disp32] 510 // [10 reg 100][ss index base] disp32 511 assert(index != rsp, "illegal addressing mode"); 512 emit_int8(0x84 | regenc); 513 emit_int8(scale << 6 | indexenc | baseenc); 514 emit_data(disp, rspec, disp32_operand); 515 } 516 } else if (base == rsp LP64_ONLY(|| base == r12)) { 517 // [rsp + disp] 518 if (disp == 0 && rtype == relocInfo::none) { 519 // [rsp] 520 // [00 reg 100][00 100 100] 521 emit_int8(0x04 | regenc); 522 emit_int8(0x24); 523 } else if (emit_compressed_disp_byte(disp) && rtype == relocInfo::none) { 524 // [rsp + imm8] 525 // [01 reg 100][00 100 100] disp8 526 emit_int8(0x44 | regenc); 527 emit_int8(0x24); 528 emit_int8(disp & 0xFF); 529 } else { 530 // [rsp + imm32] 531 // [10 reg 100][00 100 100] disp32 532 emit_int8(0x84 | regenc); 533 emit_int8(0x24); 534 emit_data(disp, rspec, disp32_operand); 535 } 536 } else { 537 // [base + disp] 538 assert(base != rsp LP64_ONLY(&& base != r12), "illegal addressing mode"); 539 if (disp == 0 && rtype == relocInfo::none && 540 base != rbp LP64_ONLY(&& base != r13)) { 541 // [base] 542 // [00 reg base] 543 emit_int8(0x00 | regenc | baseenc); 544 } else if (emit_compressed_disp_byte(disp) && rtype 
== relocInfo::none) { 545 // [base + disp8] 546 // [01 reg base] disp8 547 emit_int8(0x40 | regenc | baseenc); 548 emit_int8(disp & 0xFF); 549 } else { 550 // [base + disp32] 551 // [10 reg base] disp32 552 emit_int8(0x80 | regenc | baseenc); 553 emit_data(disp, rspec, disp32_operand); 554 } 555 } 556 } else { 557 if (index->is_valid()) { 558 assert(scale != Address::no_scale, "inconsistent address"); 559 // [index*scale + disp] 560 // [00 reg 100][ss index 101] disp32 561 assert(index != rsp, "illegal addressing mode"); 562 emit_int8(0x04 | regenc); 563 emit_int8(scale << 6 | indexenc | 0x05); 564 emit_data(disp, rspec, disp32_operand); 565 } else if (rtype != relocInfo::none ) { 566 // [disp] (64bit) RIP-RELATIVE (32bit) abs 567 // [00 000 101] disp32 568 569 emit_int8(0x05 | regenc); 570 // Note that the RIP-rel. correction applies to the generated 571 // disp field, but _not_ to the target address in the rspec. 572 573 // disp was created by converting the target address minus the pc 574 // at the start of the instruction. That needs more correction here. 
575 // intptr_t disp = target - next_ip; 576 assert(inst_mark() != NULL, "must be inside InstructionMark"); 577 address next_ip = pc() + sizeof(int32_t) + rip_relative_correction; 578 int64_t adjusted = disp; 579 // Do rip-rel adjustment for 64bit 580 LP64_ONLY(adjusted -= (next_ip - inst_mark())); 581 assert(is_simm32(adjusted), 582 "must be 32bit offset (RIP relative address)"); 583 emit_data((int32_t) adjusted, rspec, disp32_operand); 584 585 } else { 586 // 32bit never did this, did everything as the rip-rel/disp code above 587 // [disp] ABSOLUTE 588 // [00 reg 100][00 100 101] disp32 589 emit_int8(0x04 | regenc); 590 emit_int8(0x25); 591 emit_data(disp, rspec, disp32_operand); 592 } 593 } 594 _is_evex_instruction = false; 595 } 596 597 void Assembler::emit_operand(XMMRegister reg, Register base, Register index, 598 Address::ScaleFactor scale, int disp, 599 RelocationHolder const& rspec) { 600 if (UseAVX > 2) { 601 int xreg_enc = reg->encoding(); 602 if (xreg_enc > 15) { 603 XMMRegister new_reg = as_XMMRegister(xreg_enc & 0xf); 604 emit_operand((Register)new_reg, base, index, scale, disp, rspec); 605 return; 606 } 607 } 608 emit_operand((Register)reg, base, index, scale, disp, rspec); 609 } 610 611 // Secret local extension to Assembler::WhichOperand: 612 #define end_pc_operand (_WhichOperand_limit) 613 614 address Assembler::locate_operand(address inst, WhichOperand which) { 615 // Decode the given instruction, and return the address of 616 // an embedded 32-bit operand word. 617 618 // If "which" is disp32_operand, selects the displacement portion 619 // of an effective address specifier. 620 // If "which" is imm64_operand, selects the trailing immediate constant. 621 // If "which" is call32_operand, selects the displacement of a call or jump. 622 // Caller is responsible for ensuring that there is such an operand, 623 // and that it is 32/64 bits wide. 624 625 // If "which" is end_pc_operand, find the end of the instruction. 
address ip = inst;
  bool is_64bit = false;

  debug_only(bool has_disp32 = false);
  int tail_size = 0; // other random bytes (#32, #16, etc.) at end of insn

  // Phase 1: consume prefixes and opcode bytes.  Cases that return directly
  // have located their operand (or the instruction end); cases that "break"
  // leave ip at the first addressing byte for phase 2 below.
 again_after_prefix:
  switch (0xFF & *ip++) {

  // These convenience macros generate groups of "case" labels for the switch.
#define REP4(x) (x)+0: case (x)+1: case (x)+2: case (x)+3
#define REP8(x) (x)+0: case (x)+1: case (x)+2: case (x)+3: \
             case (x)+4: case (x)+5: case (x)+6: case (x)+7
#define REP16(x) REP8((x)+0): \
              case REP8((x)+8)

  case CS_segment:
  case SS_segment:
  case DS_segment:
  case ES_segment:
  case FS_segment:
  case GS_segment:
    // Seems dubious
    LP64_ONLY(assert(false, "shouldn't have that prefix"));
    assert(ip == inst+1, "only one prefix allowed");
    goto again_after_prefix;

  case 0x67:
  case REX:
  case REX_B:
  case REX_X:
  case REX_XB:
  case REX_R:
  case REX_RB:
  case REX_RX:
  case REX_RXB:
    NOT_LP64(assert(false, "64bit prefixes"));
    goto again_after_prefix;

  case REX_W:
  case REX_WB:
  case REX_WX:
  case REX_WXB:
  case REX_WR:
  case REX_WRB:
  case REX_WRX:
  case REX_WRXB:
    NOT_LP64(assert(false, "64bit prefixes"));
    is_64bit = true;
    goto again_after_prefix;

  case 0xFF: // pushq a; decl a; incl a; call a; jmp a
  case 0x88: // movb a, r
  case 0x89: // movl a, r
  case 0x8A: // movb r, a
  case 0x8B: // movl r, a
  case 0x8F: // popl a
    debug_only(has_disp32 = true);
    break;

  case 0x68: // pushq #32
    if (which == end_pc_operand) {
      return ip + 4;
    }
    assert(which == imm_operand && !is_64bit, "pushl has no disp32 or 64bit immediate");
    return ip;                  // not produced by emit_operand

  case 0x66: // movw ... (size prefix)
    again_after_size_prefix2:
    switch (0xFF & *ip++) {
    case REX:
    case REX_B:
    case REX_X:
    case REX_XB:
    case REX_R:
    case REX_RB:
    case REX_RX:
    case REX_RXB:
    case REX_W:
    case REX_WB:
    case REX_WX:
    case REX_WXB:
    case REX_WR:
    case REX_WRB:
    case REX_WRX:
    case REX_WRXB:
      NOT_LP64(assert(false, "64bit prefix found"));
      goto again_after_size_prefix2;
    case 0x8B: // movw r, a
    case 0x89: // movw a, r
      debug_only(has_disp32 = true);
      break;
    case 0xC7: // movw a, #16
      debug_only(has_disp32 = true);
      tail_size = 2;  // the imm16
      break;
    case 0x0F: // several SSE/SSE2 variants
      ip--;    // reparse the 0x0F
      goto again_after_prefix;
    default:
      ShouldNotReachHere();
    }
    break;

  case REP8(0xB8): // movl/q r, #32/#64(oop?)
    if (which == end_pc_operand)  return ip + (is_64bit ? 8 : 4);
    // these asserts are somewhat nonsensical
#ifndef _LP64
    assert(which == imm_operand || which == disp32_operand,
           err_msg("which %d is_64_bit %d ip " INTPTR_FORMAT, which, is_64bit, p2i(ip)));
#else
    assert((which == call32_operand || which == imm_operand) && is_64bit ||
           which == narrow_oop_operand && !is_64bit,
           err_msg("which %d is_64_bit %d ip " INTPTR_FORMAT, which, is_64bit, p2i(ip)));
#endif // _LP64
    return ip;

  case 0x69: // imul r, a, #32
  case 0xC7: // movl a, #32(oop?)
    tail_size = 4;
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0x0F: // movx..., etc.
    switch (0xFF & *ip++) {
    case 0x3A: // pcmpestri
      tail_size = 1;
      // falls through: 0x3A shares the extra opcode-byte skip with 0x38
    case 0x38: // ptest, pmovzxbw
      ip++; // skip opcode
      debug_only(has_disp32 = true); // has both kinds of operands!
      break;

    case 0x70: // pshufd r, r/a, #8
      debug_only(has_disp32 = true); // has both kinds of operands!
      // falls through: both forms end with an imm8
    case 0x73: // psrldq r, #8
      tail_size = 1;
      break;

    case 0x12: // movlps
    case 0x28: // movaps
    case 0x2E: // ucomiss
    case 0x2F: // comiss
    case 0x54: // andps
    case 0x55: // andnps
    case 0x56: // orps
    case 0x57: // xorps
    case 0x6E: // movd
    case 0x7E: // movd
    case 0xAE: // ldmxcsr, stmxcsr, fxrstor, fxsave, clflush
      debug_only(has_disp32 = true);
      break;

    case 0xAD: // shrd r, a, %cl
    case 0xAF: // imul r, a
    case 0xBE: // movsbl r, a (movsxb)
    case 0xBF: // movswl r, a (movsxw)
    case 0xB6: // movzbl r, a (movzxb)
    case 0xB7: // movzwl r, a (movzxw)
    case REP16(0x40): // cmovl cc, r, a
    case 0xB0: // cmpxchgb
    case 0xB1: // cmpxchg
    case 0xC1: // xaddl
    case 0xC7: // cmpxchg8
    case REP16(0x90): // setcc a
      debug_only(has_disp32 = true);
      // fall out of the switch to decode the address
      break;

    case 0xC4: // pinsrw r, a, #8
      debug_only(has_disp32 = true);
      // falls through: both forms end with an imm8
    case 0xC5: // pextrw r, r, #8
      tail_size = 1;  // the imm8
      break;

    case 0xAC: // shrd r, a, #8
      debug_only(has_disp32 = true);
      tail_size = 1;  // the imm8
      break;

    case REP16(0x80): // jcc rdisp32
      if (which == end_pc_operand)  return ip + 4;
      assert(which == call32_operand, "jcc has no disp32 or imm");
      return ip;
    default:
      ShouldNotReachHere();
    }
    break;

  case 0x81: // addl a, #32; addl r, #32
    // also: orl, adcl, sbbl, andl, subl, xorl, cmpl
    // on 32bit in the case of cmpl, the imm might be an oop
    tail_size = 4;
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0x83: // addl a, #8; addl r, #8
    // also: orl, adcl, sbbl, andl, subl, xorl, cmpl
    debug_only(has_disp32 = true); // has both kinds of operands!
    tail_size = 1;
    break;

  case 0x9B:
    switch (0xFF & *ip++) {
    case 0xD9: // fnstcw a
      debug_only(has_disp32 = true);
      break;
    default:
      ShouldNotReachHere();
    }
    break;

  case REP4(0x00): // addb a, r; addl a, r; addb r, a; addl r, a
  case REP4(0x10): // adc...
  case REP4(0x20): // and...
  case REP4(0x30): // xor...
  case REP4(0x08): // or...
  case REP4(0x18): // sbb...
  case REP4(0x28): // sub...
  case 0xF7: // mull a
  case 0x8D: // lea r, a
  case 0x87: // xchg r, a
  case REP4(0x38): // cmp...
  case 0x85: // test r, a
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0xC1: // sal a, #8; sar a, #8; shl a, #8; shr a, #8
  case 0xC6: // movb a, #8
  case 0x80: // cmpb a, #8
  case 0x6B: // imul r, a, #8
    debug_only(has_disp32 = true); // has both kinds of operands!
    tail_size = 1; // the imm8
    break;

  case 0xC4: // VEX_3bytes
  case 0xC5: // VEX_2bytes
    assert((UseAVX > 0), "shouldn't have VEX prefix");
    assert(ip == inst+1, "no prefixes allowed");
    // C4 and C5 are also used as opcodes for PINSRW and PEXTRW instructions
    // but they have prefix 0x0F and processed when 0x0F processed above.
    //
    // In 32-bit mode the VEX first byte C4 and C5 alias onto LDS and LES
    // instructions (these instructions are not supported in 64-bit mode).
    // To distinguish them bits [7:6] are set in the VEX second byte since
    // ModRM byte can not be of the form 11xxxxxx in 32-bit mode. To set
    // those VEX bits REX and vvvv bits are inverted.
    //
    // Fortunately C2 doesn't generate these instructions so we don't need
    // to check for them in product version.

    // Check second byte
    NOT_LP64(assert((0xC0 & *ip) == 0xC0, "shouldn't have LDS and LES instructions"));

    // First byte
    if ((0xFF & *inst) == VEX_3bytes) {
      ip++; // third byte
      is_64bit = ((VEX_W & *ip) == VEX_W);
    }
    ip++; // opcode
    // To find the end of instruction (which == end_pc_operand).
    switch (0xFF & *ip) {
    case 0x61: // pcmpestri r, r/a, #8
    case 0x70: // pshufd r, r/a, #8
    case 0x73: // psrldq r, #8
      tail_size = 1;  // the imm8
      break;
    default:
      break;
    }
    ip++; // skip opcode
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0x62: // EVEX_4bytes
    assert((UseAVX > 0), "shouldn't have EVEX prefix");
    assert(ip == inst+1, "no prefixes allowed");
    // no EVEX collisions, all instructions that have 0x62 opcodes
    // have EVEX versions and are subopcodes of 0x66
    ip++; // skip P0 and examine W in P1
    is_64bit = ((VEX_W & *ip) == VEX_W);
    ip++; // move to P2
    ip++; // skip P2, move to opcode
    // To find the end of instruction (which == end_pc_operand).
    switch (0xFF & *ip) {
    case 0x61: // pcmpestri r, r/a, #8
    case 0x70: // pshufd r, r/a, #8
    case 0x73: // psrldq r, #8
      tail_size = 1;  // the imm8
      break;
    default:
      break;
    }
    ip++; // skip opcode
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0xD1: // sal a, 1; sar a, 1; shl a, 1; shr a, 1
  case 0xD3: // sal a, %cl; sar a, %cl; shl a, %cl; shr a, %cl
  case 0xD9: // fld_s a; fst_s a; fstp_s a; fldcw a
  case 0xDD: // fld_d a; fst_d a; fstp_d a
  case 0xDB: // fild_s a; fistp_s a; fld_x a; fstp_x a
  case 0xDF: // fild_d a; fistp_d a
  case 0xD8: // fadd_s a; fsubr_s a; fmul_s a; fdivr_s a; fcomp_s a
  case 0xDC: // fadd_d a; fsubr_d a; fmul_d a; fdivr_d a; fcomp_d a
  case 0xDE: // faddp_d a; fsubrp_d a; fmulp_d a; fdivrp_d a; fcompp_d a
    debug_only(has_disp32 = true);
    break;

  case 0xE8: // call rdisp32
  case 0xE9: // jmp  rdisp32
    if (which == end_pc_operand)  return ip + 4;
    assert(which == call32_operand, "call has no disp32 or imm");
    return ip;

  case 0xF0:                    // Lock
    assert(os::is_MP(), "only on MP");
    goto again_after_prefix;

  case 0xF3:                    // For SSE
  case 0xF2:                    // For SSE2
    switch (0xFF & *ip++) {
    case REX:
    case REX_B:
    case REX_X:
    case REX_XB:
    case REX_R:
    case REX_RB:
    case REX_RX:
    case REX_RXB:
    case REX_W:
    case REX_WB:
    case REX_WX:
    case REX_WXB:
    case REX_WR:
    case REX_WRB:
    case REX_WRX:
    case REX_WRXB:
      NOT_LP64(assert(false, "found 64bit prefix"));
      ip++;
      // falls through: a REX byte costs one extra skip on top of the default
    default:
      ip++;
    }
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  default:
    ShouldNotReachHere();

#undef REP8
#undef REP16
  }

  assert(which != call32_operand, "instruction is not a call, jmp, or jcc");
#ifdef _LP64
  assert(which != imm_operand, "instruction is not a movq reg, imm64");
#else
  // assert(which != imm_operand || has_imm32, "instruction has no imm32 field");
  assert(which != imm_operand || has_disp32, "instruction has no imm32 field");
#endif // LP64
  assert(which != disp32_operand || has_disp32, "instruction has no disp32 field");

  // Phase 2: walk the addressing bytes written by emit_operand.
  // parse the output of emit_operand
  int op2 = 0xFF & *ip++;
  int base = op2 & 0x07;
  int op3 = -1;
  const int b100 = 4;
  const int b101 = 5;
  // A low field of 100 in a memory form means one more addressing byte follows.
  if (base == b100 && (op2 >> 6) != 3) {
    op3 = 0xFF & *ip++;
    base = op3 & 0x07;   // refetch the base
  }
  // now ip points at the disp (if any)

  switch (op2 >> 6) {
  case 0:
    // [00 reg  100][ss index base]
    // [00 reg  100][00   100  esp]
    // [00 reg base]
    // [00 reg  100][ss index  101][disp32]
    // [00 reg  101]               [disp32]

    if (base == b101) {
      if (which == disp32_operand)
        return ip;              // caller wants the disp32
      ip += 4;                  // skip the disp32
    }
    break;

  case 1:
    // [01 reg  100][ss index base][disp8]
    // [01 reg  100][00   100  esp][disp8]
    // [01 reg base]               [disp8]
    ip += 1;                    // skip the disp8
    break;

  case 2:
    // [10 reg  100][ss index base][disp32]
    // [10 reg  100][00   100  esp][disp32]
    // [10 reg base]               [disp32]
    if (which == disp32_operand)
      return ip;                // caller wants the disp32
    ip += 4;                    // skip the disp32
    break;

  case 3:
    // [11 reg base]  (not a memory addressing mode)
    break;
  }

  if (which == end_pc_operand) {
    // Instruction end = end of addressing bytes plus any trailing immediate.
    return ip + tail_size;
  }

#ifdef _LP64
  assert(which == narrow_oop_operand && !is_64bit, "instruction is not a movl adr, imm32");
#else
  assert(which == imm_operand, "instruction has only an imm field");
#endif // LP64
  return ip;
}

// Returns the address just past the instruction starting at 'inst'.
address Assembler::locate_next_instruction(address inst) {
  // Secretly share code with locate_operand:
  return locate_operand(inst, end_pc_operand);
}


#ifdef ASSERT
// Debug-only check that the operand about to be emitted at pc() is where
// locate_operand() finds it for the instruction starting at inst_mark().
// Relocations that are neither calls nor data only have their format checked.
void Assembler::check_relocation(RelocationHolder const& rspec, int format) {
  address inst = inst_mark();
  assert(inst != NULL && inst < pc(), "must point to beginning of instruction");
  address opnd;

  Relocation* r = rspec.reloc();
  if (r->type() == relocInfo::none) {
    return;
  } else if (r->is_call() || format == call32_operand) {
    // assert(format == imm32_operand, "cannot specify a nonzero format");
    opnd = locate_operand(inst, call32_operand);
  } else if (r->is_data()) {
    assert(format == imm_operand || format == disp32_operand
           LP64_ONLY(|| format == narrow_oop_operand), "format ok");
    opnd = locate_operand(inst, (WhichOperand)format);
  } else {
    assert(format == imm_operand, "cannot specify a format");
    return;
  }
  assert(opnd == pc(), "must put operand where relocs can find it");
}
#endif // ASSERT

// Emits the operand bytes for 'adr'; asserts that neither 'reg' nor the
// address registers are extended registers.
void Assembler::emit_operand32(Register reg, Address adr) {
  assert(reg->encoding() < 8, "no extended registers");
  assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
  emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
               adr._rspec);
}

// Unpacks 'adr' and forwards to the component-wise emit_operand.
void Assembler::emit_operand(Register reg, Address adr,
                             int rip_relative_correction) {
  emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
               adr._rspec,
               rip_relative_correction);
}

// Unpacks 'adr' and forwards to the component-wise XMM emit_operand.
void Assembler::emit_operand(XMMRegister reg, Address adr) {
  emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
               adr._rspec);
}

// MMX operations
void Assembler::emit_operand(MMXRegister reg, Address adr) {
assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
  emit_operand((Register)reg, adr._base, adr._index, adr._scale, adr._disp, adr._rspec);
}

// work around gcc (3.2.1-7a) bug
// Same as the (MMXRegister, Address) overload with the parameters swapped.
void Assembler::emit_operand(Address adr, MMXRegister reg) {
  assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
  emit_operand((Register)reg, adr._base, adr._index, adr._scale, adr._disp, adr._rspec);
}


// Emits a two-byte instruction b1, b2+i where i (0..7) is added into the
// second opcode byte (the assert message calls it a stack offset).
void Assembler::emit_farith(int b1, int b2, int i) {
  assert(isByte(b1) && isByte(b2), "wrong opcode");
  assert(0 <= i && i < 8, "illegal stack offset");
  emit_int8(b1);
  emit_int8(b2 + i);
}


// Now the Assembler instructions (identical for 32/64 bits)

// adcl mem, imm32: opcode 0x81 via emit_arith_operand. rdx is not an operand
// here; presumably it supplies the opcode-extension bits -- confirm against
// emit_arith_operand.
void Assembler::adcl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_arith_operand(0x81, rdx, dst, imm32);
}

// adcl mem, reg: opcode 0x11 followed by the operand bytes for (src, dst).
void Assembler::adcl(Address dst, Register src) {
  InstructionMark im(this);
  prefix(dst, src);
  emit_int8(0x11);
  emit_operand(src, dst);
}

// adcl reg, imm32: emit_arith with opcode 0x81 and second-byte base 0xD0.
void Assembler::adcl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xD0, dst, imm32);
}

// adcl reg, mem: opcode 0x13 followed by the operand bytes for (dst, src).
void Assembler::adcl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_int8(0x13);
  emit_operand(dst, src);
}

// adcl reg, reg: the prefix is emitted for its side effect only (the returned
// encoding is discarded); emit_arith then emits 0x13 with base byte 0xC0.
void Assembler::adcl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x13, 0xC0, dst, src);
}

// addl mem, imm32: opcode 0x81 via emit_arith_operand, with rax in the
// register slot (presumably the opcode extension -- confirm).
void Assembler::addl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_arith_operand(0x81, rax, dst, imm32);
}

// addl mem, reg: opcode 0x01 followed by the operand bytes for (src, dst).
void Assembler::addl(Address dst, Register src) {
  InstructionMark im(this);
  prefix(dst, src);
  emit_int8(0x01);
  emit_operand(src, dst);
}

// addl reg, imm32: emit_arith with opcode 0x81 and second-byte base 0xC0.
void Assembler::addl(Register dst, int32_t imm32) {
prefix(dst);
  emit_arith(0x81, 0xC0, dst, imm32);
}

// addl reg, mem: opcode 0x03 followed by the operand bytes for (dst, src).
void Assembler::addl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_int8(0x03);
  emit_operand(dst, src);
}

// addl reg, reg: prefix emitted for its side effect only; emit_arith emits
// 0x03 with base byte 0xC0.
void Assembler::addl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x03, 0xC0, dst, src);
}

// The addr_nop_N helpers emit an N-byte NOP built from the 0F 1F opcode with
// an address operand; all require the UseAddressNop flag.
void Assembler::addr_nop_4() {
  assert(UseAddressNop, "no CPU support");
  // 4 bytes: NOP DWORD PTR [EAX+0]
  emit_int8(0x0F);
  emit_int8(0x1F);
  emit_int8(0x40); // emit_rm(cbuf, 0x1, EAX_enc, EAX_enc);
  emit_int8(0);    // 8-bits offset (1 byte)
}

void Assembler::addr_nop_5() {
  assert(UseAddressNop, "no CPU support");
  // 5 bytes: NOP DWORD PTR [EAX+EAX*0+0] 8-bits offset
  emit_int8(0x0F);
  emit_int8(0x1F);
  emit_int8(0x44); // emit_rm(cbuf, 0x1, EAX_enc, 0x4);
  emit_int8(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
  emit_int8(0);    // 8-bits offset (1 byte)
}

void Assembler::addr_nop_7() {
  assert(UseAddressNop, "no CPU support");
  // 7 bytes: NOP DWORD PTR [EAX+0] 32-bits offset
  emit_int8(0x0F);
  emit_int8(0x1F);
  emit_int8((unsigned char)0x80);
                   // emit_rm(cbuf, 0x2, EAX_enc, EAX_enc);
  emit_int32(0);   // 32-bits offset (4 bytes)
}

void Assembler::addr_nop_8() {
  assert(UseAddressNop, "no CPU support");
  // 8 bytes: NOP DWORD PTR [EAX+EAX*0+0] 32-bits offset
  emit_int8(0x0F);
  emit_int8(0x1F);
  emit_int8((unsigned char)0x84);
                   // emit_rm(cbuf, 0x2, EAX_enc, 0x4);
  emit_int8(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
  emit_int32(0);   // 32-bits offset (4 bytes)
}

// addsd xmm, xmm: opcode 0x58 with the F2 SIMD prefix. With EVEX support the
// "_q" emitter variant is used, otherwise the plain one.
void Assembler::addsd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  if (VM_Version::supports_evex()) {
    emit_simd_arith_q(0x58, dst, src, VEX_SIMD_F2);
  } else {
emit_simd_arith(0x58, dst, src, VEX_SIMD_F2);
  }
}

// addsd xmm, mem: as the register form, but with EVEX support it first
// records the tuple type (T1S) and a 64-bit input size in the assembler
// state (presumably consumed when the displacement is encoded -- confirm).
void Assembler::addsd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  if (VM_Version::supports_evex()) {
    _tuple_type = EVEX_T1S;
    _input_size_in_bits = EVEX_64bit;
    emit_simd_arith_q(0x58, dst, src, VEX_SIMD_F2);
  } else {
    emit_simd_arith(0x58, dst, src, VEX_SIMD_F2);
  }
}

// addss xmm, xmm: opcode 0x58 with the F3 SIMD prefix.
void Assembler::addss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_simd_arith(0x58, dst, src, VEX_SIMD_F3);
}

// addss xmm, mem: records T1S / 32-bit input size when EVEX is supported,
// then emits through the same path as the register form.
void Assembler::addss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  if (VM_Version::supports_evex()) {
    _tuple_type = EVEX_T1S;
    _input_size_in_bits = EVEX_32bit;
  }
  emit_simd_arith(0x58, dst, src, VEX_SIMD_F3);
}

// aesdec xmm, mem: 66-prefixed, 0F 38 opcode map, opcode byte 0xDE. dst is
// passed as both destination and first source; legacy_mode is forced on.
void Assembler::aesdec(XMMRegister dst, Address src) {
  assert(VM_Version::supports_aes(), "");
  InstructionMark im(this);
  simd_prefix(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false,
              VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true);
  emit_int8((unsigned char)0xDE);
  emit_operand(dst, src);
}

// aesdec xmm, xmm: same prefix and opcode; the final byte is 0xC0 OR-ed with
// the register encoding returned by the prefix helper.
void Assembler::aesdec(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_aes(), "");
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false,
                                      VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true);
  emit_int8((unsigned char)0xDE);
  emit_int8(0xC0 | encode);
}

// aesdeclast xmm, mem: as aesdec but with opcode byte 0xDF.
void Assembler::aesdeclast(XMMRegister dst, Address src) {
  assert(VM_Version::supports_aes(), "");
  InstructionMark im(this);
  simd_prefix(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false,
              VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true);
  emit_int8((unsigned char)0xDF);
  emit_operand(dst, src);
}

// aesdeclast xmm, xmm: 66-prefixed, 0F 38 opcode map, opcode byte 0xDF; the
// final byte is 0xC0 OR-ed with the encoding returned by the prefix helper.
void Assembler::aesdeclast(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_aes(), "");
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false,
                                      VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true);
  emit_int8((unsigned char)0xDF);
  emit_int8((unsigned char)(0xC0 | encode));
}

// aesenc xmm, mem: same prefix scheme, opcode byte 0xDC.
void Assembler::aesenc(XMMRegister dst, Address src) {
  assert(VM_Version::supports_aes(), "");
  InstructionMark im(this);
  simd_prefix(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false,
              VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true);
  emit_int8((unsigned char)0xDC);
  emit_operand(dst, src);
}

// aesenc xmm, xmm: opcode byte 0xDC, register-direct form.
void Assembler::aesenc(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_aes(), "");
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false,
                                      VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true);
  emit_int8((unsigned char)0xDC);
  emit_int8(0xC0 | encode);
}

// aesenclast xmm, mem: opcode byte 0xDD.
void Assembler::aesenclast(XMMRegister dst, Address src) {
  assert(VM_Version::supports_aes(), "");
  InstructionMark im(this);
  simd_prefix(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false,
              VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true);
  emit_int8((unsigned char)0xDD);
  emit_operand(dst, src);
}

// aesenclast xmm, xmm: opcode byte 0xDD, register-direct form.
void Assembler::aesenclast(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_aes(), "");
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false,
                                      VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true);
  emit_int8((unsigned char)0xDD);
  emit_int8((unsigned char)(0xC0 | encode));
}

// andl mem, imm32: opcode 0x81, then the operand bytes, then the 32-bit
// immediate. The operand is emitted with a correction of 4, matching the four
// immediate bytes that follow it.
void Assembler::andl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_int8((unsigned char)0x81);
emit_operand(rsp, dst, 4);
  emit_int32(imm32);
}

// andl reg, imm32: emit_arith with opcode 0x81 and second-byte base 0xE0.
void Assembler::andl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xE0, dst, imm32);
}

// andl reg, mem: opcode 0x23 followed by the operand bytes for (dst, src).
void Assembler::andl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_int8(0x23);
  emit_operand(dst, src);
}

// andl reg, reg: prefix emitted for its side effect only; emit_arith emits
// 0x23 with base byte 0xC0.
void Assembler::andl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x23, 0xC0, dst, src);
}

// andnl dst, src1, src2 (BMI1): VEX 0F38-map prefix, opcode byte 0xF2,
// register-direct form.
void Assembler::andnl(Register dst, Register src1, Register src2) {
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  int encode = vex_prefix_0F38_and_encode_legacy(dst, src1, src2);
  emit_int8((unsigned char)0xF2);
  emit_int8((unsigned char)(0xC0 | encode));
}

// andnl dst, src1, mem (BMI1): memory-operand form of the above.
void Assembler::andnl(Register dst, Register src1, Address src2) {
  InstructionMark im(this);
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  vex_prefix_0F38_legacy(dst, src1, src2);
  emit_int8((unsigned char)0xF2);
  emit_operand(dst, src2);
}

// bsfl dst, src: two-byte opcode 0F BC, register-direct.
void Assembler::bsfl(Register dst, Register src) {
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_int8(0x0F);
  emit_int8((unsigned char)0xBC);
  emit_int8((unsigned char)(0xC0 | encode));
}

// bsrl dst, src: two-byte opcode 0F BD, register-direct.
void Assembler::bsrl(Register dst, Register src) {
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_int8(0x0F);
  emit_int8((unsigned char)0xBD);
  emit_int8((unsigned char)(0xC0 | encode));
}

// bswapl reg: 0F followed by 0xC8 OR-ed with the register encoding.
void Assembler::bswapl(Register reg) { // bswap
  int encode = prefix_and_encode(reg->encoding());
  emit_int8(0x0F);
  emit_int8((unsigned char)(0xC8 | encode));
}

// blsil / blsmskl / blsrl (BMI1) all use opcode byte 0xF3 after a VEX 0F38
// prefix and differ only in the fixed register passed in the first slot
// (rbx, rdx, rcx respectively). Presumably that register selects the opcode
// extension while dst travels in the second slot -- confirm against
// vex_prefix_0F38_and_encode_legacy.
void Assembler::blsil(Register dst, Register src) {
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  int encode = vex_prefix_0F38_and_encode_legacy(rbx, dst, src);
  emit_int8((unsigned char)0xF3);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::blsil(Register dst, Address src) {
  InstructionMark im(this);
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  vex_prefix_0F38_legacy(rbx, dst, src);
  emit_int8((unsigned char)0xF3);
  emit_operand(rbx, src);
}

void Assembler::blsmskl(Register dst, Register src) {
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  int encode = vex_prefix_0F38_and_encode_legacy(rdx, dst, src);
  emit_int8((unsigned char)0xF3);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::blsmskl(Register dst, Address src) {
  InstructionMark im(this);
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  vex_prefix_0F38_legacy(rdx, dst, src);
  emit_int8((unsigned char)0xF3);
  emit_operand(rdx, src);
}

void Assembler::blsrl(Register dst, Register src) {
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  int encode = vex_prefix_0F38_and_encode_legacy(rcx, dst, src);
  emit_int8((unsigned char)0xF3);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::blsrl(Register dst, Address src) {
  InstructionMark im(this);
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  vex_prefix_0F38_legacy(rcx, dst, src);
  emit_int8((unsigned char)0xF3);
  emit_operand(rcx, src);
}

// call to a Label: opcode 0xE8 plus a 32-bit displacement emitted through
// emit_data with relocation type rtype.
//  - Bound label: the target must not lie ahead of pc() (offs <= 0); the
//    displacement is offs minus the 5-byte instruction length.
//  - Unbound label: a patch site is registered and a zero placeholder emitted.
void Assembler::call(Label& L, relocInfo::relocType rtype) {
  // suspect disp32 is always good
  int operand = LP64_ONLY(disp32_operand) NOT_LP64(imm_operand);

  if (L.is_bound()) {
    const int long_size = 5;
    int offs = (int)( target(L) - pc() );
    assert(offs <= 0, "assembler error");
    InstructionMark im(this);
    // 1110 1000 #32-bit disp
    emit_int8((unsigned char)0xE8);
    emit_data(offs - long_size, rtype, operand);
  } else {
    InstructionMark im(this);
    // 1110 1000 #32-bit disp
    L.add_patch_at(code(), locator());

    emit_int8((unsigned char)0xE8);
    emit_data(int(0), rtype, operand);
  }
}

// call reg: opcode 0xFF followed by 0xD0 OR-ed with the register encoding.
void Assembler::call(Register dst) {
  int encode = prefix_and_encode(dst->encoding());
  emit_int8((unsigned char)0xFF);
  emit_int8((unsigned char)(0xD0 | encode));
}


// call mem: opcode 0xFF with rdx in the register slot of the operand
// (presumably the opcode extension -- confirm against emit_operand).
void Assembler::call(Address adr) {
  InstructionMark im(this);
  prefix(adr);
  emit_int8((unsigned char)0xFF);
  emit_operand(rdx, adr);
}

// call to an absolute address: opcode 0xE8 plus a displacement measured from
// the end of the 4-byte displacement field; it must fit in a signed 32 bits.
void Assembler::call_literal(address entry, RelocationHolder const& rspec) {
  assert(entry != NULL, "call most probably wrong");
  InstructionMark im(this);
  emit_int8((unsigned char)0xE8);
  intptr_t disp = entry - (pc() + sizeof(int32_t));
  assert(is_simm32(disp), "must be 32bit offset (call2)");
  // Technically, should use call32_operand, but this format is
  // implied by the fact that we're emitting a call instruction.
1471 1472 int operand = LP64_ONLY(disp32_operand) NOT_LP64(call32_operand); 1473 emit_data((int) disp, rspec, operand); 1474 } 1475 1476 void Assembler::cdql() { 1477 emit_int8((unsigned char)0x99); 1478 } 1479 1480 void Assembler::cld() { 1481 emit_int8((unsigned char)0xFC); 1482 } 1483 1484 void Assembler::cmovl(Condition cc, Register dst, Register src) { 1485 NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction")); 1486 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 1487 emit_int8(0x0F); 1488 emit_int8(0x40 | cc); 1489 emit_int8((unsigned char)(0xC0 | encode)); 1490 } 1491 1492 1493 void Assembler::cmovl(Condition cc, Register dst, Address src) { 1494 NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction")); 1495 prefix(src, dst); 1496 emit_int8(0x0F); 1497 emit_int8(0x40 | cc); 1498 emit_operand(dst, src); 1499 } 1500 1501 void Assembler::cmpb(Address dst, int imm8) { 1502 InstructionMark im(this); 1503 prefix(dst); 1504 emit_int8((unsigned char)0x80); 1505 emit_operand(rdi, dst, 1); 1506 emit_int8(imm8); 1507 } 1508 1509 void Assembler::cmpl(Address dst, int32_t imm32) { 1510 InstructionMark im(this); 1511 prefix(dst); 1512 emit_int8((unsigned char)0x81); 1513 emit_operand(rdi, dst, 4); 1514 emit_int32(imm32); 1515 } 1516 1517 void Assembler::cmpl(Register dst, int32_t imm32) { 1518 prefix(dst); 1519 emit_arith(0x81, 0xF8, dst, imm32); 1520 } 1521 1522 void Assembler::cmpl(Register dst, Register src) { 1523 (void) prefix_and_encode(dst->encoding(), src->encoding()); 1524 emit_arith(0x3B, 0xC0, dst, src); 1525 } 1526 1527 1528 void Assembler::cmpl(Register dst, Address src) { 1529 InstructionMark im(this); 1530 prefix(src, dst); 1531 emit_int8((unsigned char)0x3B); 1532 emit_operand(dst, src); 1533 } 1534 1535 void Assembler::cmpw(Address dst, int imm16) { 1536 InstructionMark im(this); 1537 assert(!dst.base_needs_rex() && !dst.index_needs_rex(), "no extended registers"); 1538 emit_int8(0x66); 1539 
emit_int8((unsigned char)0x81);
  emit_operand(rdi, dst, 2);
  emit_int16(imm16);
}

// The 32-bit cmpxchg compares the value at adr with the contents of rax,
// and stores reg into adr if so; otherwise, the value at adr is loaded into rax.
// The ZF is set if the compared values were equal, and cleared otherwise.
// Encoding: 0F B1 followed by the operand bytes for (reg, adr).
void Assembler::cmpxchgl(Register reg, Address adr) { // cmpxchg
  InstructionMark im(this);
  prefix(adr, reg);
  emit_int8(0x0F);
  emit_int8((unsigned char)0xB1);
  emit_operand(reg, adr);
}

// The 8-bit cmpxchg compares the value at adr with the contents of rax,
// and stores reg into adr if so; otherwise, the value at adr is loaded into rax.
// The ZF is set if the compared values were equal, and cleared otherwise.
// Encoding: 0F B0; prefix() is called with its third argument set to true
// (presumably a byte-instruction flag -- confirm against prefix()).
void Assembler::cmpxchgb(Register reg, Address adr) { // cmpxchg
  InstructionMark im(this);
  prefix(adr, reg, true);
  emit_int8(0x0F);
  emit_int8((unsigned char)0xB0);
  emit_operand(reg, adr);
}

// comisd xmm, mem: opcode 0x2F with the 66 SIMD prefix, emitted through the
// "nonds" emitters. With EVEX support it records T1S / 64-bit input size and
// uses the "_q" variant with no_mask_reg set.
void Assembler::comisd(XMMRegister dst, Address src) {
  // NOTE: dbx seems to decode this as comiss even though the
  // 0x66 is there. Strangely ucomisd comes out correct
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  if (VM_Version::supports_evex()) {
    _tuple_type = EVEX_T1S;
    _input_size_in_bits = EVEX_64bit;
    emit_simd_arith_nonds_q(0x2F, dst, src, VEX_SIMD_66, /* no_mask_reg */ true);
  } else {
    emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_66);
  }
}

// comisd xmm, xmm: register form of the above.
void Assembler::comisd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  if (VM_Version::supports_evex()) {
    emit_simd_arith_nonds_q(0x2F, dst, src, VEX_SIMD_66, /* no_mask_reg */ true);
  } else {
    emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_66);
  }
}

// comiss xmm, mem: opcode 0x2F with no SIMD prefix; records T1S / 32-bit
// input size first when EVEX is supported.
void Assembler::comiss(XMMRegister dst, Address src) {
  if (VM_Version::supports_evex()) {
    _tuple_type = EVEX_T1S;
    _input_size_in_bits = EVEX_32bit;
  }
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_NONE, /* no_mask_reg */ true);
}

// comiss xmm, xmm: register form of the above.
void Assembler::comiss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_NONE, /* no_mask_reg */ true);
}

// cpuid: two-byte opcode 0F A2.
void Assembler::cpuid() {
  emit_int8(0x0F);
  emit_int8((unsigned char)0xA2);
}

// Opcode / Instruction Op / En 64 - Bit Mode Compat / Leg Mode Description Implemented
// F2 0F 38 F0 / r CRC32 r32, r / m8 RM Valid Valid Accumulate CRC32 on r / m8. v
// F2 REX 0F 38 F0 / r CRC32 r32, r / m8* RM Valid N.E. Accumulate CRC32 on r / m8. -
// F2 REX.W 0F 38 F0 / r CRC32 r64, r / m8 RM Valid N.E. Accumulate CRC32 on r / m8. -
//
// F2 0F 38 F1 / r CRC32 r32, r / m16 RM Valid Valid Accumulate CRC32 on r / m16. v
//
// F2 0F 38 F1 / r CRC32 r32, r / m32 RM Valid Valid Accumulate CRC32 on r / m32. v
//
// F2 REX.W 0F 38 F1 / r CRC32 r64, r / m64 RM Valid N.E. Accumulate CRC32 on r / m64. v
//
// crc32 crc, v (register form). sizeInBytes selects the variant:
//   1    -> low opcode bit w cleared (opcode F0)
//   2, 4 -> w stays 1 (opcode F1)
//   8    -> w stays 1 and REX_W is requested (64-bit builds only)
// NOTE(review): sizeInBytes == 2 takes the same path as 4 and no 0x66 prefix
// is emitted here, so it appears to produce the r/m32 encoding -- confirm
// that callers never rely on the 16-bit form.
void Assembler::crc32(Register crc, Register v, int8_t sizeInBytes) {
  assert(VM_Version::supports_sse4_2(), "");
  int8_t w = 0x01;
  Prefix p = Prefix_EMPTY;

  emit_int8((int8_t)0xF2);
  switch (sizeInBytes) {
  case 1:
    w = 0;
    break;
  case 2:
  case 4:
    break;
  LP64_ONLY(case 8:)
    // This instruction is not valid in 32 bits
    // Note:
    // http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
    //
    // Page B - 72 Vol. 2C says
    // qwreg2 to qwreg 1111 0010 : 0100 1R0B : 0000 1111 : 0011 1000 : 1111 0000 : 11 qwreg1 qwreg2
    // mem64 to qwreg 1111 0010 : 0100 1R0B : 0000 1111 : 0011 1000 : 1111 0000 : mod qwreg r / m
    // F0!!!
    // while 3 - 208 Vol. 2A
    // F2 REX.W 0F 38 F1 / r CRC32 r64, r / m64 RM Valid N.E.Accumulate CRC32 on r / m64.
    //
    // the 0 on a last bit is reserved for a different flavor of this instruction :
    // F2 REX.W 0F 38 F0 / r CRC32 r64, r / m8 RM Valid N.E.Accumulate CRC32 on r / m8.
p = REX_W;
    break;
  default:
    assert(0, "Unsupported value for a sizeInBytes argument");
    break;
  }
  // The F2 byte was emitted before the switch; the REX prefix (64-bit builds
  // only) therefore follows it and precedes the 0F 38 opcode bytes.
  LP64_ONLY(prefix(crc, v, p);)
  emit_int8((int8_t)0x0F);
  emit_int8(0x38);
  emit_int8((int8_t)(0xF0 | w));
  // Register-direct byte: crc in bits 5..3, v in bits 2..0 (low 3 encoding
  // bits of each).
  emit_int8(0xC0 | ((crc->encoding() & 0x7) << 3) | (v->encoding() & 7));
}

// crc32 crc, [adr] (memory form). sizeInBytes is handled as in the register
// form: 1 clears the low opcode bit, 2 and 4 leave it set, 8 (64-bit builds
// only) additionally requests REX_W.
// NOTE(review): as in the register form, sizeInBytes == 2 emits no 0x66
// prefix -- confirm the 16-bit form is never requested.
void Assembler::crc32(Register crc, Address adr, int8_t sizeInBytes) {
  assert(VM_Version::supports_sse4_2(), "");
  InstructionMark im(this);
  int8_t w = 0x01;
  Prefix p = Prefix_EMPTY;

  emit_int8((int8_t)0xF2);
  switch (sizeInBytes) {
  case 1:
    w = 0;
    break;
  case 2:
  case 4:
    break;
  LP64_ONLY(case 8:)
    // This instruction is not valid in 32 bits
    p = REX_W;
    break;
  default:
    assert(0, "Unsupported value for a sizeInBytes argument");
    break;
  }
  LP64_ONLY(prefix(crc, adr, p);)
  emit_int8((int8_t)0x0F);
  emit_int8(0x38);
  emit_int8((int8_t)(0xF0 | w));
  emit_operand(crc, adr);
}

// cvtdq2pd xmm, xmm: opcode 0xE6 with the F3 SIMD prefix, legacy_mode forced.
void Assembler::cvtdq2pd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith_nonds(0xE6, dst, src, VEX_SIMD_F3, /* no_mask_reg */ false, /* legacy_mode */ true);
}

// cvtdq2ps xmm, xmm: opcode 0x5B with no SIMD prefix, legacy_mode forced.
void Assembler::cvtdq2ps(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith_nonds(0x5B, dst, src, VEX_SIMD_NONE, /* no_mask_reg */ false, /* legacy_mode */ true);
}

// cvtsd2ss xmm, xmm: opcode 0x5A with the F2 SIMD prefix; the "_q" emitter
// is used when EVEX is supported.
void Assembler::cvtsd2ss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  if (VM_Version::supports_evex()) {
    emit_simd_arith_q(0x5A, dst, src, VEX_SIMD_F2);
  } else {
    emit_simd_arith(0x5A, dst, src, VEX_SIMD_F2);
  }
}

// cvtsd2ss xmm, mem: memory form of the above.
void Assembler::cvtsd2ss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  if (VM_Version::supports_evex()) {
_tuple_type = EVEX_T1F;
    // NOTE(review): the other scalar memory forms in this file record
    // EVEX_T1S; this one records EVEX_T1F -- confirm that is intended.
    _input_size_in_bits = EVEX_64bit;
    emit_simd_arith_q(0x5A, dst, src, VEX_SIMD_F2);
  } else {
    emit_simd_arith(0x5A, dst, src, VEX_SIMD_F2);
  }
}

// cvtsi2sdl xmm, reg: opcode 0x2A with the F2 SIMD prefix, register-direct.
// The fifth prefix argument is supports_evex() (the sibling overloads label
// that position no_mask_reg).
void Assembler::cvtsi2sdl(XMMRegister dst, Register src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VM_Version::supports_evex());
  emit_int8(0x2A);
  emit_int8((unsigned char)(0xC0 | encode));
}

// cvtsi2sdl xmm, mem: with EVEX support records T1S / 32-bit input size and
// emits with no_mask_reg set; otherwise uses the default emitter arguments.
void Assembler::cvtsi2sdl(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  if (VM_Version::supports_evex()) {
    _tuple_type = EVEX_T1S;
    _input_size_in_bits = EVEX_32bit;
    emit_simd_arith(0x2A, dst, src, VEX_SIMD_F2, /* no_mask_reg */ true);
  } else {
    emit_simd_arith(0x2A, dst, src, VEX_SIMD_F2);
  }
}

// cvtsi2ssl xmm, reg: opcode 0x2A with the F3 SIMD prefix, register-direct.
void Assembler::cvtsi2ssl(XMMRegister dst, Register src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, /* no_mask_reg */ true);
  emit_int8(0x2A);
  emit_int8((unsigned char)(0xC0 | encode));
}

// cvtsi2ssl xmm, mem: records T1S / 32-bit input size when EVEX is supported.
void Assembler::cvtsi2ssl(XMMRegister dst, Address src) {
  if (VM_Version::supports_evex()) {
    _tuple_type = EVEX_T1S;
    _input_size_in_bits = EVEX_32bit;
  }
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_simd_arith(0x2A, dst, src, VEX_SIMD_F3, /* no_mask_reg */ true);
}

// cvtsi2ssq xmm, reg: as cvtsi2ssl(reg) but through the "_q" prefix helper.
void Assembler::cvtsi2ssq(XMMRegister dst, Register src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  int encode = simd_prefix_and_encode_q(dst, dst, src, VEX_SIMD_F3, /* no_mask_reg */ true);
  emit_int8(0x2A);
  emit_int8((unsigned char)(0xC0 | encode));
}

// cvtss2sd xmm, xmm: opcode 0x5A with the F3 SIMD prefix.
void Assembler::cvtss2sd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x5A, dst, src, VEX_SIMD_F3);
}

void
Assembler::cvtss2sd(XMMRegister dst, Address src) {
  // Memory form of cvtss2sd: records T1S / 32-bit input size when EVEX is
  // supported, then emits opcode 0x5A with the F3 SIMD prefix.
  if (VM_Version::supports_evex()) {
    _tuple_type = EVEX_T1S;
    _input_size_in_bits = EVEX_32bit;
  }
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x5A, dst, src, VEX_SIMD_F3);
}


// cvttsd2sil reg, xmm: opcode 0x2C with the F2 SIMD prefix in the 0F opcode
// map, register-direct.
void Assembler::cvttsd2sil(Register dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, /* no_mask_reg */ true);
  emit_int8(0x2C);
  emit_int8((unsigned char)(0xC0 | encode));
}

// cvttss2sil reg, xmm: opcode 0x2C with the F3 SIMD prefix, register-direct.
void Assembler::cvttss2sil(Register dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, /* no_mask_reg */ true);
  emit_int8(0x2C);
  emit_int8((unsigned char)(0xC0 | encode));
}

// decl mem: opcode 0xFF with rcx in the register slot of the operand
// (presumably the opcode extension -- confirm against emit_operand).
void Assembler::decl(Address dst) {
  // Don't use it directly. Use MacroAssembler::decrement() instead.
InstructionMark im(this);
  prefix(dst);
  emit_int8((unsigned char)0xFF);
  emit_operand(rcx, dst);
}

// divsd xmm, mem: opcode 0x5E with the F2 SIMD prefix. With EVEX support it
// records T1S / 64-bit input size and uses the "_q" emitter.
void Assembler::divsd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  if (VM_Version::supports_evex()) {
    _tuple_type = EVEX_T1S;
    _input_size_in_bits = EVEX_64bit;
    emit_simd_arith_q(0x5E, dst, src, VEX_SIMD_F2);
  } else {
    emit_simd_arith(0x5E, dst, src, VEX_SIMD_F2);
  }
}

// divsd xmm, xmm: register form of the above.
void Assembler::divsd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  if (VM_Version::supports_evex()) {
    emit_simd_arith_q(0x5E, dst, src, VEX_SIMD_F2);
  } else {
    emit_simd_arith(0x5E, dst, src, VEX_SIMD_F2);
  }
}

// divss xmm, mem: opcode 0x5E with the F3 SIMD prefix; records T1S / 32-bit
// input size when EVEX is supported.
void Assembler::divss(XMMRegister dst, Address src) {
  if (VM_Version::supports_evex()) {
    _tuple_type = EVEX_T1S;
    _input_size_in_bits = EVEX_32bit;
  }
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_simd_arith(0x5E, dst, src, VEX_SIMD_F3);
}

// divss xmm, xmm: register form of the above.
void Assembler::divss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_simd_arith(0x5E, dst, src, VEX_SIMD_F3);
}

// emms: two-byte opcode 0F 77.
void Assembler::emms() {
  NOT_LP64(assert(VM_Version::supports_mmx(), ""));
  emit_int8(0x0F);
  emit_int8(0x77);
}

// hlt: single opcode byte 0xF4.
void Assembler::hlt() {
  emit_int8((unsigned char)0xF4);
}

// idivl reg: opcode 0xF7 followed by 0xF8 OR-ed with the register encoding.
void Assembler::idivl(Register src) {
  int encode = prefix_and_encode(src->encoding());
  emit_int8((unsigned char)0xF7);
  emit_int8((unsigned char)(0xF8 | encode));
}

// divl reg: opcode 0xF7 followed by 0xF0 OR-ed with the register encoding.
void Assembler::divl(Register src) { // Unsigned
  int encode = prefix_and_encode(src->encoding());
  emit_int8((unsigned char)0xF7);
  emit_int8((unsigned char)(0xF0 | encode));
}

// imull dst, src: two-byte opcode 0F AF, register-direct.
void Assembler::imull(Register dst, Register src) {
  int encode = prefix_and_encode(dst->encoding(),
src->encoding());
  emit_int8(0x0F);
  emit_int8((unsigned char)0xAF);
  emit_int8((unsigned char)(0xC0 | encode));
}


// imull dst, src, value: picks the short form (opcode 0x6B + 8-bit
// immediate) when value passes is8bit(), otherwise the long form
// (opcode 0x69 + 32-bit immediate).
void Assembler::imull(Register dst, Register src, int value) {
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  if (is8bit(value)) {
    emit_int8(0x6B);
    emit_int8((unsigned char)(0xC0 | encode));
    emit_int8(value & 0xFF);
  } else {
    emit_int8(0x69);
    emit_int8((unsigned char)(0xC0 | encode));
    emit_int32(value);
  }
}

// imull dst, mem: two-byte opcode 0F AF followed by the operand bytes.
void Assembler::imull(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_int8(0x0F);
  emit_int8((unsigned char) 0xAF);
  emit_operand(dst, src);
}


// incl mem: opcode 0xFF with rax in the register slot of the operand
// (presumably the opcode extension -- confirm against emit_operand).
void Assembler::incl(Address dst) {
  // Don't use it directly. Use MacroAssembler::increment() instead.
  InstructionMark im(this);
  prefix(dst);
  emit_int8((unsigned char)0xFF);
  emit_operand(rax, dst);
}

// Conditional jump to a Label.
//   cc          - condition code, must be in [0, 16).
//   L           - target label, bound or not.
//   maybe_short - permits the 2-byte form for a bound label whose
//                 displacement passes is8bit().
// Bound label: offs is the distance from the current pc() to the target; the
// emitted displacement subtracts the length of the chosen instruction form
// (2 bytes short, 6 bytes long). Unbound label: always the long form, with a
// patch site registered and a zero placeholder displacement.
void Assembler::jcc(Condition cc, Label& L, bool maybe_short) {
  InstructionMark im(this);
  assert((0 <= cc) && (cc < 16), "illegal cc");
  if (L.is_bound()) {
    address dst = target(L);
    assert(dst != NULL, "jcc most probably wrong");

    const int short_size = 2;
    const int long_size = 6;
    intptr_t offs = (intptr_t)dst - (intptr_t)pc();
    if (maybe_short && is8bit(offs - short_size)) {
      // 0111 tttn #8-bit disp
      emit_int8(0x70 | cc);
      emit_int8((offs - short_size) & 0xFF);
    } else {
      // 0000 1111 1000 tttn #32-bit disp
      assert(is_simm32(offs - long_size),
             "must be 32bit offset (call4)");
      emit_int8(0x0F);
      emit_int8((unsigned char)(0x80 | cc));
      emit_int32(offs - long_size);
    }
  } else {
    // Note: could eliminate cond. jumps to this jump if condition
    //       is the same however, seems to be rather unlikely case.
// Note: use jccb() if label to be bound is very close to get
    //       an 8-bit displacement
    L.add_patch_at(code(), locator());
    emit_int8(0x0F);
    emit_int8((unsigned char)(0x80 | cc));
    emit_int32(0);
  }
}

// Short (2-byte) conditional jump: opcode 0x70 OR-ed with cc, then an 8-bit
// displacement. For a bound label, debug builds check that the displacement,
// widened by short_branch_delta() away from zero, still passes is8bit(). For
// an unbound label a patch site is registered and a zero placeholder emitted.
void Assembler::jccb(Condition cc, Label& L) {
  if (L.is_bound()) {
    const int short_size = 2;
    address entry = target(L);
#ifdef ASSERT
    intptr_t dist = (intptr_t)entry - ((intptr_t)pc() + short_size);
    intptr_t delta = short_branch_delta();
    if (delta != 0) {
      dist += (dist < 0 ? (-delta) :delta);
    }
    assert(is8bit(dist), "Dispacement too large for a short jmp");
#endif
    intptr_t offs = (intptr_t)entry - (intptr_t)pc();
    // 0111 tttn #8-bit disp
    emit_int8(0x70 | cc);
    emit_int8((offs - short_size) & 0xFF);
  } else {
    InstructionMark im(this);
    L.add_patch_at(code(), locator());
    emit_int8(0x70 | cc);
    emit_int8(0);
  }
}

// jmp mem: opcode 0xFF with rsp in the register slot of the operand
// (presumably the opcode extension -- confirm against emit_operand).
void Assembler::jmp(Address adr) {
  InstructionMark im(this);
  prefix(adr);
  emit_int8((unsigned char)0xFF);
  emit_operand(rsp, adr);
}

// Unconditional jump to a Label. For a bound label the 2-byte form (0xEB +
// 8-bit displacement) is used when maybe_short is set and the displacement
// passes is8bit(); otherwise the 5-byte form (0xE9 + 32-bit displacement).
// The displacement is the distance from the current pc() minus the length of
// the chosen form.
void Assembler::jmp(Label& L, bool maybe_short) {
  if (L.is_bound()) {
    address entry = target(L);
    assert(entry != NULL, "jmp most probably wrong");
    InstructionMark im(this);
    const int short_size = 2;
    const int long_size = 5;
    intptr_t offs = entry - pc();
    if (maybe_short && is8bit(offs - short_size)) {
      emit_int8((unsigned char)0xEB);
      emit_int8((offs - short_size) & 0xFF);
    } else {
      emit_int8((unsigned char)0xE9);
      emit_int32(offs - long_size);
    }
  } else {
    // By default, forward jumps are always 32-bit displacements, since
    // we can't yet know where the label will be bound.  If you're sure that
    // the forward jump will not run beyond 256 bytes, use jmpb to
    // force an 8-bit displacement.
InstructionMark im(this);
    L.add_patch_at(code(), locator());
    emit_int8((unsigned char)0xE9);
    emit_int32(0);
  }
}

// jmp reg: opcode 0xFF followed by 0xE0 OR-ed with the register encoding.
void Assembler::jmp(Register entry) {
  int encode = prefix_and_encode(entry->encoding());
  emit_int8((unsigned char)0xFF);
  emit_int8((unsigned char)(0xE0 | encode));
}

// jmp to an absolute address: opcode 0xE9 plus a displacement measured from
// the end of the 4-byte displacement field; it must fit in a signed 32 bits.
// The displacement is emitted through emit_data with call32_operand format.
void Assembler::jmp_literal(address dest, RelocationHolder const& rspec) {
  InstructionMark im(this);
  emit_int8((unsigned char)0xE9);
  assert(dest != NULL, "must have a target");
  intptr_t disp = dest - (pc() + sizeof(int32_t));
  assert(is_simm32(disp), "must be 32bit offset (jmp)");
  emit_data(disp, rspec.reloc(), call32_operand);
}

// Short (2-byte) unconditional jump: opcode 0xEB plus an 8-bit displacement.
// For a bound label, debug builds check that the displacement, widened by
// short_branch_delta() away from zero, still passes is8bit().
void Assembler::jmpb(Label& L) {
  if (L.is_bound()) {
    const int short_size = 2;
    address entry = target(L);
    assert(entry != NULL, "jmp most probably wrong");
#ifdef ASSERT
    intptr_t dist = (intptr_t)entry - ((intptr_t)pc() + short_size);
    intptr_t delta = short_branch_delta();
    if (delta != 0) {
      dist += (dist < 0 ?
(-delta) :delta);
    }
    assert(is8bit(dist), "Dispacement too large for a short jmp");
#endif
    intptr_t offs = entry - pc();
    emit_int8((unsigned char)0xEB);
    emit_int8((offs - short_size) & 0xFF);
  } else {
    // Unbound label: register a patch site and emit a zero placeholder.
    InstructionMark im(this);
    L.add_patch_at(code(), locator());
    emit_int8((unsigned char)0xEB);
    emit_int8(0);
  }
}

// ldmxcsr mem: two-byte opcode 0F AE with as_Register(2) in the register
// slot of the operand (presumably the opcode extension -- confirm).
void Assembler::ldmxcsr( Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  prefix(src);
  emit_int8(0x0F);
  emit_int8((unsigned char)0xAE);
  emit_operand(as_Register(2), src);
}

// leal dst, mem: opcode 0x8D. On 64-bit builds a 0x67 (addr32) byte and the
// prefix for (src, dst) are emitted first; 32-bit builds emit neither.
void Assembler::leal(Register dst, Address src) {
  InstructionMark im(this);
#ifdef _LP64
  emit_int8(0x67); // addr32
  prefix(src, dst);
#endif // LP64
  emit_int8((unsigned char)0x8D);
  emit_operand(dst, src);
}

// lfence: three bytes 0F AE E8.
void Assembler::lfence() {
  emit_int8(0x0F);
  emit_int8((unsigned char)0xAE);
  emit_int8((unsigned char)0xE8);
}

// lock: emits the single prefix byte 0xF0.
void Assembler::lock() {
  emit_int8((unsigned char)0xF0);
}

// lzcntl dst, src: F3, then the register prefix, then 0F BD. Per the assert
// message, the same bytes are treated as BSR on CPUs without LZCNT support.
void Assembler::lzcntl(Register dst, Register src) {
  assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR");
  emit_int8((unsigned char)0xF3);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_int8(0x0F);
  emit_int8((unsigned char)0xBD);
  emit_int8((unsigned char)(0xC0 | encode));
}

// Emit mfence instruction
// Three bytes: 0F AE F0.
void Assembler::mfence() {
  NOT_LP64(assert(VM_Version::supports_sse2(), "unsupported");)
  emit_int8(0x0F);
  emit_int8((unsigned char)0xAE);
  emit_int8((unsigned char)0xF0);
}

// Register-to-register move: movq on 64-bit builds, movl on 32-bit builds.
void Assembler::mov(Register dst, Register src) {
  LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
}

// movapd xmm, xmm: opcode 0x28 with the 66 SIMD prefix. Three paths are
// chosen by CPU capability: supports_avx512novl(), then supports_evex(),
// then the plain path.
void Assembler::movapd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  if
(VM_Version::supports_avx512novl()) { 2072 int vector_len = AVX_512bit; 2073 int dst_enc = dst->encoding(); 2074 int src_enc = src->encoding(); 2075 int encode = vex_prefix_and_encode(dst_enc, 0, src_enc, VEX_SIMD_66, VEX_OPCODE_0F, 2076 /* vex_w */ true, vector_len, /* legacy_mode */ false, /* no_mask_reg */ false); 2077 emit_int8(0x28); 2078 emit_int8((unsigned char)(0xC0 | encode)); 2079 } else if (VM_Version::supports_evex()) { 2080 emit_simd_arith_nonds_q(0x28, dst, src, VEX_SIMD_66); 2081 } else { 2082 emit_simd_arith_nonds(0x28, dst, src, VEX_SIMD_66); 2083 } 2084 } 2085 2086 void Assembler::movaps(XMMRegister dst, XMMRegister src) { 2087 NOT_LP64(assert(VM_Version::supports_sse(), "")); 2088 if (VM_Version::supports_avx512novl()) { 2089 int vector_len = AVX_512bit; 2090 int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, vector_len); 2091 emit_int8(0x28); 2092 emit_int8((unsigned char)(0xC0 | encode)); 2093 } else { 2094 emit_simd_arith_nonds(0x28, dst, src, VEX_SIMD_NONE); 2095 } 2096 } 2097 2098 void Assembler::movlhps(XMMRegister dst, XMMRegister src) { 2099 NOT_LP64(assert(VM_Version::supports_sse(), "")); 2100 int encode = simd_prefix_and_encode(dst, src, src, VEX_SIMD_NONE, /* no_mask_reg */ true); 2101 emit_int8(0x16); 2102 emit_int8((unsigned char)(0xC0 | encode)); 2103 } 2104 2105 void Assembler::movb(Register dst, Address src) { 2106 NOT_LP64(assert(dst->has_byte_register(), "must have byte register")); 2107 InstructionMark im(this); 2108 prefix(src, dst, true); 2109 emit_int8((unsigned char)0x8A); 2110 emit_operand(dst, src); 2111 } 2112 2113 void Assembler::kmovql(KRegister dst, KRegister src) { 2114 NOT_LP64(assert(VM_Version::supports_evex(), "")); 2115 int encode = kreg_prefix_and_encode(dst, knoreg, src, VEX_SIMD_NONE, 2116 /* no_mask_reg */ true, VEX_OPCODE_0F, /* rex_w */ true); 2117 emit_int8((unsigned char)0x90); 2118 emit_int8((unsigned char)(0xC0 | encode)); 2119 } 2120 2121 void Assembler::kmovql(KRegister dst, Address 
src) { 2122 NOT_LP64(assert(VM_Version::supports_evex(), "")); 2123 int dst_enc = dst->encoding(); 2124 int nds_enc = 0; 2125 vex_prefix(src, nds_enc, dst_enc, VEX_SIMD_NONE, 2126 VEX_OPCODE_0F, /* vex_w */ true, AVX_128bit, /* legacy_mode */ true, /* no_reg_mask */ true); 2127 emit_int8((unsigned char)0x90); 2128 emit_operand((Register)dst, src); 2129 } 2130 2131 void Assembler::kmovql(Address dst, KRegister src) { 2132 NOT_LP64(assert(VM_Version::supports_evex(), "")); 2133 int src_enc = src->encoding(); 2134 int nds_enc = 0; 2135 vex_prefix(dst, nds_enc, src_enc, VEX_SIMD_NONE, 2136 VEX_OPCODE_0F, /* vex_w */ true, AVX_128bit, /* legacy_mode */ true, /* no_reg_mask */ true); 2137 emit_int8((unsigned char)0x90); 2138 emit_operand((Register)src, dst); 2139 } 2140 2141 void Assembler::kmovql(KRegister dst, Register src) { 2142 NOT_LP64(assert(VM_Version::supports_evex(), "")); 2143 VexSimdPrefix pre = !_legacy_mode_bw ? VEX_SIMD_F2 : VEX_SIMD_NONE; 2144 int encode = kreg_prefix_and_encode(dst, knoreg, src, pre, /* no_mask_reg */ true, 2145 VEX_OPCODE_0F, /* legacy_mode */ !_legacy_mode_bw); 2146 emit_int8((unsigned char)0x92); 2147 emit_int8((unsigned char)(0xC0 | encode)); 2148 } 2149 2150 void Assembler::kmovdl(KRegister dst, Register src) { 2151 NOT_LP64(assert(VM_Version::supports_evex(), "")); 2152 VexSimdPrefix pre = !_legacy_mode_bw ? 
VEX_SIMD_F2 : VEX_SIMD_NONE; 2153 int encode = kreg_prefix_and_encode(dst, knoreg, src, pre, /* no_mask_reg */ true); 2154 emit_int8((unsigned char)0x92); 2155 emit_int8((unsigned char)(0xC0 | encode)); 2156 } 2157 2158 void Assembler::kmovwl(KRegister dst, Register src) { 2159 NOT_LP64(assert(VM_Version::supports_evex(), "")); 2160 int encode = kreg_prefix_and_encode(dst, knoreg, src, VEX_SIMD_NONE, /* no_mask_reg */ true); 2161 emit_int8((unsigned char)0x92); 2162 emit_int8((unsigned char)(0xC0 | encode)); 2163 } 2164 2165 void Assembler::movb(Address dst, int imm8) { 2166 InstructionMark im(this); 2167 prefix(dst); 2168 emit_int8((unsigned char)0xC6); 2169 emit_operand(rax, dst, 1); 2170 emit_int8(imm8); 2171 } 2172 2173 2174 void Assembler::movb(Address dst, Register src) { 2175 assert(src->has_byte_register(), "must have byte register"); 2176 InstructionMark im(this); 2177 prefix(dst, src, true); 2178 emit_int8((unsigned char)0x88); 2179 emit_operand(src, dst); 2180 } 2181 2182 void Assembler::movdl(XMMRegister dst, Register src) { 2183 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2184 int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66, /* no_mask_reg */ true); 2185 emit_int8(0x6E); 2186 emit_int8((unsigned char)(0xC0 | encode)); 2187 } 2188 2189 void Assembler::movdl(Register dst, XMMRegister src) { 2190 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2191 // swap src/dst to get correct prefix 2192 int encode = simd_prefix_and_encode(src, dst, VEX_SIMD_66, /* no_mask_reg */ true); 2193 emit_int8(0x7E); 2194 emit_int8((unsigned char)(0xC0 | encode)); 2195 } 2196 2197 void Assembler::movdl(XMMRegister dst, Address src) { 2198 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2199 if (VM_Version::supports_evex()) { 2200 _tuple_type = EVEX_T1S; 2201 _input_size_in_bits = EVEX_32bit; 2202 } 2203 InstructionMark im(this); 2204 simd_prefix(dst, src, VEX_SIMD_66, /* no_reg_mask */ true); 2205 emit_int8(0x6E); 2206 emit_operand(dst, src); 2207 } 2208 
2209 void Assembler::movdl(Address dst, XMMRegister src) { 2210 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2211 if (VM_Version::supports_evex()) { 2212 _tuple_type = EVEX_T1S; 2213 _input_size_in_bits = EVEX_32bit; 2214 } 2215 InstructionMark im(this); 2216 simd_prefix(dst, src, VEX_SIMD_66, /* no_reg_mask */ true); 2217 emit_int8(0x7E); 2218 emit_operand(src, dst); 2219 } 2220 2221 void Assembler::movdqa(XMMRegister dst, XMMRegister src) { 2222 _instruction_uses_vl = true; 2223 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2224 emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_66); 2225 } 2226 2227 void Assembler::movdqa(XMMRegister dst, Address src) { 2228 _instruction_uses_vl = true; 2229 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2230 if (VM_Version::supports_evex()) { 2231 _tuple_type = EVEX_FVM; 2232 } 2233 emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_66); 2234 } 2235 2236 void Assembler::movdqu(XMMRegister dst, Address src) { 2237 _instruction_uses_vl = true; 2238 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2239 if (VM_Version::supports_evex()) { 2240 _tuple_type = EVEX_FVM; 2241 } 2242 emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_F3); 2243 } 2244 2245 void Assembler::movdqu(XMMRegister dst, XMMRegister src) { 2246 _instruction_uses_vl = true; 2247 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2248 emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_F3); 2249 } 2250 2251 void Assembler::movdqu(Address dst, XMMRegister src) { 2252 _instruction_uses_vl = true; 2253 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2254 if (VM_Version::supports_evex()) { 2255 _tuple_type = EVEX_FVM; 2256 } 2257 InstructionMark im(this); 2258 simd_prefix(dst, src, VEX_SIMD_F3, /* no_mask_reg */ false); 2259 emit_int8(0x7F); 2260 emit_operand(src, dst); 2261 } 2262 2263 // Move Unaligned 256bit Vector 2264 void Assembler::vmovdqu(XMMRegister dst, XMMRegister src) { 2265 _instruction_uses_vl = true; 2266 assert(UseAVX > 0, ""); 2267 int vector_len = 
AVX_256bit; 2268 int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F3, vector_len); 2269 emit_int8(0x6F); 2270 emit_int8((unsigned char)(0xC0 | encode)); 2271 } 2272 2273 void Assembler::vmovdqu(XMMRegister dst, Address src) { 2274 _instruction_uses_vl = true; 2275 assert(UseAVX > 0, ""); 2276 if (VM_Version::supports_evex()) { 2277 _tuple_type = EVEX_FVM; 2278 } 2279 InstructionMark im(this); 2280 int vector_len = AVX_256bit; 2281 vex_prefix(dst, xnoreg, src, VEX_SIMD_F3, vector_len); 2282 emit_int8(0x6F); 2283 emit_operand(dst, src); 2284 } 2285 2286 void Assembler::vmovdqu(Address dst, XMMRegister src) { 2287 _instruction_uses_vl = true; 2288 assert(UseAVX > 0, ""); 2289 if (VM_Version::supports_evex()) { 2290 _tuple_type = EVEX_FVM; 2291 } 2292 InstructionMark im(this); 2293 int vector_len = AVX_256bit; 2294 // swap src<->dst for encoding 2295 assert(src != xnoreg, "sanity"); 2296 vex_prefix(src, xnoreg, dst, VEX_SIMD_F3, vector_len); 2297 emit_int8(0x7F); 2298 emit_operand(src, dst); 2299 } 2300 2301 // Move Unaligned EVEX enabled Vector (programmable : 8,16,32,64) 2302 void Assembler::evmovdqul(XMMRegister dst, XMMRegister src, int vector_len) { 2303 _instruction_uses_vl = true; 2304 assert(UseAVX > 0, ""); 2305 int src_enc = src->encoding(); 2306 int dst_enc = dst->encoding(); 2307 int encode = vex_prefix_and_encode(dst_enc, 0, src_enc, VEX_SIMD_F3, VEX_OPCODE_0F, 2308 /* vex_w */ false, vector_len, /* legacy_mode */ false, /* no_mask_reg */ false); 2309 emit_int8(0x6F); 2310 emit_int8((unsigned char)(0xC0 | encode)); 2311 } 2312 2313 void Assembler::evmovdqul(XMMRegister dst, Address src, int vector_len) { 2314 _instruction_uses_vl = true; 2315 assert(UseAVX > 0, ""); 2316 InstructionMark im(this); 2317 if (VM_Version::supports_evex()) { 2318 _tuple_type = EVEX_FVM; 2319 } 2320 vex_prefix(dst, xnoreg, src, VEX_SIMD_F3, vector_len); 2321 emit_int8(0x6F); 2322 emit_operand(dst, src); 2323 } 2324 2325 void Assembler::evmovdqul(Address dst, 
XMMRegister src, int vector_len) { 2326 _instruction_uses_vl = true; 2327 assert(UseAVX > 0, ""); 2328 InstructionMark im(this); 2329 assert(src != xnoreg, "sanity"); 2330 if (VM_Version::supports_evex()) { 2331 _tuple_type = EVEX_FVM; 2332 } 2333 // swap src<->dst for encoding 2334 vex_prefix(src, xnoreg, dst, VEX_SIMD_F3, vector_len); 2335 emit_int8(0x7F); 2336 emit_operand(src, dst); 2337 } 2338 2339 void Assembler::evmovdquq(XMMRegister dst, XMMRegister src, int vector_len) { 2340 _instruction_uses_vl = true; 2341 assert(UseAVX > 0, ""); 2342 int src_enc = src->encoding(); 2343 int dst_enc = dst->encoding(); 2344 int encode = vex_prefix_and_encode(dst_enc, 0, src_enc, VEX_SIMD_F3, VEX_OPCODE_0F, 2345 /* vex_w */ true, vector_len, /* legacy_mode */ false, /* no_mask_reg */ false); 2346 emit_int8(0x6F); 2347 emit_int8((unsigned char)(0xC0 | encode)); 2348 } 2349 2350 void Assembler::evmovdquq(XMMRegister dst, Address src, int vector_len) { 2351 _instruction_uses_vl = true; 2352 assert(UseAVX > 2, ""); 2353 InstructionMark im(this); 2354 _tuple_type = EVEX_FVM; 2355 vex_prefix_q(dst, xnoreg, src, VEX_SIMD_F3, vector_len); 2356 emit_int8(0x6F); 2357 emit_operand(dst, src); 2358 } 2359 2360 void Assembler::evmovdquq(Address dst, XMMRegister src, int vector_len) { 2361 _instruction_uses_vl = true; 2362 assert(UseAVX > 2, ""); 2363 InstructionMark im(this); 2364 assert(src != xnoreg, "sanity"); 2365 _tuple_type = EVEX_FVM; 2366 // swap src<->dst for encoding 2367 vex_prefix_q(src, xnoreg, dst, VEX_SIMD_F3, vector_len); 2368 emit_int8(0x7F); 2369 emit_operand(src, dst); 2370 } 2371 2372 // Uses zero extension on 64bit 2373 2374 void Assembler::movl(Register dst, int32_t imm32) { 2375 int encode = prefix_and_encode(dst->encoding()); 2376 emit_int8((unsigned char)(0xB8 | encode)); 2377 emit_int32(imm32); 2378 } 2379 2380 void Assembler::movl(Register dst, Register src) { 2381 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 2382 emit_int8((unsigned 
char)0x8B); 2383 emit_int8((unsigned char)(0xC0 | encode)); 2384 } 2385 2386 void Assembler::movl(Register dst, Address src) { 2387 InstructionMark im(this); 2388 prefix(src, dst); 2389 emit_int8((unsigned char)0x8B); 2390 emit_operand(dst, src); 2391 } 2392 2393 void Assembler::movl(Address dst, int32_t imm32) { 2394 InstructionMark im(this); 2395 prefix(dst); 2396 emit_int8((unsigned char)0xC7); 2397 emit_operand(rax, dst, 4); 2398 emit_int32(imm32); 2399 } 2400 2401 void Assembler::movl(Address dst, Register src) { 2402 InstructionMark im(this); 2403 prefix(dst, src); 2404 emit_int8((unsigned char)0x89); 2405 emit_operand(src, dst); 2406 } 2407 2408 // New cpus require to use movsd and movss to avoid partial register stall 2409 // when loading from memory. But for old Opteron use movlpd instead of movsd. 2410 // The selection is done in MacroAssembler::movdbl() and movflt(). 2411 void Assembler::movlpd(XMMRegister dst, Address src) { 2412 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2413 if (VM_Version::supports_evex()) { 2414 _tuple_type = EVEX_T1S; 2415 _input_size_in_bits = EVEX_32bit; 2416 emit_simd_arith_q(0x12, dst, src, VEX_SIMD_66, /* no_mask_reg */ true); 2417 } else { 2418 emit_simd_arith(0x12, dst, src, VEX_SIMD_66, /* no_mask_reg */ true); 2419 } 2420 } 2421 2422 void Assembler::movq( MMXRegister dst, Address src ) { 2423 assert( VM_Version::supports_mmx(), "" ); 2424 emit_int8(0x0F); 2425 emit_int8(0x6F); 2426 emit_operand(dst, src); 2427 } 2428 2429 void Assembler::movq( Address dst, MMXRegister src ) { 2430 assert( VM_Version::supports_mmx(), "" ); 2431 emit_int8(0x0F); 2432 emit_int8(0x7F); 2433 // workaround gcc (3.2.1-7a) bug 2434 // In that version of gcc with only an emit_operand(MMX, Address) 2435 // gcc will tail jump and try and reverse the parameters completely 2436 // obliterating dst in the process. By having a version available 2437 // that doesn't need to swap the args at the tail jump the bug is 2438 // avoided. 
2439 emit_operand(dst, src); 2440 } 2441 2442 void Assembler::movq(XMMRegister dst, Address src) { 2443 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2444 InstructionMark im(this); 2445 if (VM_Version::supports_evex()) { 2446 _tuple_type = EVEX_T1S; 2447 _input_size_in_bits = EVEX_64bit; 2448 simd_prefix_q(dst, xnoreg, src, VEX_SIMD_F3, /* no_mask_reg */ true); 2449 } else { 2450 simd_prefix(dst, src, VEX_SIMD_F3, /* no_mask_reg */ true); 2451 } 2452 emit_int8(0x7E); 2453 emit_operand(dst, src); 2454 } 2455 2456 void Assembler::movq(Address dst, XMMRegister src) { 2457 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2458 InstructionMark im(this); 2459 if (VM_Version::supports_evex()) { 2460 _tuple_type = EVEX_T1S; 2461 _input_size_in_bits = EVEX_64bit; 2462 simd_prefix(src, xnoreg, dst, VEX_SIMD_66, /* no_mask_reg */ true, 2463 VEX_OPCODE_0F, /* rex_w */ true); 2464 } else { 2465 simd_prefix(dst, src, VEX_SIMD_66, /* no_mask_reg */ true); 2466 } 2467 emit_int8((unsigned char)0xD6); 2468 emit_operand(src, dst); 2469 } 2470 2471 void Assembler::movsbl(Register dst, Address src) { // movsxb 2472 InstructionMark im(this); 2473 prefix(src, dst); 2474 emit_int8(0x0F); 2475 emit_int8((unsigned char)0xBE); 2476 emit_operand(dst, src); 2477 } 2478 2479 void Assembler::movsbl(Register dst, Register src) { // movsxb 2480 NOT_LP64(assert(src->has_byte_register(), "must have byte register")); 2481 int encode = prefix_and_encode(dst->encoding(), src->encoding(), true); 2482 emit_int8(0x0F); 2483 emit_int8((unsigned char)0xBE); 2484 emit_int8((unsigned char)(0xC0 | encode)); 2485 } 2486 2487 void Assembler::movsd(XMMRegister dst, XMMRegister src) { 2488 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2489 if (VM_Version::supports_evex()) { 2490 emit_simd_arith_q(0x10, dst, src, VEX_SIMD_F2, /* no_mask_reg */ true); 2491 } else { 2492 emit_simd_arith(0x10, dst, src, VEX_SIMD_F2); 2493 } 2494 } 2495 2496 void Assembler::movsd(XMMRegister dst, Address src) { 2497 
NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2498 if (VM_Version::supports_evex()) { 2499 _tuple_type = EVEX_T1S; 2500 _input_size_in_bits = EVEX_64bit; 2501 emit_simd_arith_nonds_q(0x10, dst, src, VEX_SIMD_F2, /* no_mask_reg */ true); 2502 } else { 2503 emit_simd_arith_nonds(0x10, dst, src, VEX_SIMD_F2); 2504 } 2505 } 2506 2507 void Assembler::movsd(Address dst, XMMRegister src) { 2508 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2509 InstructionMark im(this); 2510 if (VM_Version::supports_evex()) { 2511 _tuple_type = EVEX_T1S; 2512 _input_size_in_bits = EVEX_64bit; 2513 simd_prefix_q(src, xnoreg, dst, VEX_SIMD_F2); 2514 } else { 2515 simd_prefix(src, xnoreg, dst, VEX_SIMD_F2, /* no_mask_reg */ false); 2516 } 2517 emit_int8(0x11); 2518 emit_operand(src, dst); 2519 } 2520 2521 void Assembler::movss(XMMRegister dst, XMMRegister src) { 2522 NOT_LP64(assert(VM_Version::supports_sse(), "")); 2523 emit_simd_arith(0x10, dst, src, VEX_SIMD_F3, /* no_mask_reg */ true); 2524 } 2525 2526 void Assembler::movss(XMMRegister dst, Address src) { 2527 NOT_LP64(assert(VM_Version::supports_sse(), "")); 2528 if (VM_Version::supports_evex()) { 2529 _tuple_type = EVEX_T1S; 2530 _input_size_in_bits = EVEX_32bit; 2531 } 2532 emit_simd_arith_nonds(0x10, dst, src, VEX_SIMD_F3, /* no_mask_reg */ true); 2533 } 2534 2535 void Assembler::movss(Address dst, XMMRegister src) { 2536 NOT_LP64(assert(VM_Version::supports_sse(), "")); 2537 if (VM_Version::supports_evex()) { 2538 _tuple_type = EVEX_T1S; 2539 _input_size_in_bits = EVEX_32bit; 2540 } 2541 InstructionMark im(this); 2542 simd_prefix(dst, src, VEX_SIMD_F3, /* no_mask_reg */ false); 2543 emit_int8(0x11); 2544 emit_operand(src, dst); 2545 } 2546 2547 void Assembler::movswl(Register dst, Address src) { // movsxw 2548 InstructionMark im(this); 2549 prefix(src, dst); 2550 emit_int8(0x0F); 2551 emit_int8((unsigned char)0xBF); 2552 emit_operand(dst, src); 2553 } 2554 2555 void Assembler::movswl(Register dst, Register src) { // movsxw 
2556 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 2557 emit_int8(0x0F); 2558 emit_int8((unsigned char)0xBF); 2559 emit_int8((unsigned char)(0xC0 | encode)); 2560 } 2561 2562 void Assembler::movw(Address dst, int imm16) { 2563 InstructionMark im(this); 2564 2565 emit_int8(0x66); // switch to 16-bit mode 2566 prefix(dst); 2567 emit_int8((unsigned char)0xC7); 2568 emit_operand(rax, dst, 2); 2569 emit_int16(imm16); 2570 } 2571 2572 void Assembler::movw(Register dst, Address src) { 2573 InstructionMark im(this); 2574 emit_int8(0x66); 2575 prefix(src, dst); 2576 emit_int8((unsigned char)0x8B); 2577 emit_operand(dst, src); 2578 } 2579 2580 void Assembler::movw(Address dst, Register src) { 2581 InstructionMark im(this); 2582 emit_int8(0x66); 2583 prefix(dst, src); 2584 emit_int8((unsigned char)0x89); 2585 emit_operand(src, dst); 2586 } 2587 2588 void Assembler::movzbl(Register dst, Address src) { // movzxb 2589 InstructionMark im(this); 2590 prefix(src, dst); 2591 emit_int8(0x0F); 2592 emit_int8((unsigned char)0xB6); 2593 emit_operand(dst, src); 2594 } 2595 2596 void Assembler::movzbl(Register dst, Register src) { // movzxb 2597 NOT_LP64(assert(src->has_byte_register(), "must have byte register")); 2598 int encode = prefix_and_encode(dst->encoding(), src->encoding(), true); 2599 emit_int8(0x0F); 2600 emit_int8((unsigned char)0xB6); 2601 emit_int8(0xC0 | encode); 2602 } 2603 2604 void Assembler::movzwl(Register dst, Address src) { // movzxw 2605 InstructionMark im(this); 2606 prefix(src, dst); 2607 emit_int8(0x0F); 2608 emit_int8((unsigned char)0xB7); 2609 emit_operand(dst, src); 2610 } 2611 2612 void Assembler::movzwl(Register dst, Register src) { // movzxw 2613 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 2614 emit_int8(0x0F); 2615 emit_int8((unsigned char)0xB7); 2616 emit_int8(0xC0 | encode); 2617 } 2618 2619 void Assembler::mull(Address src) { 2620 InstructionMark im(this); 2621 prefix(src); 2622 emit_int8((unsigned char)0xF7); 
2623 emit_operand(rsp, src); 2624 } 2625 2626 void Assembler::mull(Register src) { 2627 int encode = prefix_and_encode(src->encoding()); 2628 emit_int8((unsigned char)0xF7); 2629 emit_int8((unsigned char)(0xE0 | encode)); 2630 } 2631 2632 void Assembler::mulsd(XMMRegister dst, Address src) { 2633 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2634 if (VM_Version::supports_evex()) { 2635 _tuple_type = EVEX_T1S; 2636 _input_size_in_bits = EVEX_64bit; 2637 emit_simd_arith_q(0x59, dst, src, VEX_SIMD_F2); 2638 } else { 2639 emit_simd_arith(0x59, dst, src, VEX_SIMD_F2); 2640 } 2641 } 2642 2643 void Assembler::mulsd(XMMRegister dst, XMMRegister src) { 2644 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2645 if (VM_Version::supports_evex()) { 2646 emit_simd_arith_q(0x59, dst, src, VEX_SIMD_F2); 2647 } else { 2648 emit_simd_arith(0x59, dst, src, VEX_SIMD_F2); 2649 } 2650 } 2651 2652 void Assembler::mulss(XMMRegister dst, Address src) { 2653 NOT_LP64(assert(VM_Version::supports_sse(), "")); 2654 if (VM_Version::supports_evex()) { 2655 _tuple_type = EVEX_T1S; 2656 _input_size_in_bits = EVEX_32bit; 2657 } 2658 emit_simd_arith(0x59, dst, src, VEX_SIMD_F3); 2659 } 2660 2661 void Assembler::mulss(XMMRegister dst, XMMRegister src) { 2662 NOT_LP64(assert(VM_Version::supports_sse(), "")); 2663 emit_simd_arith(0x59, dst, src, VEX_SIMD_F3); 2664 } 2665 2666 void Assembler::negl(Register dst) { 2667 int encode = prefix_and_encode(dst->encoding()); 2668 emit_int8((unsigned char)0xF7); 2669 emit_int8((unsigned char)(0xD8 | encode)); 2670 } 2671 2672 void Assembler::nop(int i) { 2673 #ifdef ASSERT 2674 assert(i > 0, " "); 2675 // The fancy nops aren't currently recognized by debuggers making it a 2676 // pain to disassemble code while debugging. If asserts are on clearly 2677 // speed is not an issue so simply use the single byte traditional nop 2678 // to do alignment. 
2679 2680 for (; i > 0 ; i--) emit_int8((unsigned char)0x90); 2681 return; 2682 2683 #endif // ASSERT 2684 2685 if (UseAddressNop && VM_Version::is_intel()) { 2686 // 2687 // Using multi-bytes nops "0x0F 0x1F [address]" for Intel 2688 // 1: 0x90 2689 // 2: 0x66 0x90 2690 // 3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding) 2691 // 4: 0x0F 0x1F 0x40 0x00 2692 // 5: 0x0F 0x1F 0x44 0x00 0x00 2693 // 6: 0x66 0x0F 0x1F 0x44 0x00 0x00 2694 // 7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 2695 // 8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 2696 // 9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 2697 // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 2698 // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 2699 2700 // The rest coding is Intel specific - don't use consecutive address nops 2701 2702 // 12: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90 2703 // 13: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90 2704 // 14: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90 2705 // 15: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90 2706 2707 while(i >= 15) { 2708 // For Intel don't generate consecutive addess nops (mix with regular nops) 2709 i -= 15; 2710 emit_int8(0x66); // size prefix 2711 emit_int8(0x66); // size prefix 2712 emit_int8(0x66); // size prefix 2713 addr_nop_8(); 2714 emit_int8(0x66); // size prefix 2715 emit_int8(0x66); // size prefix 2716 emit_int8(0x66); // size prefix 2717 emit_int8((unsigned char)0x90); 2718 // nop 2719 } 2720 switch (i) { 2721 case 14: 2722 emit_int8(0x66); // size prefix 2723 case 13: 2724 emit_int8(0x66); // size prefix 2725 case 12: 2726 addr_nop_8(); 2727 emit_int8(0x66); // size prefix 2728 emit_int8(0x66); // size prefix 2729 emit_int8(0x66); // size prefix 2730 emit_int8((unsigned char)0x90); 2731 // nop 2732 break; 2733 case 11: 2734 emit_int8(0x66); // size prefix 2735 case 10: 2736 emit_int8(0x66); // size prefix 2737 
case 9: 2738 emit_int8(0x66); // size prefix 2739 case 8: 2740 addr_nop_8(); 2741 break; 2742 case 7: 2743 addr_nop_7(); 2744 break; 2745 case 6: 2746 emit_int8(0x66); // size prefix 2747 case 5: 2748 addr_nop_5(); 2749 break; 2750 case 4: 2751 addr_nop_4(); 2752 break; 2753 case 3: 2754 // Don't use "0x0F 0x1F 0x00" - need patching safe padding 2755 emit_int8(0x66); // size prefix 2756 case 2: 2757 emit_int8(0x66); // size prefix 2758 case 1: 2759 emit_int8((unsigned char)0x90); 2760 // nop 2761 break; 2762 default: 2763 assert(i == 0, " "); 2764 } 2765 return; 2766 } 2767 if (UseAddressNop && VM_Version::is_amd()) { 2768 // 2769 // Using multi-bytes nops "0x0F 0x1F [address]" for AMD. 2770 // 1: 0x90 2771 // 2: 0x66 0x90 2772 // 3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding) 2773 // 4: 0x0F 0x1F 0x40 0x00 2774 // 5: 0x0F 0x1F 0x44 0x00 0x00 2775 // 6: 0x66 0x0F 0x1F 0x44 0x00 0x00 2776 // 7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 2777 // 8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 2778 // 9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 2779 // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 2780 // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 2781 2782 // The rest coding is AMD specific - use consecutive address nops 2783 2784 // 12: 0x66 0x0F 0x1F 0x44 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00 2785 // 13: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00 2786 // 14: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 2787 // 15: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 2788 // 16: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 2789 // Size prefixes (0x66) are added for larger sizes 2790 2791 while(i >= 22) { 2792 i -= 11; 2793 emit_int8(0x66); // size prefix 2794 emit_int8(0x66); // size prefix 2795 emit_int8(0x66); // size prefix 2796 addr_nop_8(); 2797 } 2798 // Generate first nop for size between 21-12 2799 switch (i) { 
2800 case 21: 2801 i -= 1; 2802 emit_int8(0x66); // size prefix 2803 case 20: 2804 case 19: 2805 i -= 1; 2806 emit_int8(0x66); // size prefix 2807 case 18: 2808 case 17: 2809 i -= 1; 2810 emit_int8(0x66); // size prefix 2811 case 16: 2812 case 15: 2813 i -= 8; 2814 addr_nop_8(); 2815 break; 2816 case 14: 2817 case 13: 2818 i -= 7; 2819 addr_nop_7(); 2820 break; 2821 case 12: 2822 i -= 6; 2823 emit_int8(0x66); // size prefix 2824 addr_nop_5(); 2825 break; 2826 default: 2827 assert(i < 12, " "); 2828 } 2829 2830 // Generate second nop for size between 11-1 2831 switch (i) { 2832 case 11: 2833 emit_int8(0x66); // size prefix 2834 case 10: 2835 emit_int8(0x66); // size prefix 2836 case 9: 2837 emit_int8(0x66); // size prefix 2838 case 8: 2839 addr_nop_8(); 2840 break; 2841 case 7: 2842 addr_nop_7(); 2843 break; 2844 case 6: 2845 emit_int8(0x66); // size prefix 2846 case 5: 2847 addr_nop_5(); 2848 break; 2849 case 4: 2850 addr_nop_4(); 2851 break; 2852 case 3: 2853 // Don't use "0x0F 0x1F 0x00" - need patching safe padding 2854 emit_int8(0x66); // size prefix 2855 case 2: 2856 emit_int8(0x66); // size prefix 2857 case 1: 2858 emit_int8((unsigned char)0x90); 2859 // nop 2860 break; 2861 default: 2862 assert(i == 0, " "); 2863 } 2864 return; 2865 } 2866 2867 // Using nops with size prefixes "0x66 0x90". 
2868 // From AMD Optimization Guide: 2869 // 1: 0x90 2870 // 2: 0x66 0x90 2871 // 3: 0x66 0x66 0x90 2872 // 4: 0x66 0x66 0x66 0x90 2873 // 5: 0x66 0x66 0x90 0x66 0x90 2874 // 6: 0x66 0x66 0x90 0x66 0x66 0x90 2875 // 7: 0x66 0x66 0x66 0x90 0x66 0x66 0x90 2876 // 8: 0x66 0x66 0x66 0x90 0x66 0x66 0x66 0x90 2877 // 9: 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90 2878 // 10: 0x66 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90 2879 // 2880 while(i > 12) { 2881 i -= 4; 2882 emit_int8(0x66); // size prefix 2883 emit_int8(0x66); 2884 emit_int8(0x66); 2885 emit_int8((unsigned char)0x90); 2886 // nop 2887 } 2888 // 1 - 12 nops 2889 if(i > 8) { 2890 if(i > 9) { 2891 i -= 1; 2892 emit_int8(0x66); 2893 } 2894 i -= 3; 2895 emit_int8(0x66); 2896 emit_int8(0x66); 2897 emit_int8((unsigned char)0x90); 2898 } 2899 // 1 - 8 nops 2900 if(i > 4) { 2901 if(i > 6) { 2902 i -= 1; 2903 emit_int8(0x66); 2904 } 2905 i -= 3; 2906 emit_int8(0x66); 2907 emit_int8(0x66); 2908 emit_int8((unsigned char)0x90); 2909 } 2910 switch (i) { 2911 case 4: 2912 emit_int8(0x66); 2913 case 3: 2914 emit_int8(0x66); 2915 case 2: 2916 emit_int8(0x66); 2917 case 1: 2918 emit_int8((unsigned char)0x90); 2919 break; 2920 default: 2921 assert(i == 0, " "); 2922 } 2923 } 2924 2925 void Assembler::notl(Register dst) { 2926 int encode = prefix_and_encode(dst->encoding()); 2927 emit_int8((unsigned char)0xF7); 2928 emit_int8((unsigned char)(0xD0 | encode)); 2929 } 2930 2931 void Assembler::orl(Address dst, int32_t imm32) { 2932 InstructionMark im(this); 2933 prefix(dst); 2934 emit_arith_operand(0x81, rcx, dst, imm32); 2935 } 2936 2937 void Assembler::orl(Register dst, int32_t imm32) { 2938 prefix(dst); 2939 emit_arith(0x81, 0xC8, dst, imm32); 2940 } 2941 2942 void Assembler::orl(Register dst, Address src) { 2943 InstructionMark im(this); 2944 prefix(src, dst); 2945 emit_int8(0x0B); 2946 emit_operand(dst, src); 2947 } 2948 2949 void Assembler::orl(Register dst, Register src) { 2950 (void) prefix_and_encode(dst->encoding(), 
src->encoding()); 2951 emit_arith(0x0B, 0xC0, dst, src); 2952 } 2953 2954 void Assembler::orl(Address dst, Register src) { 2955 InstructionMark im(this); 2956 prefix(dst, src); 2957 emit_int8(0x09); 2958 emit_operand(src, dst); 2959 } 2960 2961 void Assembler::packuswb(XMMRegister dst, Address src) { 2962 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2963 assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); 2964 if (VM_Version::supports_evex()) { 2965 _tuple_type = EVEX_FV; 2966 _input_size_in_bits = EVEX_32bit; 2967 } 2968 emit_simd_arith(0x67, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw); 2969 } 2970 2971 void Assembler::packuswb(XMMRegister dst, XMMRegister src) { 2972 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2973 emit_simd_arith(0x67, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw); 2974 } 2975 2976 void Assembler::vpackuswb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { 2977 assert(UseAVX > 0, "some form of AVX must be enabled"); 2978 emit_vex_arith(0x67, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw); 2979 } 2980 2981 void Assembler::vpermq(XMMRegister dst, XMMRegister src, int imm8, int vector_len) { 2982 _instruction_uses_vl = true; 2983 assert(VM_Version::supports_avx2(), ""); 2984 int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, /* no_mask_reg */ false, 2985 VEX_OPCODE_0F_3A, /* rex_w */ true, vector_len); 2986 emit_int8(0x00); 2987 emit_int8(0xC0 | encode); 2988 emit_int8(imm8); 2989 } 2990 2991 void Assembler::pause() { 2992 emit_int8((unsigned char)0xF3); 2993 emit_int8((unsigned char)0x90); 2994 } 2995 2996 void Assembler::pcmpestri(XMMRegister dst, Address src, int imm8) { 2997 assert(VM_Version::supports_sse4_2(), ""); 2998 InstructionMark im(this); 2999 simd_prefix(dst, xnoreg, src, VEX_SIMD_66, /* no_mask_reg */ false, VEX_OPCODE_0F_3A, 3000 /* rex_w */ 
false, AVX_128bit, /* legacy_mode */ true); 3001 emit_int8(0x61); 3002 emit_operand(dst, src); 3003 emit_int8(imm8); 3004 } 3005 3006 void Assembler::pcmpestri(XMMRegister dst, XMMRegister src, int imm8) { 3007 assert(VM_Version::supports_sse4_2(), ""); 3008 int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, /* no_mask_reg */ false, 3009 VEX_OPCODE_0F_3A, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true); 3010 emit_int8(0x61); 3011 emit_int8((unsigned char)(0xC0 | encode)); 3012 emit_int8(imm8); 3013 } 3014 3015 void Assembler::pextrd(Register dst, XMMRegister src, int imm8) { 3016 assert(VM_Version::supports_sse4_1(), ""); 3017 int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, /* no_mask_reg */ true, 3018 VEX_OPCODE_0F_3A, /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_dq); 3019 emit_int8(0x16); 3020 emit_int8((unsigned char)(0xC0 | encode)); 3021 emit_int8(imm8); 3022 } 3023 3024 void Assembler::pextrq(Register dst, XMMRegister src, int imm8) { 3025 assert(VM_Version::supports_sse4_1(), ""); 3026 int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, /* no_mask_reg */ true, 3027 VEX_OPCODE_0F_3A, /* rex_w */ true, AVX_128bit, /* legacy_mode */ _legacy_mode_dq); 3028 emit_int8(0x16); 3029 emit_int8((unsigned char)(0xC0 | encode)); 3030 emit_int8(imm8); 3031 } 3032 3033 void Assembler::pinsrd(XMMRegister dst, Register src, int imm8) { 3034 assert(VM_Version::supports_sse4_1(), ""); 3035 int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, /* no_mask_reg */ true, 3036 VEX_OPCODE_0F_3A, /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_dq); 3037 emit_int8(0x22); 3038 emit_int8((unsigned char)(0xC0 | encode)); 3039 emit_int8(imm8); 3040 } 3041 3042 void Assembler::pinsrq(XMMRegister dst, Register src, int imm8) { 3043 assert(VM_Version::supports_sse4_1(), ""); 3044 int encode = 
simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, /* no_mask_reg */ true, 3045 VEX_OPCODE_0F_3A, /* rex_w */ true, AVX_128bit, /* legacy_mode */ _legacy_mode_dq); 3046 emit_int8(0x22); 3047 emit_int8((unsigned char)(0xC0 | encode)); 3048 emit_int8(imm8); 3049 } 3050 3051 void Assembler::pmovzxbw(XMMRegister dst, Address src) { 3052 assert(VM_Version::supports_sse4_1(), ""); 3053 if (VM_Version::supports_evex()) { 3054 _tuple_type = EVEX_HVM; 3055 } 3056 InstructionMark im(this); 3057 simd_prefix(dst, src, VEX_SIMD_66, /* no_mask_reg */ false, VEX_OPCODE_0F_38); 3058 emit_int8(0x30); 3059 emit_operand(dst, src); 3060 } 3061 3062 void Assembler::pmovzxbw(XMMRegister dst, XMMRegister src) { 3063 assert(VM_Version::supports_sse4_1(), ""); 3064 int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, /* no_mask_reg */ false, VEX_OPCODE_0F_38); 3065 emit_int8(0x30); 3066 emit_int8((unsigned char)(0xC0 | encode)); 3067 } 3068 3069 // generic 3070 void Assembler::pop(Register dst) { 3071 int encode = prefix_and_encode(dst->encoding()); 3072 emit_int8(0x58 | encode); 3073 } 3074 3075 void Assembler::popcntl(Register dst, Address src) { 3076 assert(VM_Version::supports_popcnt(), "must support"); 3077 InstructionMark im(this); 3078 emit_int8((unsigned char)0xF3); 3079 prefix(src, dst); 3080 emit_int8(0x0F); 3081 emit_int8((unsigned char)0xB8); 3082 emit_operand(dst, src); 3083 } 3084 3085 void Assembler::popcntl(Register dst, Register src) { 3086 assert(VM_Version::supports_popcnt(), "must support"); 3087 emit_int8((unsigned char)0xF3); 3088 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 3089 emit_int8(0x0F); 3090 emit_int8((unsigned char)0xB8); 3091 emit_int8((unsigned char)(0xC0 | encode)); 3092 } 3093 3094 void Assembler::popf() { 3095 emit_int8((unsigned char)0x9D); 3096 } 3097 3098 #ifndef _LP64 // no 32bit push/pop on amd64 3099 void Assembler::popl(Address dst) { 3100 // NOTE: this will adjust stack by 8byte on 
64bits 3101 InstructionMark im(this); 3102 prefix(dst); 3103 emit_int8((unsigned char)0x8F); 3104 emit_operand(rax, dst); 3105 } 3106 #endif 3107 3108 void Assembler::prefetch_prefix(Address src) { 3109 prefix(src); 3110 emit_int8(0x0F); 3111 } 3112 3113 void Assembler::prefetchnta(Address src) { 3114 NOT_LP64(assert(VM_Version::supports_sse(), "must support")); 3115 InstructionMark im(this); 3116 prefetch_prefix(src); 3117 emit_int8(0x18); 3118 emit_operand(rax, src); // 0, src 3119 } 3120 3121 void Assembler::prefetchr(Address src) { 3122 assert(VM_Version::supports_3dnow_prefetch(), "must support"); 3123 InstructionMark im(this); 3124 prefetch_prefix(src); 3125 emit_int8(0x0D); 3126 emit_operand(rax, src); // 0, src 3127 } 3128 3129 void Assembler::prefetcht0(Address src) { 3130 NOT_LP64(assert(VM_Version::supports_sse(), "must support")); 3131 InstructionMark im(this); 3132 prefetch_prefix(src); 3133 emit_int8(0x18); 3134 emit_operand(rcx, src); // 1, src 3135 } 3136 3137 void Assembler::prefetcht1(Address src) { 3138 NOT_LP64(assert(VM_Version::supports_sse(), "must support")); 3139 InstructionMark im(this); 3140 prefetch_prefix(src); 3141 emit_int8(0x18); 3142 emit_operand(rdx, src); // 2, src 3143 } 3144 3145 void Assembler::prefetcht2(Address src) { 3146 NOT_LP64(assert(VM_Version::supports_sse(), "must support")); 3147 InstructionMark im(this); 3148 prefetch_prefix(src); 3149 emit_int8(0x18); 3150 emit_operand(rbx, src); // 3, src 3151 } 3152 3153 void Assembler::prefetchw(Address src) { 3154 assert(VM_Version::supports_3dnow_prefetch(), "must support"); 3155 InstructionMark im(this); 3156 prefetch_prefix(src); 3157 emit_int8(0x0D); 3158 emit_operand(rcx, src); // 1, src 3159 } 3160 3161 void Assembler::prefix(Prefix p) { 3162 emit_int8(p); 3163 } 3164 3165 void Assembler::pshufb(XMMRegister dst, XMMRegister src) { 3166 assert(VM_Version::supports_ssse3(), ""); 3167 int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, 
3168 VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_bw); 3169 emit_int8(0x00); 3170 emit_int8((unsigned char)(0xC0 | encode)); 3171 } 3172 3173 void Assembler::pshufb(XMMRegister dst, Address src) { 3174 assert(VM_Version::supports_ssse3(), ""); 3175 if (VM_Version::supports_evex()) { 3176 _tuple_type = EVEX_FVM; 3177 } 3178 InstructionMark im(this); 3179 simd_prefix(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, 3180 VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_bw); 3181 emit_int8(0x00); 3182 emit_operand(dst, src); 3183 } 3184 3185 void Assembler::pshufd(XMMRegister dst, XMMRegister src, int mode) { 3186 _instruction_uses_vl = true; 3187 assert(isByte(mode), "invalid value"); 3188 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3189 emit_simd_arith_nonds(0x70, dst, src, VEX_SIMD_66); 3190 emit_int8(mode & 0xFF); 3191 } 3192 3193 void Assembler::pshufd(XMMRegister dst, Address src, int mode) { 3194 _instruction_uses_vl = true; 3195 assert(isByte(mode), "invalid value"); 3196 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3197 assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); 3198 if (VM_Version::supports_evex()) { 3199 _tuple_type = EVEX_FV; 3200 _input_size_in_bits = EVEX_32bit; 3201 } 3202 InstructionMark im(this); 3203 simd_prefix(dst, src, VEX_SIMD_66, /* no_mask_reg */ false); 3204 emit_int8(0x70); 3205 emit_operand(dst, src); 3206 emit_int8(mode & 0xFF); 3207 } 3208 3209 void Assembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) { 3210 assert(isByte(mode), "invalid value"); 3211 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3212 emit_simd_arith_nonds(0x70, dst, src, VEX_SIMD_F2, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw); 3213 emit_int8(mode & 0xFF); 3214 } 3215 3216 void Assembler::pshuflw(XMMRegister dst, Address src, int mode) { 3217 assert(isByte(mode), "invalid value"); 3218 NOT_LP64(assert(VM_Version::supports_sse2(), 
"")); 3219 assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); 3220 if (VM_Version::supports_evex()) { 3221 _tuple_type = EVEX_FVM; 3222 } 3223 InstructionMark im(this); 3224 simd_prefix(dst, xnoreg, src, VEX_SIMD_F2, /* no_mask_reg */ false, 3225 VEX_OPCODE_0F, /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_bw); 3226 emit_int8(0x70); 3227 emit_operand(dst, src); 3228 emit_int8(mode & 0xFF); 3229 } 3230 3231 void Assembler::psrldq(XMMRegister dst, int shift) { 3232 // Shift left 128 bit value in dst XMMRegister by shift number of bytes. 3233 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3234 // XMM3 is for /3 encoding: 66 0F 73 /3 ib 3235 int encode = simd_prefix_and_encode(xmm3, dst, dst, VEX_SIMD_66, /* no_mask_reg */ true, 3236 VEX_OPCODE_0F, /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_bw); 3237 emit_int8(0x73); 3238 emit_int8((unsigned char)(0xC0 | encode)); 3239 emit_int8(shift); 3240 } 3241 3242 void Assembler::pslldq(XMMRegister dst, int shift) { 3243 // Shift left 128 bit value in dst XMMRegister by shift number of bytes. 
3244 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3245 // XMM7 is for /7 encoding: 66 0F 73 /7 ib 3246 int encode = simd_prefix_and_encode(xmm7, dst, dst, VEX_SIMD_66, /* no_mask_reg */ true, 3247 VEX_OPCODE_0F, /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_bw); 3248 emit_int8(0x73); 3249 emit_int8((unsigned char)(0xC0 | encode)); 3250 emit_int8(shift); 3251 } 3252 3253 void Assembler::ptest(XMMRegister dst, Address src) { 3254 assert(VM_Version::supports_sse4_1(), ""); 3255 assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); 3256 InstructionMark im(this); 3257 simd_prefix(dst, xnoreg, src, VEX_SIMD_66, /* no_mask_reg */ false, 3258 VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true); 3259 emit_int8(0x17); 3260 emit_operand(dst, src); 3261 } 3262 3263 void Assembler::ptest(XMMRegister dst, XMMRegister src) { 3264 assert(VM_Version::supports_sse4_1(), ""); 3265 int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, /* no_mask_reg */ false, 3266 VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true); 3267 emit_int8(0x17); 3268 emit_int8((unsigned char)(0xC0 | encode)); 3269 } 3270 3271 void Assembler::vptest(XMMRegister dst, Address src) { 3272 assert(VM_Version::supports_avx(), ""); 3273 InstructionMark im(this); 3274 int vector_len = AVX_256bit; 3275 assert(dst != xnoreg, "sanity"); 3276 int dst_enc = dst->encoding(); 3277 // swap src<->dst for encoding 3278 vex_prefix(src, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, /* rex_w */ false, 3279 vector_len, /* legacy_mode */ true, /* no_mask_reg */ false); 3280 emit_int8(0x17); 3281 emit_operand(dst, src); 3282 } 3283 3284 void Assembler::vptest(XMMRegister dst, XMMRegister src) { 3285 assert(VM_Version::supports_avx(), ""); 3286 int vector_len = AVX_256bit; 3287 int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_38, /* legacy_mode */ true); 3288 emit_int8(0x17); 3289 emit_int8((unsigned 
char)(0xC0 | encode)); 3290 } 3291 3292 void Assembler::punpcklbw(XMMRegister dst, Address src) { 3293 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3294 assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); 3295 if (VM_Version::supports_evex()) { 3296 _tuple_type = EVEX_FVM; 3297 } 3298 emit_simd_arith(0x60, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_vlbw); 3299 } 3300 3301 void Assembler::punpcklbw(XMMRegister dst, XMMRegister src) { 3302 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3303 emit_simd_arith(0x60, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_vlbw); 3304 } 3305 3306 void Assembler::punpckldq(XMMRegister dst, Address src) { 3307 _instruction_uses_vl = true; 3308 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3309 assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); 3310 if (VM_Version::supports_evex()) { 3311 _tuple_type = EVEX_FV; 3312 _input_size_in_bits = EVEX_32bit; 3313 } 3314 emit_simd_arith(0x62, dst, src, VEX_SIMD_66); 3315 } 3316 3317 void Assembler::punpckldq(XMMRegister dst, XMMRegister src) { 3318 _instruction_uses_vl = true; 3319 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3320 emit_simd_arith(0x62, dst, src, VEX_SIMD_66); 3321 } 3322 3323 void Assembler::punpcklqdq(XMMRegister dst, XMMRegister src) { 3324 _instruction_uses_vl = true; 3325 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3326 if (VM_Version::supports_evex()) { 3327 emit_simd_arith_q(0x6C, dst, src, VEX_SIMD_66); 3328 } else { 3329 emit_simd_arith(0x6C, dst, src, VEX_SIMD_66); 3330 } 3331 } 3332 3333 void Assembler::push(int32_t imm32) { 3334 // in 64bits we push 64bits onto the stack but only 3335 // take a 32bit immediate 3336 emit_int8(0x68); 3337 emit_int32(imm32); 3338 } 3339 3340 void Assembler::push(Register src) { 3341 int encode = prefix_and_encode(src->encoding()); 3342 3343 emit_int8(0x50 | encode); 3344 } 3345 3346 void Assembler::pushf() 
{ 3347 emit_int8((unsigned char)0x9C); 3348 } 3349 3350 #ifndef _LP64 // no 32bit push/pop on amd64 3351 void Assembler::pushl(Address src) { 3352 // Note this will push 64bit on 64bit 3353 InstructionMark im(this); 3354 prefix(src); 3355 emit_int8((unsigned char)0xFF); 3356 emit_operand(rsi, src); 3357 } 3358 #endif 3359 3360 void Assembler::rcll(Register dst, int imm8) { 3361 assert(isShiftCount(imm8), "illegal shift count"); 3362 int encode = prefix_and_encode(dst->encoding()); 3363 if (imm8 == 1) { 3364 emit_int8((unsigned char)0xD1); 3365 emit_int8((unsigned char)(0xD0 | encode)); 3366 } else { 3367 emit_int8((unsigned char)0xC1); 3368 emit_int8((unsigned char)0xD0 | encode); 3369 emit_int8(imm8); 3370 } 3371 } 3372 3373 void Assembler::rdtsc() { 3374 emit_int8((unsigned char)0x0F); 3375 emit_int8((unsigned char)0x31); 3376 } 3377 3378 // copies data from [esi] to [edi] using rcx pointer sized words 3379 // generic 3380 void Assembler::rep_mov() { 3381 emit_int8((unsigned char)0xF3); 3382 // MOVSQ 3383 LP64_ONLY(prefix(REX_W)); 3384 emit_int8((unsigned char)0xA5); 3385 } 3386 3387 // sets rcx bytes with rax, value at [edi] 3388 void Assembler::rep_stosb() { 3389 emit_int8((unsigned char)0xF3); // REP 3390 LP64_ONLY(prefix(REX_W)); 3391 emit_int8((unsigned char)0xAA); // STOSB 3392 } 3393 3394 // sets rcx pointer sized words with rax, value at [edi] 3395 // generic 3396 void Assembler::rep_stos() { 3397 emit_int8((unsigned char)0xF3); // REP 3398 LP64_ONLY(prefix(REX_W)); // LP64:STOSQ, LP32:STOSD 3399 emit_int8((unsigned char)0xAB); 3400 } 3401 3402 // scans rcx pointer sized words at [edi] for occurance of rax, 3403 // generic 3404 void Assembler::repne_scan() { // repne_scan 3405 emit_int8((unsigned char)0xF2); 3406 // SCASQ 3407 LP64_ONLY(prefix(REX_W)); 3408 emit_int8((unsigned char)0xAF); 3409 } 3410 3411 #ifdef _LP64 3412 // scans rcx 4 byte words at [edi] for occurance of rax, 3413 // generic 3414 void Assembler::repne_scanl() { // repne_scan 3415 
emit_int8((unsigned char)0xF2); 3416 // SCASL 3417 emit_int8((unsigned char)0xAF); 3418 } 3419 #endif 3420 3421 void Assembler::ret(int imm16) { 3422 if (imm16 == 0) { 3423 emit_int8((unsigned char)0xC3); 3424 } else { 3425 emit_int8((unsigned char)0xC2); 3426 emit_int16(imm16); 3427 } 3428 } 3429 3430 void Assembler::sahf() { 3431 #ifdef _LP64 3432 // Not supported in 64bit mode 3433 ShouldNotReachHere(); 3434 #endif 3435 emit_int8((unsigned char)0x9E); 3436 } 3437 3438 void Assembler::sarl(Register dst, int imm8) { 3439 int encode = prefix_and_encode(dst->encoding()); 3440 assert(isShiftCount(imm8), "illegal shift count"); 3441 if (imm8 == 1) { 3442 emit_int8((unsigned char)0xD1); 3443 emit_int8((unsigned char)(0xF8 | encode)); 3444 } else { 3445 emit_int8((unsigned char)0xC1); 3446 emit_int8((unsigned char)(0xF8 | encode)); 3447 emit_int8(imm8); 3448 } 3449 } 3450 3451 void Assembler::sarl(Register dst) { 3452 int encode = prefix_and_encode(dst->encoding()); 3453 emit_int8((unsigned char)0xD3); 3454 emit_int8((unsigned char)(0xF8 | encode)); 3455 } 3456 3457 void Assembler::sbbl(Address dst, int32_t imm32) { 3458 InstructionMark im(this); 3459 prefix(dst); 3460 emit_arith_operand(0x81, rbx, dst, imm32); 3461 } 3462 3463 void Assembler::sbbl(Register dst, int32_t imm32) { 3464 prefix(dst); 3465 emit_arith(0x81, 0xD8, dst, imm32); 3466 } 3467 3468 3469 void Assembler::sbbl(Register dst, Address src) { 3470 InstructionMark im(this); 3471 prefix(src, dst); 3472 emit_int8(0x1B); 3473 emit_operand(dst, src); 3474 } 3475 3476 void Assembler::sbbl(Register dst, Register src) { 3477 (void) prefix_and_encode(dst->encoding(), src->encoding()); 3478 emit_arith(0x1B, 0xC0, dst, src); 3479 } 3480 3481 void Assembler::setb(Condition cc, Register dst) { 3482 assert(0 <= cc && cc < 16, "illegal cc"); 3483 int encode = prefix_and_encode(dst->encoding(), true); 3484 emit_int8(0x0F); 3485 emit_int8((unsigned char)0x90 | cc); 3486 emit_int8((unsigned char)(0xC0 | encode)); 3487 } 
3488 3489 void Assembler::shll(Register dst, int imm8) { 3490 assert(isShiftCount(imm8), "illegal shift count"); 3491 int encode = prefix_and_encode(dst->encoding()); 3492 if (imm8 == 1 ) { 3493 emit_int8((unsigned char)0xD1); 3494 emit_int8((unsigned char)(0xE0 | encode)); 3495 } else { 3496 emit_int8((unsigned char)0xC1); 3497 emit_int8((unsigned char)(0xE0 | encode)); 3498 emit_int8(imm8); 3499 } 3500 } 3501 3502 void Assembler::shll(Register dst) { 3503 int encode = prefix_and_encode(dst->encoding()); 3504 emit_int8((unsigned char)0xD3); 3505 emit_int8((unsigned char)(0xE0 | encode)); 3506 } 3507 3508 void Assembler::shrl(Register dst, int imm8) { 3509 assert(isShiftCount(imm8), "illegal shift count"); 3510 int encode = prefix_and_encode(dst->encoding()); 3511 emit_int8((unsigned char)0xC1); 3512 emit_int8((unsigned char)(0xE8 | encode)); 3513 emit_int8(imm8); 3514 } 3515 3516 void Assembler::shrl(Register dst) { 3517 int encode = prefix_and_encode(dst->encoding()); 3518 emit_int8((unsigned char)0xD3); 3519 emit_int8((unsigned char)(0xE8 | encode)); 3520 } 3521 3522 // copies a single word from [esi] to [edi] 3523 void Assembler::smovl() { 3524 emit_int8((unsigned char)0xA5); 3525 } 3526 3527 void Assembler::sqrtsd(XMMRegister dst, XMMRegister src) { 3528 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3529 if (VM_Version::supports_evex()) { 3530 emit_simd_arith_q(0x51, dst, src, VEX_SIMD_F2); 3531 } else { 3532 emit_simd_arith(0x51, dst, src, VEX_SIMD_F2); 3533 } 3534 } 3535 3536 void Assembler::sqrtsd(XMMRegister dst, Address src) { 3537 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3538 if (VM_Version::supports_evex()) { 3539 _tuple_type = EVEX_T1S; 3540 _input_size_in_bits = EVEX_64bit; 3541 emit_simd_arith_q(0x51, dst, src, VEX_SIMD_F2); 3542 } else { 3543 emit_simd_arith(0x51, dst, src, VEX_SIMD_F2); 3544 } 3545 } 3546 3547 void Assembler::sqrtss(XMMRegister dst, XMMRegister src) { 3548 NOT_LP64(assert(VM_Version::supports_sse(), "")); 3549 
emit_simd_arith(0x51, dst, src, VEX_SIMD_F3); 3550 } 3551 3552 void Assembler::std() { 3553 emit_int8((unsigned char)0xFD); 3554 } 3555 3556 void Assembler::sqrtss(XMMRegister dst, Address src) { 3557 NOT_LP64(assert(VM_Version::supports_sse(), "")); 3558 if (VM_Version::supports_evex()) { 3559 _tuple_type = EVEX_T1S; 3560 _input_size_in_bits = EVEX_32bit; 3561 } 3562 emit_simd_arith(0x51, dst, src, VEX_SIMD_F3); 3563 } 3564 3565 void Assembler::stmxcsr( Address dst) { 3566 NOT_LP64(assert(VM_Version::supports_sse(), "")); 3567 InstructionMark im(this); 3568 prefix(dst); 3569 emit_int8(0x0F); 3570 emit_int8((unsigned char)0xAE); 3571 emit_operand(as_Register(3), dst); 3572 } 3573 3574 void Assembler::subl(Address dst, int32_t imm32) { 3575 InstructionMark im(this); 3576 prefix(dst); 3577 emit_arith_operand(0x81, rbp, dst, imm32); 3578 } 3579 3580 void Assembler::subl(Address dst, Register src) { 3581 InstructionMark im(this); 3582 prefix(dst, src); 3583 emit_int8(0x29); 3584 emit_operand(src, dst); 3585 } 3586 3587 void Assembler::subl(Register dst, int32_t imm32) { 3588 prefix(dst); 3589 emit_arith(0x81, 0xE8, dst, imm32); 3590 } 3591 3592 // Force generation of a 4 byte immediate value even if it fits into 8bit 3593 void Assembler::subl_imm32(Register dst, int32_t imm32) { 3594 prefix(dst); 3595 emit_arith_imm32(0x81, 0xE8, dst, imm32); 3596 } 3597 3598 void Assembler::subl(Register dst, Address src) { 3599 InstructionMark im(this); 3600 prefix(src, dst); 3601 emit_int8(0x2B); 3602 emit_operand(dst, src); 3603 } 3604 3605 void Assembler::subl(Register dst, Register src) { 3606 (void) prefix_and_encode(dst->encoding(), src->encoding()); 3607 emit_arith(0x2B, 0xC0, dst, src); 3608 } 3609 3610 void Assembler::subsd(XMMRegister dst, XMMRegister src) { 3611 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3612 if (VM_Version::supports_evex()) { 3613 emit_simd_arith_q(0x5C, dst, src, VEX_SIMD_F2); 3614 } else { 3615 emit_simd_arith(0x5C, dst, src, VEX_SIMD_F2); 3616 
} 3617 } 3618 3619 void Assembler::subsd(XMMRegister dst, Address src) { 3620 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3621 if (VM_Version::supports_evex()) { 3622 _tuple_type = EVEX_T1S; 3623 _input_size_in_bits = EVEX_64bit; 3624 } 3625 if (VM_Version::supports_evex()) { 3626 emit_simd_arith_q(0x5C, dst, src, VEX_SIMD_F2); 3627 } else { 3628 emit_simd_arith(0x5C, dst, src, VEX_SIMD_F2); 3629 } 3630 } 3631 3632 void Assembler::subss(XMMRegister dst, XMMRegister src) { 3633 NOT_LP64(assert(VM_Version::supports_sse(), "")); 3634 emit_simd_arith(0x5C, dst, src, VEX_SIMD_F3); 3635 } 3636 3637 void Assembler::subss(XMMRegister dst, Address src) { 3638 NOT_LP64(assert(VM_Version::supports_sse(), "")); 3639 if (VM_Version::supports_evex()) { 3640 _tuple_type = EVEX_T1S; 3641 _input_size_in_bits = EVEX_32bit; 3642 } 3643 emit_simd_arith(0x5C, dst, src, VEX_SIMD_F3); 3644 } 3645 3646 void Assembler::testb(Register dst, int imm8) { 3647 NOT_LP64(assert(dst->has_byte_register(), "must have byte register")); 3648 (void) prefix_and_encode(dst->encoding(), true); 3649 emit_arith_b(0xF6, 0xC0, dst, imm8); 3650 } 3651 3652 void Assembler::testl(Register dst, int32_t imm32) { 3653 // not using emit_arith because test 3654 // doesn't support sign-extension of 3655 // 8bit operands 3656 int encode = dst->encoding(); 3657 if (encode == 0) { 3658 emit_int8((unsigned char)0xA9); 3659 } else { 3660 encode = prefix_and_encode(encode); 3661 emit_int8((unsigned char)0xF7); 3662 emit_int8((unsigned char)(0xC0 | encode)); 3663 } 3664 emit_int32(imm32); 3665 } 3666 3667 void Assembler::testl(Register dst, Register src) { 3668 (void) prefix_and_encode(dst->encoding(), src->encoding()); 3669 emit_arith(0x85, 0xC0, dst, src); 3670 } 3671 3672 void Assembler::testl(Register dst, Address src) { 3673 InstructionMark im(this); 3674 prefix(src, dst); 3675 emit_int8((unsigned char)0x85); 3676 emit_operand(dst, src); 3677 } 3678 3679 void Assembler::tzcntl(Register dst, Register src) { 3680 
assert(VM_Version::supports_bmi1(), "tzcnt instruction not supported"); 3681 emit_int8((unsigned char)0xF3); 3682 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 3683 emit_int8(0x0F); 3684 emit_int8((unsigned char)0xBC); 3685 emit_int8((unsigned char)0xC0 | encode); 3686 } 3687 3688 void Assembler::tzcntq(Register dst, Register src) { 3689 assert(VM_Version::supports_bmi1(), "tzcnt instruction not supported"); 3690 emit_int8((unsigned char)0xF3); 3691 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 3692 emit_int8(0x0F); 3693 emit_int8((unsigned char)0xBC); 3694 emit_int8((unsigned char)(0xC0 | encode)); 3695 } 3696 3697 void Assembler::ucomisd(XMMRegister dst, Address src) { 3698 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3699 if (VM_Version::supports_evex()) { 3700 _tuple_type = EVEX_T1S; 3701 _input_size_in_bits = EVEX_64bit; 3702 emit_simd_arith_nonds_q(0x2E, dst, src, VEX_SIMD_66, /* no_mask_reg */ true); 3703 } else { 3704 emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_66); 3705 } 3706 } 3707 3708 void Assembler::ucomisd(XMMRegister dst, XMMRegister src) { 3709 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3710 if (VM_Version::supports_evex()) { 3711 emit_simd_arith_nonds_q(0x2E, dst, src, VEX_SIMD_66, /* no_mask_reg */ true); 3712 } else { 3713 emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_66); 3714 } 3715 } 3716 3717 void Assembler::ucomiss(XMMRegister dst, Address src) { 3718 NOT_LP64(assert(VM_Version::supports_sse(), "")); 3719 if (VM_Version::supports_evex()) { 3720 _tuple_type = EVEX_T1S; 3721 _input_size_in_bits = EVEX_32bit; 3722 } 3723 emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_NONE, /* no_mask_reg */ true); 3724 } 3725 3726 void Assembler::ucomiss(XMMRegister dst, XMMRegister src) { 3727 NOT_LP64(assert(VM_Version::supports_sse(), "")); 3728 emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_NONE, /* no_mask_reg */ true); 3729 } 3730 3731 void Assembler::xabort(int8_t imm8) { 3732 
emit_int8((unsigned char)0xC6); 3733 emit_int8((unsigned char)0xF8); 3734 emit_int8((unsigned char)(imm8 & 0xFF)); 3735 } 3736 3737 void Assembler::xaddl(Address dst, Register src) { 3738 InstructionMark im(this); 3739 prefix(dst, src); 3740 emit_int8(0x0F); 3741 emit_int8((unsigned char)0xC1); 3742 emit_operand(src, dst); 3743 } 3744 3745 void Assembler::xbegin(Label& abort, relocInfo::relocType rtype) { 3746 InstructionMark im(this); 3747 relocate(rtype); 3748 if (abort.is_bound()) { 3749 address entry = target(abort); 3750 assert(entry != NULL, "abort entry NULL"); 3751 intptr_t offset = entry - pc(); 3752 emit_int8((unsigned char)0xC7); 3753 emit_int8((unsigned char)0xF8); 3754 emit_int32(offset - 6); // 2 opcode + 4 address 3755 } else { 3756 abort.add_patch_at(code(), locator()); 3757 emit_int8((unsigned char)0xC7); 3758 emit_int8((unsigned char)0xF8); 3759 emit_int32(0); 3760 } 3761 } 3762 3763 void Assembler::xchgl(Register dst, Address src) { // xchg 3764 InstructionMark im(this); 3765 prefix(src, dst); 3766 emit_int8((unsigned char)0x87); 3767 emit_operand(dst, src); 3768 } 3769 3770 void Assembler::xchgl(Register dst, Register src) { 3771 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 3772 emit_int8((unsigned char)0x87); 3773 emit_int8((unsigned char)(0xC0 | encode)); 3774 } 3775 3776 void Assembler::xend() { 3777 emit_int8((unsigned char)0x0F); 3778 emit_int8((unsigned char)0x01); 3779 emit_int8((unsigned char)0xD5); 3780 } 3781 3782 void Assembler::xgetbv() { 3783 emit_int8(0x0F); 3784 emit_int8(0x01); 3785 emit_int8((unsigned char)0xD0); 3786 } 3787 3788 void Assembler::xorl(Register dst, int32_t imm32) { 3789 prefix(dst); 3790 emit_arith(0x81, 0xF0, dst, imm32); 3791 } 3792 3793 void Assembler::xorl(Register dst, Address src) { 3794 InstructionMark im(this); 3795 prefix(src, dst); 3796 emit_int8(0x33); 3797 emit_operand(dst, src); 3798 } 3799 3800 void Assembler::xorl(Register dst, Register src) { 3801 (void) 
prefix_and_encode(dst->encoding(), src->encoding()); 3802 emit_arith(0x33, 0xC0, dst, src); 3803 } 3804 3805 3806 // AVX 3-operands scalar float-point arithmetic instructions 3807 3808 void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, Address src) { 3809 assert(VM_Version::supports_avx(), ""); 3810 if (VM_Version::supports_evex()) { 3811 _tuple_type = EVEX_T1S; 3812 _input_size_in_bits = EVEX_64bit; 3813 emit_vex_arith_q(0x58, dst, nds, src, VEX_SIMD_F2, AVX_128bit); 3814 } else { 3815 emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F2, AVX_128bit); 3816 } 3817 } 3818 3819 void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { 3820 assert(VM_Version::supports_avx(), ""); 3821 if (VM_Version::supports_evex()) { 3822 emit_vex_arith_q(0x58, dst, nds, src, VEX_SIMD_F2, AVX_128bit); 3823 } else { 3824 emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F2, AVX_128bit); 3825 } 3826 } 3827 3828 void Assembler::vaddss(XMMRegister dst, XMMRegister nds, Address src) { 3829 assert(VM_Version::supports_avx(), ""); 3830 if (VM_Version::supports_evex()) { 3831 _tuple_type = EVEX_T1S; 3832 _input_size_in_bits = EVEX_32bit; 3833 } 3834 emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F3, AVX_128bit); 3835 } 3836 3837 void Assembler::vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src) { 3838 assert(VM_Version::supports_avx(), ""); 3839 emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F3, AVX_128bit); 3840 } 3841 3842 void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, Address src) { 3843 assert(VM_Version::supports_avx(), ""); 3844 if (VM_Version::supports_evex()) { 3845 _tuple_type = EVEX_T1S; 3846 _input_size_in_bits = EVEX_64bit; 3847 emit_vex_arith_q(0x5E, dst, nds, src, VEX_SIMD_F2, AVX_128bit); 3848 } else { 3849 emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F2, AVX_128bit); 3850 } 3851 } 3852 3853 void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { 3854 assert(VM_Version::supports_avx(), ""); 3855 if (VM_Version::supports_evex()) { 
3856 emit_vex_arith_q(0x5E, dst, nds, src, VEX_SIMD_F2, AVX_128bit); 3857 } else { 3858 emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F2, AVX_128bit); 3859 } 3860 } 3861 3862 void Assembler::vdivss(XMMRegister dst, XMMRegister nds, Address src) { 3863 assert(VM_Version::supports_avx(), ""); 3864 if (VM_Version::supports_evex()) { 3865 _tuple_type = EVEX_T1S; 3866 _input_size_in_bits = EVEX_32bit; 3867 } 3868 emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F3, AVX_128bit); 3869 } 3870 3871 void Assembler::vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src) { 3872 assert(VM_Version::supports_avx(), ""); 3873 emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F3, AVX_128bit); 3874 } 3875 3876 void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, Address src) { 3877 assert(VM_Version::supports_avx(), ""); 3878 if (VM_Version::supports_evex()) { 3879 _tuple_type = EVEX_T1S; 3880 _input_size_in_bits = EVEX_64bit; 3881 emit_vex_arith_q(0x59, dst, nds, src, VEX_SIMD_F2, AVX_128bit); 3882 } else { 3883 emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F2, AVX_128bit); 3884 } 3885 } 3886 3887 void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { 3888 assert(VM_Version::supports_avx(), ""); 3889 if (VM_Version::supports_evex()) { 3890 emit_vex_arith_q(0x59, dst, nds, src, VEX_SIMD_F2, AVX_128bit); 3891 } else { 3892 emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F2, AVX_128bit); 3893 } 3894 } 3895 3896 void Assembler::vmulss(XMMRegister dst, XMMRegister nds, Address src) { 3897 assert(VM_Version::supports_avx(), ""); 3898 if (VM_Version::supports_evex()) { 3899 _tuple_type = EVEX_T1S; 3900 _input_size_in_bits = EVEX_32bit; 3901 } 3902 emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F3, AVX_128bit); 3903 } 3904 3905 void Assembler::vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src) { 3906 assert(VM_Version::supports_avx(), ""); 3907 emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F3, AVX_128bit); 3908 } 3909 3910 void Assembler::vsubsd(XMMRegister dst, 
XMMRegister nds, Address src) { 3911 assert(VM_Version::supports_avx(), ""); 3912 if (VM_Version::supports_evex()) { 3913 _tuple_type = EVEX_T1S; 3914 _input_size_in_bits = EVEX_64bit; 3915 emit_vex_arith_q(0x5C, dst, nds, src, VEX_SIMD_F2, AVX_128bit); 3916 } else { 3917 emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F2, AVX_128bit); 3918 } 3919 } 3920 3921 void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { 3922 assert(VM_Version::supports_avx(), ""); 3923 if (VM_Version::supports_evex()) { 3924 emit_vex_arith_q(0x5C, dst, nds, src, VEX_SIMD_F2, AVX_128bit); 3925 } else { 3926 emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F2, AVX_128bit); 3927 } 3928 } 3929 3930 void Assembler::vsubss(XMMRegister dst, XMMRegister nds, Address src) { 3931 assert(VM_Version::supports_avx(), ""); 3932 if (VM_Version::supports_evex()) { 3933 _tuple_type = EVEX_T1S; 3934 _input_size_in_bits = EVEX_32bit; 3935 } 3936 emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F3, AVX_128bit); 3937 } 3938 3939 void Assembler::vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src) { 3940 assert(VM_Version::supports_avx(), ""); 3941 emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F3, AVX_128bit); 3942 } 3943 3944 //====================VECTOR ARITHMETIC===================================== 3945 3946 // Float-point vector arithmetic 3947 3948 void Assembler::addpd(XMMRegister dst, XMMRegister src) { 3949 _instruction_uses_vl = true; 3950 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3951 if (VM_Version::supports_evex()) { 3952 emit_simd_arith_q(0x58, dst, src, VEX_SIMD_66); 3953 } else { 3954 emit_simd_arith(0x58, dst, src, VEX_SIMD_66); 3955 } 3956 } 3957 3958 void Assembler::addps(XMMRegister dst, XMMRegister src) { 3959 _instruction_uses_vl = true; 3960 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3961 emit_simd_arith(0x58, dst, src, VEX_SIMD_NONE); 3962 } 3963 3964 void Assembler::vaddpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { 3965 
_instruction_uses_vl = true; 3966 assert(VM_Version::supports_avx(), ""); 3967 if (VM_Version::supports_evex()) { 3968 emit_vex_arith_q(0x58, dst, nds, src, VEX_SIMD_66, vector_len); 3969 } else { 3970 emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_66, vector_len); 3971 } 3972 } 3973 3974 void Assembler::vaddps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { 3975 _instruction_uses_vl = true; 3976 assert(VM_Version::supports_avx(), ""); 3977 emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_NONE, vector_len); 3978 } 3979 3980 void Assembler::vaddpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { 3981 _instruction_uses_vl = true; 3982 assert(VM_Version::supports_avx(), ""); 3983 if (VM_Version::supports_evex()) { 3984 _tuple_type = EVEX_FV; 3985 _input_size_in_bits = EVEX_64bit; 3986 emit_vex_arith_q(0x58, dst, nds, src, VEX_SIMD_66, vector_len); 3987 } else { 3988 emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_66, vector_len); 3989 } 3990 } 3991 3992 void Assembler::vaddps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { 3993 _instruction_uses_vl = true; 3994 assert(VM_Version::supports_avx(), ""); 3995 if (VM_Version::supports_evex()) { 3996 _tuple_type = EVEX_FV; 3997 _input_size_in_bits = EVEX_32bit; 3998 } 3999 emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_NONE, vector_len); 4000 } 4001 4002 void Assembler::subpd(XMMRegister dst, XMMRegister src) { 4003 _instruction_uses_vl = true; 4004 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 4005 if (VM_Version::supports_evex()) { 4006 emit_simd_arith_q(0x5C, dst, src, VEX_SIMD_66); 4007 } else { 4008 emit_simd_arith(0x5C, dst, src, VEX_SIMD_66); 4009 } 4010 } 4011 4012 void Assembler::subps(XMMRegister dst, XMMRegister src) { 4013 _instruction_uses_vl = true; 4014 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 4015 emit_simd_arith(0x5C, dst, src, VEX_SIMD_NONE); 4016 } 4017 4018 void Assembler::vsubpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { 
4019 _instruction_uses_vl = true; 4020 assert(VM_Version::supports_avx(), ""); 4021 if (VM_Version::supports_evex()) { 4022 emit_vex_arith_q(0x5C, dst, nds, src, VEX_SIMD_66, vector_len); 4023 } else { 4024 emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_66, vector_len); 4025 } 4026 } 4027 4028 void Assembler::vsubps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { 4029 _instruction_uses_vl = true; 4030 assert(VM_Version::supports_avx(), ""); 4031 emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_NONE, vector_len); 4032 } 4033 4034 void Assembler::vsubpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { 4035 _instruction_uses_vl = true; 4036 assert(VM_Version::supports_avx(), ""); 4037 if (VM_Version::supports_evex()) { 4038 _tuple_type = EVEX_FV; 4039 _input_size_in_bits = EVEX_64bit; 4040 emit_vex_arith_q(0x5C, dst, nds, src, VEX_SIMD_66, vector_len); 4041 } else { 4042 emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_66, vector_len); 4043 } 4044 } 4045 4046 void Assembler::vsubps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { 4047 _instruction_uses_vl = true; 4048 assert(VM_Version::supports_avx(), ""); 4049 if (VM_Version::supports_evex()) { 4050 _tuple_type = EVEX_FV; 4051 _input_size_in_bits = EVEX_32bit; 4052 } 4053 emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_NONE, vector_len); 4054 } 4055 4056 void Assembler::mulpd(XMMRegister dst, XMMRegister src) { 4057 _instruction_uses_vl = true; 4058 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 4059 if (VM_Version::supports_evex()) { 4060 emit_simd_arith_q(0x59, dst, src, VEX_SIMD_66); 4061 } else { 4062 emit_simd_arith(0x59, dst, src, VEX_SIMD_66); 4063 } 4064 } 4065 4066 void Assembler::mulps(XMMRegister dst, XMMRegister src) { 4067 _instruction_uses_vl = true; 4068 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 4069 emit_simd_arith(0x59, dst, src, VEX_SIMD_NONE); 4070 } 4071 4072 void Assembler::vmulpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int 
vector_len) { 4073 _instruction_uses_vl = true; 4074 assert(VM_Version::supports_avx(), ""); 4075 if (VM_Version::supports_evex()) { 4076 emit_vex_arith_q(0x59, dst, nds, src, VEX_SIMD_66, vector_len); 4077 } else { 4078 emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_66, vector_len); 4079 } 4080 } 4081 4082 void Assembler::vmulps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { 4083 _instruction_uses_vl = true; 4084 assert(VM_Version::supports_avx(), ""); 4085 emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_NONE, vector_len); 4086 } 4087 4088 void Assembler::vmulpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { 4089 _instruction_uses_vl = true; 4090 assert(VM_Version::supports_avx(), ""); 4091 if (VM_Version::supports_evex()) { 4092 _tuple_type = EVEX_FV; 4093 _input_size_in_bits = EVEX_64bit; 4094 emit_vex_arith_q(0x59, dst, nds, src, VEX_SIMD_66, vector_len); 4095 } else { 4096 emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_66, vector_len); 4097 } 4098 } 4099 4100 void Assembler::vmulps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { 4101 _instruction_uses_vl = true; 4102 assert(VM_Version::supports_avx(), ""); 4103 if (VM_Version::supports_evex()) { 4104 _tuple_type = EVEX_FV; 4105 _input_size_in_bits = EVEX_32bit; 4106 } 4107 emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_NONE, vector_len); 4108 } 4109 4110 void Assembler::divpd(XMMRegister dst, XMMRegister src) { 4111 _instruction_uses_vl = true; 4112 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 4113 if (VM_Version::supports_evex()) { 4114 emit_simd_arith_q(0x5E, dst, src, VEX_SIMD_66); 4115 } else { 4116 emit_simd_arith(0x5E, dst, src, VEX_SIMD_66); 4117 } 4118 } 4119 4120 void Assembler::divps(XMMRegister dst, XMMRegister src) { 4121 _instruction_uses_vl = true; 4122 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 4123 emit_simd_arith(0x5E, dst, src, VEX_SIMD_NONE); 4124 } 4125 4126 void Assembler::vdivpd(XMMRegister dst, XMMRegister nds, XMMRegister 
src, int vector_len) { 4127 _instruction_uses_vl = true; 4128 assert(VM_Version::supports_avx(), ""); 4129 if (VM_Version::supports_evex()) { 4130 emit_vex_arith_q(0x5E, dst, nds, src, VEX_SIMD_66, vector_len); 4131 } else { 4132 emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_66, vector_len); 4133 } 4134 } 4135 4136 void Assembler::vdivps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { 4137 _instruction_uses_vl = true; 4138 assert(VM_Version::supports_avx(), ""); 4139 emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_NONE, vector_len); 4140 } 4141 4142 void Assembler::vdivpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { 4143 _instruction_uses_vl = true; 4144 assert(VM_Version::supports_avx(), ""); 4145 if (VM_Version::supports_evex()) { 4146 _tuple_type = EVEX_FV; 4147 _input_size_in_bits = EVEX_64bit; 4148 emit_vex_arith_q(0x5E, dst, nds, src, VEX_SIMD_66, vector_len); 4149 } else { 4150 emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_66, vector_len); 4151 } 4152 } 4153 4154 void Assembler::vdivps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { 4155 _instruction_uses_vl = true; 4156 assert(VM_Version::supports_avx(), ""); 4157 if (VM_Version::supports_evex()) { 4158 _tuple_type = EVEX_FV; 4159 _input_size_in_bits = EVEX_32bit; 4160 } 4161 emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_NONE, vector_len); 4162 } 4163 4164 void Assembler::vsqrtpd(XMMRegister dst, XMMRegister src, int vector_len) { 4165 _instruction_uses_vl = true; 4166 assert(VM_Version::supports_avx(), ""); 4167 if (VM_Version::supports_evex()) { 4168 emit_vex_arith_q(0x51, dst, xnoreg, src, VEX_SIMD_66, vector_len); 4169 } else { 4170 emit_vex_arith(0x51, dst, xnoreg, src, VEX_SIMD_66, vector_len); 4171 } 4172 } 4173 4174 void Assembler::vsqrtpd(XMMRegister dst, Address src, int vector_len) { 4175 _instruction_uses_vl = true; 4176 assert(VM_Version::supports_avx(), ""); 4177 if (VM_Version::supports_evex()) { 4178 _tuple_type = EVEX_FV; 4179 
_input_size_in_bits = EVEX_64bit; 4180 emit_vex_arith_q(0x51, dst, xnoreg, src, VEX_SIMD_66, vector_len); 4181 } else { 4182 emit_vex_arith(0x51, dst, xnoreg, src, VEX_SIMD_66, vector_len); 4183 } 4184 } 4185 4186 void Assembler::andpd(XMMRegister dst, XMMRegister src) { 4187 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 4188 if (VM_Version::supports_avx512dq()) { 4189 emit_simd_arith_q(0x54, dst, src, VEX_SIMD_66); 4190 } else { 4191 emit_simd_arith(0x54, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ true); 4192 } 4193 } 4194 4195 void Assembler::andps(XMMRegister dst, XMMRegister src) { 4196 NOT_LP64(assert(VM_Version::supports_sse(), "")); 4197 emit_simd_arith(0x54, dst, src, VEX_SIMD_NONE, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_dq); 4198 } 4199 4200 void Assembler::andps(XMMRegister dst, Address src) { 4201 NOT_LP64(assert(VM_Version::supports_sse(), "")); 4202 if (VM_Version::supports_evex()) { 4203 _tuple_type = EVEX_FV; 4204 _input_size_in_bits = EVEX_32bit; 4205 } 4206 emit_simd_arith(0x54, dst, src, VEX_SIMD_NONE, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_dq); 4207 } 4208 4209 void Assembler::andpd(XMMRegister dst, Address src) { 4210 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 4211 if (VM_Version::supports_avx512dq()) { 4212 _tuple_type = EVEX_FV; 4213 _input_size_in_bits = EVEX_64bit; 4214 emit_simd_arith_q(0x54, dst, src, VEX_SIMD_66); 4215 } else { 4216 emit_simd_arith(0x54, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ true); 4217 } 4218 } 4219 4220 void Assembler::vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { 4221 assert(VM_Version::supports_avx(), ""); 4222 if (VM_Version::supports_avx512dq()) { 4223 emit_vex_arith_q(0x54, dst, nds, src, VEX_SIMD_66, vector_len); 4224 } else { 4225 emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ true); 4226 } 4227 } 4228 4229 void 
Assembler::vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { 4230 assert(VM_Version::supports_avx(), ""); 4231 emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_NONE, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_dq); 4232 } 4233 4234 void Assembler::vandpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { 4235 assert(VM_Version::supports_avx(), ""); 4236 if (VM_Version::supports_avx512dq()) { 4237 _tuple_type = EVEX_FV; 4238 _input_size_in_bits = EVEX_64bit; 4239 emit_vex_arith_q(0x54, dst, nds, src, VEX_SIMD_66, vector_len); 4240 } else { 4241 emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ true); 4242 } 4243 } 4244 4245 void Assembler::vandps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { 4246 assert(VM_Version::supports_avx(), ""); 4247 if (VM_Version::supports_evex()) { 4248 _tuple_type = EVEX_FV; 4249 _input_size_in_bits = EVEX_32bit; 4250 } 4251 emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_NONE, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_dq); 4252 } 4253 4254 void Assembler::xorpd(XMMRegister dst, XMMRegister src) { 4255 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 4256 if (VM_Version::supports_avx512dq()) { 4257 emit_simd_arith_q(0x57, dst, src, VEX_SIMD_66); 4258 } else { 4259 emit_simd_arith(0x57, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ true); 4260 } 4261 } 4262 4263 void Assembler::xorps(XMMRegister dst, XMMRegister src) { 4264 NOT_LP64(assert(VM_Version::supports_sse(), "")); 4265 emit_simd_arith(0x57, dst, src, VEX_SIMD_NONE, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_dq); 4266 } 4267 4268 void Assembler::xorpd(XMMRegister dst, Address src) { 4269 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 4270 if (VM_Version::supports_avx512dq()) { 4271 _tuple_type = EVEX_FV; 4272 _input_size_in_bits = EVEX_64bit; 4273 emit_simd_arith_q(0x57, dst, src, 
VEX_SIMD_66); 4274 } else { 4275 emit_simd_arith(0x57, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ true); 4276 } 4277 } 4278 4279 void Assembler::xorps(XMMRegister dst, Address src) { 4280 NOT_LP64(assert(VM_Version::supports_sse(), "")); 4281 if (VM_Version::supports_evex()) { 4282 _tuple_type = EVEX_FV; 4283 _input_size_in_bits = EVEX_32bit; 4284 } 4285 emit_simd_arith(0x57, dst, src, VEX_SIMD_NONE, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_dq); 4286 } 4287 4288 void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { 4289 assert(VM_Version::supports_avx(), ""); 4290 if (VM_Version::supports_avx512dq()) { 4291 emit_vex_arith_q(0x57, dst, nds, src, VEX_SIMD_66, vector_len); 4292 } else { 4293 emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ true); 4294 } 4295 } 4296 4297 void Assembler::vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { 4298 assert(VM_Version::supports_avx(), ""); 4299 emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_NONE, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_dq); 4300 } 4301 4302 void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { 4303 assert(VM_Version::supports_avx(), ""); 4304 if (VM_Version::supports_avx512dq()) { 4305 _tuple_type = EVEX_FV; 4306 _input_size_in_bits = EVEX_64bit; 4307 emit_vex_arith_q(0x57, dst, nds, src, VEX_SIMD_66, vector_len); 4308 } else { 4309 emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ true); 4310 } 4311 } 4312 4313 void Assembler::vxorps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { 4314 assert(VM_Version::supports_avx(), ""); 4315 if (VM_Version::supports_evex()) { 4316 _tuple_type = EVEX_FV; 4317 _input_size_in_bits = EVEX_32bit; 4318 } 4319 emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_NONE, vector_len, /* no_mask_reg */ false, 
/* legacy_mode */ _legacy_mode_dq); 4320 } 4321 4322 // Integer vector arithmetic 4323 void Assembler::vphaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { 4324 assert(VM_Version::supports_avx() && (vector_len == 0) || 4325 VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 4326 int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_38, /* legacy_mode */ true); 4327 emit_int8(0x01); 4328 emit_int8((unsigned char)(0xC0 | encode)); 4329 } 4330 4331 void Assembler::vphaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { 4332 assert(VM_Version::supports_avx() && (vector_len == 0) || 4333 VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 4334 int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_38, /* legacy_mode */ true); 4335 emit_int8(0x02); 4336 emit_int8((unsigned char)(0xC0 | encode)); 4337 } 4338 4339 void Assembler::paddb(XMMRegister dst, XMMRegister src) { 4340 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 4341 emit_simd_arith(0xFC, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw); 4342 } 4343 4344 void Assembler::paddw(XMMRegister dst, XMMRegister src) { 4345 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 4346 emit_simd_arith(0xFD, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw); 4347 } 4348 4349 void Assembler::paddd(XMMRegister dst, XMMRegister src) { 4350 _instruction_uses_vl = true; 4351 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 4352 emit_simd_arith(0xFE, dst, src, VEX_SIMD_66); 4353 } 4354 4355 void Assembler::paddq(XMMRegister dst, XMMRegister src) { 4356 _instruction_uses_vl = true; 4357 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 4358 if (VM_Version::supports_evex()) { 4359 emit_simd_arith_q(0xD4, dst, src, VEX_SIMD_66); 4360 } else { 4361 emit_simd_arith(0xD4, dst, src, VEX_SIMD_66); 4362 } 4363 } 4364 
4365 void Assembler::phaddw(XMMRegister dst, XMMRegister src) { 4366 NOT_LP64(assert(VM_Version::supports_sse3(), "")); 4367 int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, 4368 VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true); 4369 emit_int8(0x01); 4370 emit_int8((unsigned char)(0xC0 | encode)); 4371 } 4372 4373 void Assembler::phaddd(XMMRegister dst, XMMRegister src) { 4374 NOT_LP64(assert(VM_Version::supports_sse3(), "")); 4375 int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, 4376 VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true); 4377 emit_int8(0x02); 4378 emit_int8((unsigned char)(0xC0 | encode)); 4379 } 4380 4381 void Assembler::vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { 4382 assert(UseAVX > 0, "requires some form of AVX"); 4383 emit_vex_arith(0xFC, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw); 4384 } 4385 4386 void Assembler::vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { 4387 assert(UseAVX > 0, "requires some form of AVX"); 4388 emit_vex_arith(0xFD, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw); 4389 } 4390 4391 void Assembler::vpaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { 4392 _instruction_uses_vl = true; 4393 assert(UseAVX > 0, "requires some form of AVX"); 4394 emit_vex_arith(0xFE, dst, nds, src, VEX_SIMD_66, vector_len); 4395 } 4396 4397 void Assembler::vpaddq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { 4398 _instruction_uses_vl = true; 4399 assert(UseAVX > 0, "requires some form of AVX"); 4400 if (VM_Version::supports_evex()) { 4401 emit_vex_arith_q(0xD4, dst, nds, src, VEX_SIMD_66, vector_len); 4402 } else { 4403 emit_vex_arith(0xD4, dst, nds, src, VEX_SIMD_66, vector_len); 4404 } 4405 } 4406 4407 void 
Assembler::vpaddb(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { 4408 assert(UseAVX > 0, "requires some form of AVX"); 4409 if (VM_Version::supports_evex()) { 4410 _tuple_type = EVEX_FVM; 4411 } 4412 emit_vex_arith(0xFC, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw); 4413 } 4414 4415 void Assembler::vpaddw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { 4416 assert(UseAVX > 0, "requires some form of AVX"); 4417 if (VM_Version::supports_evex()) { 4418 _tuple_type = EVEX_FVM; 4419 } 4420 emit_vex_arith(0xFD, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw); 4421 } 4422 4423 void Assembler::vpaddd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { 4424 _instruction_uses_vl = true; 4425 assert(UseAVX > 0, "requires some form of AVX"); 4426 if (VM_Version::supports_evex()) { 4427 _tuple_type = EVEX_FV; 4428 _input_size_in_bits = EVEX_32bit; 4429 } 4430 emit_vex_arith(0xFE, dst, nds, src, VEX_SIMD_66, vector_len); 4431 } 4432 4433 void Assembler::vpaddq(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { 4434 _instruction_uses_vl = true; 4435 assert(UseAVX > 0, "requires some form of AVX"); 4436 if (VM_Version::supports_evex()) { 4437 _tuple_type = EVEX_FV; 4438 _input_size_in_bits = EVEX_64bit; 4439 emit_vex_arith_q(0xD4, dst, nds, src, VEX_SIMD_66, vector_len); 4440 } else { 4441 emit_vex_arith(0xD4, dst, nds, src, VEX_SIMD_66, vector_len); 4442 } 4443 } 4444 4445 void Assembler::psubb(XMMRegister dst, XMMRegister src) { 4446 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 4447 emit_simd_arith(0xF8, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw); 4448 } 4449 4450 void Assembler::psubw(XMMRegister dst, XMMRegister src) { 4451 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 4452 emit_simd_arith(0xF9, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ 
_legacy_mode_bw); 4453 } 4454 4455 void Assembler::psubd(XMMRegister dst, XMMRegister src) { 4456 _instruction_uses_vl = true; 4457 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 4458 emit_simd_arith(0xFA, dst, src, VEX_SIMD_66); 4459 } 4460 4461 void Assembler::psubq(XMMRegister dst, XMMRegister src) { 4462 _instruction_uses_vl = true; 4463 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 4464 if (VM_Version::supports_evex()) { 4465 emit_simd_arith_q(0xFB, dst, src, VEX_SIMD_66); 4466 } else { 4467 emit_simd_arith(0xFB, dst, src, VEX_SIMD_66); 4468 } 4469 } 4470 4471 void Assembler::vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { 4472 assert(UseAVX > 0, "requires some form of AVX"); 4473 emit_vex_arith(0xF8, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw); 4474 } 4475 4476 void Assembler::vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { 4477 assert(UseAVX > 0, "requires some form of AVX"); 4478 emit_vex_arith(0xF9, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw); 4479 } 4480 4481 void Assembler::vpsubd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { 4482 _instruction_uses_vl = true; 4483 assert(UseAVX > 0, "requires some form of AVX"); 4484 emit_vex_arith(0xFA, dst, nds, src, VEX_SIMD_66, vector_len); 4485 } 4486 4487 void Assembler::vpsubq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { 4488 _instruction_uses_vl = true; 4489 assert(UseAVX > 0, "requires some form of AVX"); 4490 if (VM_Version::supports_evex()) { 4491 emit_vex_arith_q(0xFB, dst, nds, src, VEX_SIMD_66, vector_len); 4492 } else { 4493 emit_vex_arith(0xFB, dst, nds, src, VEX_SIMD_66, vector_len); 4494 } 4495 } 4496 4497 void Assembler::vpsubb(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { 4498 assert(UseAVX > 0, "requires some form of AVX"); 4499 if (VM_Version::supports_evex()) 
{ 4500 _tuple_type = EVEX_FVM; 4501 } 4502 emit_vex_arith(0xF8, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw); 4503 } 4504 4505 void Assembler::vpsubw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { 4506 assert(UseAVX > 0, "requires some form of AVX"); 4507 if (VM_Version::supports_evex()) { 4508 _tuple_type = EVEX_FVM; 4509 } 4510 emit_vex_arith(0xF9, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw); 4511 } 4512 4513 void Assembler::vpsubd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { 4514 _instruction_uses_vl = true; 4515 assert(UseAVX > 0, "requires some form of AVX"); 4516 if (VM_Version::supports_evex()) { 4517 _tuple_type = EVEX_FV; 4518 _input_size_in_bits = EVEX_32bit; 4519 } 4520 emit_vex_arith(0xFA, dst, nds, src, VEX_SIMD_66, vector_len); 4521 } 4522 4523 void Assembler::vpsubq(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { 4524 _instruction_uses_vl = true; 4525 assert(UseAVX > 0, "requires some form of AVX"); 4526 if (VM_Version::supports_evex()) { 4527 _tuple_type = EVEX_FV; 4528 _input_size_in_bits = EVEX_64bit; 4529 emit_vex_arith_q(0xFB, dst, nds, src, VEX_SIMD_66, vector_len); 4530 } else { 4531 emit_vex_arith(0xFB, dst, nds, src, VEX_SIMD_66, vector_len); 4532 } 4533 } 4534 4535 void Assembler::pmullw(XMMRegister dst, XMMRegister src) { 4536 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 4537 emit_simd_arith(0xD5, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw); 4538 } 4539 4540 void Assembler::pmulld(XMMRegister dst, XMMRegister src) { 4541 _instruction_uses_vl = true; 4542 assert(VM_Version::supports_sse4_1(), ""); 4543 int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, 4544 /* no_mask_reg */ false, VEX_OPCODE_0F_38); 4545 emit_int8(0x40); 4546 emit_int8((unsigned char)(0xC0 | encode)); 4547 } 4548 4549 void Assembler::vpmullw(XMMRegister 
dst, XMMRegister nds, XMMRegister src, int vector_len) { 4550 assert(UseAVX > 0, "requires some form of AVX"); 4551 emit_vex_arith(0xD5, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw); 4552 } 4553 4554 void Assembler::vpmulld(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { 4555 _instruction_uses_vl = true; 4556 assert(UseAVX > 0, "requires some form of AVX"); 4557 int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_38); 4558 emit_int8(0x40); 4559 emit_int8((unsigned char)(0xC0 | encode)); 4560 } 4561 4562 void Assembler::vpmullq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { 4563 assert(UseAVX > 2, "requires some form of AVX"); 4564 int src_enc = src->encoding(); 4565 int dst_enc = dst->encoding(); 4566 int nds_enc = nds->is_valid() ? nds->encoding() : 0; 4567 int encode = vex_prefix_and_encode(dst_enc, nds_enc, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, 4568 /* vex_w */ true, vector_len, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false); 4569 emit_int8(0x40); 4570 emit_int8((unsigned char)(0xC0 | encode)); 4571 } 4572 4573 void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { 4574 assert(UseAVX > 0, "requires some form of AVX"); 4575 if (VM_Version::supports_evex()) { 4576 _tuple_type = EVEX_FVM; 4577 } 4578 emit_vex_arith(0xD5, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw); 4579 } 4580 4581 void Assembler::vpmulld(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { 4582 _instruction_uses_vl = true; 4583 assert(UseAVX > 0, "requires some form of AVX"); 4584 if (VM_Version::supports_evex()) { 4585 _tuple_type = EVEX_FV; 4586 _input_size_in_bits = EVEX_32bit; 4587 } 4588 InstructionMark im(this); 4589 int dst_enc = dst->encoding(); 4590 int nds_enc = nds->is_valid() ? 
nds->encoding() : 0; 4591 vex_prefix(src, nds_enc, dst_enc, VEX_SIMD_66, 4592 VEX_OPCODE_0F_38, /* vex_w */ false, vector_len); 4593 emit_int8(0x40); 4594 emit_operand(dst, src); 4595 } 4596 4597 void Assembler::vpmullq(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { 4598 assert(UseAVX > 0, "requires some form of AVX"); 4599 if (VM_Version::supports_evex()) { 4600 _tuple_type = EVEX_FV; 4601 _input_size_in_bits = EVEX_64bit; 4602 } 4603 InstructionMark im(this); 4604 int dst_enc = dst->encoding(); 4605 int nds_enc = nds->is_valid() ? nds->encoding() : 0; 4606 vex_prefix(src, nds_enc, dst_enc, VEX_SIMD_66, 4607 VEX_OPCODE_0F_38, /* vex_w */ true, vector_len, /* legacy_mode */ _legacy_mode_dq); 4608 emit_int8(0x40); 4609 emit_operand(dst, src); 4610 } 4611 4612 // Shift packed integers left by specified number of bits. 4613 void Assembler::psllw(XMMRegister dst, int shift) { 4614 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 4615 // XMM6 is for /6 encoding: 66 0F 71 /6 ib 4616 int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, /* no_mask_reg */ false, VEX_OPCODE_0F, 4617 /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_bw); 4618 emit_int8(0x71); 4619 emit_int8((unsigned char)(0xC0 | encode)); 4620 emit_int8(shift & 0xFF); 4621 } 4622 4623 void Assembler::pslld(XMMRegister dst, int shift) { 4624 _instruction_uses_vl = true; 4625 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 4626 // XMM6 is for /6 encoding: 66 0F 72 /6 ib 4627 int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, /* no_mask_reg */ false); 4628 emit_int8(0x72); 4629 emit_int8((unsigned char)(0xC0 | encode)); 4630 emit_int8(shift & 0xFF); 4631 } 4632 4633 void Assembler::psllq(XMMRegister dst, int shift) { 4634 _instruction_uses_vl = true; 4635 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 4636 // XMM6 is for /6 encoding: 66 0F 73 /6 ib 4637 int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, /* no_mask_reg */ false, 
VEX_OPCODE_0F, /* rex_w */ true); 4638 emit_int8(0x73); 4639 emit_int8((unsigned char)(0xC0 | encode)); 4640 emit_int8(shift & 0xFF); 4641 } 4642 4643 void Assembler::psllw(XMMRegister dst, XMMRegister shift) { 4644 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 4645 emit_simd_arith(0xF1, dst, shift, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw); 4646 } 4647 4648 void Assembler::pslld(XMMRegister dst, XMMRegister shift) { 4649 _instruction_uses_vl = true; 4650 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 4651 emit_simd_arith(0xF2, dst, shift, VEX_SIMD_66); 4652 } 4653 4654 void Assembler::psllq(XMMRegister dst, XMMRegister shift) { 4655 _instruction_uses_vl = true; 4656 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 4657 if (VM_Version::supports_evex()) { 4658 emit_simd_arith_q(0xF3, dst, shift, VEX_SIMD_66); 4659 } else { 4660 emit_simd_arith(0xF3, dst, shift, VEX_SIMD_66); 4661 } 4662 } 4663 4664 void Assembler::vpsllw(XMMRegister dst, XMMRegister src, int shift, int vector_len) { 4665 assert(UseAVX > 0, "requires some form of AVX"); 4666 // XMM6 is for /6 encoding: 66 0F 71 /6 ib 4667 emit_vex_arith(0x71, xmm6, dst, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw); 4668 emit_int8(shift & 0xFF); 4669 } 4670 4671 void Assembler::vpslld(XMMRegister dst, XMMRegister src, int shift, int vector_len) { 4672 _instruction_uses_vl = true; 4673 assert(UseAVX > 0, "requires some form of AVX"); 4674 // XMM6 is for /6 encoding: 66 0F 72 /6 ib 4675 emit_vex_arith(0x72, xmm6, dst, src, VEX_SIMD_66, vector_len); 4676 emit_int8(shift & 0xFF); 4677 } 4678 4679 void Assembler::vpsllq(XMMRegister dst, XMMRegister src, int shift, int vector_len) { 4680 _instruction_uses_vl = true; 4681 assert(UseAVX > 0, "requires some form of AVX"); 4682 // XMM6 is for /6 encoding: 66 0F 73 /6 ib 4683 if (VM_Version::supports_evex()) { 4684 emit_vex_arith_q(0x73, xmm6, dst, src, VEX_SIMD_66, vector_len); 4685 } else { 
4686 emit_vex_arith(0x73, xmm6, dst, src, VEX_SIMD_66, vector_len); 4687 } 4688 emit_int8(shift & 0xFF); 4689 } 4690 4691 void Assembler::vpsllw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) { 4692 assert(UseAVX > 0, "requires some form of AVX"); 4693 emit_vex_arith(0xF1, dst, src, shift, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw); 4694 } 4695 4696 void Assembler::vpslld(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) { 4697 _instruction_uses_vl = true; 4698 assert(UseAVX > 0, "requires some form of AVX"); 4699 emit_vex_arith(0xF2, dst, src, shift, VEX_SIMD_66, vector_len); 4700 } 4701 4702 void Assembler::vpsllq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) { 4703 _instruction_uses_vl = true; 4704 assert(UseAVX > 0, "requires some form of AVX"); 4705 if (VM_Version::supports_evex()) { 4706 emit_vex_arith_q(0xF3, dst, src, shift, VEX_SIMD_66, vector_len); 4707 } else { 4708 emit_vex_arith(0xF3, dst, src, shift, VEX_SIMD_66, vector_len); 4709 } 4710 } 4711 4712 // Shift packed integers logically right by specified number of bits. 
4713 void Assembler::psrlw(XMMRegister dst, int shift) { 4714 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 4715 // XMM2 is for /2 encoding: 66 0F 71 /2 ib 4716 int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66, /* no_mask_reg */ false, 4717 VEX_OPCODE_0F, /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_bw); 4718 emit_int8(0x71); 4719 emit_int8((unsigned char)(0xC0 | encode)); 4720 emit_int8(shift & 0xFF); 4721 } 4722 4723 void Assembler::psrld(XMMRegister dst, int shift) { 4724 _instruction_uses_vl = true; 4725 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 4726 // XMM2 is for /2 encoding: 66 0F 72 /2 ib 4727 int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66, /* no_mask_reg */ false); 4728 emit_int8(0x72); 4729 emit_int8((unsigned char)(0xC0 | encode)); 4730 emit_int8(shift & 0xFF); 4731 } 4732 4733 void Assembler::psrlq(XMMRegister dst, int shift) { 4734 _instruction_uses_vl = true; 4735 // Do not confuse it with psrldq SSE2 instruction which 4736 // shifts 128 bit value in xmm register by number of bytes. 
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  // XMM2 is for /2 encoding: 66 0F 73 /2 ib
  int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66, /* no_mask_reg */ false,
                                      VEX_OPCODE_0F, /* rex_w */ VM_Version::supports_evex());
  emit_int8(0x73);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8(shift & 0xFF);
}

// PSRLW with the shift count in an XMM register (66 0F D1 /r).
// Encoded through the byte/word legacy-mode switch (_legacy_mode_bw).
void Assembler::psrlw(XMMRegister dst, XMMRegister shift) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0xD1, dst, shift, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
}

// PSRLD with the shift count in an XMM register (66 0F D2 /r).
void Assembler::psrld(XMMRegister dst, XMMRegister shift) {
  _instruction_uses_vl = true;
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0xD2, dst, shift, VEX_SIMD_66);
}

// PSRLQ with the shift count in an XMM register (66 0F D3 /r).
// When EVEX is supported the "_q" emitter variant is used instead of the plain one.
void Assembler::psrlq(XMMRegister dst, XMMRegister shift) {
  _instruction_uses_vl = true;
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  if (VM_Version::supports_evex()) {
    emit_simd_arith_q(0xD3, dst, shift, VEX_SIMD_66);
  } else {
    emit_simd_arith(0xD3, dst, shift, VEX_SIMD_66);
  }
}

// VPSRLW dst, src, imm8: the low 8 bits of 'shift' are emitted as the trailing immediate.
void Assembler::vpsrlw(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  // XMM2 is for /2 encoding: 66 0F 71 /2 ib
  emit_vex_arith(0x71, xmm2, dst, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
  emit_int8(shift & 0xFF);
}

// VPSRLD dst, src, imm8: the low 8 bits of 'shift' are emitted as the trailing immediate.
void Assembler::vpsrld(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
  _instruction_uses_vl = true;
  assert(UseAVX > 0, "requires some form of AVX");
  // XMM2 is for /2 encoding: 66 0F 72 /2 ib
  emit_vex_arith(0x72, xmm2, dst, src, VEX_SIMD_66, vector_len);
  emit_int8(shift & 0xFF);
}

// VPSRLQ dst, src, imm8: uses the "_q" emitter variant when EVEX is supported,
// then emits the low 8 bits of 'shift' as the trailing immediate.
void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
  _instruction_uses_vl = true;
  assert(UseAVX > 0, "requires some form of AVX");
  // XMM2 is for /2 encoding: 66 0F 73 /2 ib
  if (VM_Version::supports_evex()) {
    emit_vex_arith_q(0x73, xmm2, dst, src, VEX_SIMD_66, vector_len);
  } else {
    emit_vex_arith(0x73, xmm2, dst, src, VEX_SIMD_66, vector_len);
  }
  emit_int8(shift & 0xFF);
}

// VPSRLW dst, src, shift: register-count form (opcode 0xD1), byte/word legacy-mode switch applied.
void Assembler::vpsrlw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  emit_vex_arith(0xD1, dst, src, shift, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
}

// VPSRLD dst, src, shift: register-count form (opcode 0xD2).
void Assembler::vpsrld(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
  _instruction_uses_vl = true;
  assert(UseAVX > 0, "requires some form of AVX");
  emit_vex_arith(0xD2, dst, src, shift, VEX_SIMD_66, vector_len);
}

// VPSRLQ dst, src, shift: register-count form (opcode 0xD3); "_q" emitter variant under EVEX.
void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
  _instruction_uses_vl = true;
  assert(UseAVX > 0, "requires some form of AVX");
  if (VM_Version::supports_evex()) {
    emit_vex_arith_q(0xD3, dst, src, shift, VEX_SIMD_66, vector_len);
  } else {
    emit_vex_arith(0xD3, dst, src, shift, VEX_SIMD_66, vector_len);
  }
}

// Shift packed integers arithmetically right by specified number of bits.
4816 void Assembler::psraw(XMMRegister dst, int shift) { 4817 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 4818 // XMM4 is for /4 encoding: 66 0F 71 /4 ib 4819 int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66, /* no_mask_reg */ false, 4820 VEX_OPCODE_0F, /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_bw); 4821 emit_int8(0x71); 4822 emit_int8((unsigned char)(0xC0 | encode)); 4823 emit_int8(shift & 0xFF); 4824 } 4825 4826 void Assembler::psrad(XMMRegister dst, int shift) { 4827 _instruction_uses_vl = true; 4828 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 4829 // XMM4 is for /4 encoding: 66 0F 72 /4 ib 4830 int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66, /* no_mask_reg */ false); 4831 emit_int8(0x72); 4832 emit_int8((unsigned char)(0xC0 | encode)); 4833 emit_int8(shift & 0xFF); 4834 } 4835 4836 void Assembler::psraw(XMMRegister dst, XMMRegister shift) { 4837 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 4838 emit_simd_arith(0xE1, dst, shift, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw); 4839 } 4840 4841 void Assembler::psrad(XMMRegister dst, XMMRegister shift) { 4842 _instruction_uses_vl = true; 4843 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 4844 emit_simd_arith(0xE2, dst, shift, VEX_SIMD_66); 4845 } 4846 4847 void Assembler::vpsraw(XMMRegister dst, XMMRegister src, int shift, int vector_len) { 4848 assert(UseAVX > 0, "requires some form of AVX"); 4849 // XMM4 is for /4 encoding: 66 0F 71 /4 ib 4850 emit_vex_arith(0x71, xmm4, dst, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw); 4851 emit_int8(shift & 0xFF); 4852 } 4853 4854 void Assembler::vpsrad(XMMRegister dst, XMMRegister src, int shift, int vector_len) { 4855 _instruction_uses_vl = true; 4856 assert(UseAVX > 0, "requires some form of AVX"); 4857 // XMM4 is for /4 encoding: 66 0F 71 /4 ib 4858 emit_vex_arith(0x72, xmm4, dst, src, VEX_SIMD_66, vector_len); 4859 
emit_int8(shift & 0xFF); 4860 } 4861 4862 void Assembler::vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) { 4863 assert(UseAVX > 0, "requires some form of AVX"); 4864 emit_vex_arith(0xE1, dst, src, shift, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw); 4865 } 4866 4867 void Assembler::vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) { 4868 _instruction_uses_vl = true; 4869 assert(UseAVX > 0, "requires some form of AVX"); 4870 emit_vex_arith(0xE2, dst, src, shift, VEX_SIMD_66, vector_len); 4871 } 4872 4873 4874 // AND packed integers 4875 void Assembler::pand(XMMRegister dst, XMMRegister src) { 4876 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 4877 emit_simd_arith(0xDB, dst, src, VEX_SIMD_66); 4878 } 4879 4880 void Assembler::vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { 4881 _instruction_uses_vl = true; 4882 assert(UseAVX > 0, "requires some form of AVX"); 4883 emit_vex_arith(0xDB, dst, nds, src, VEX_SIMD_66, vector_len); 4884 } 4885 4886 void Assembler::vpand(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { 4887 _instruction_uses_vl = true; 4888 assert(UseAVX > 0, "requires some form of AVX"); 4889 if (VM_Version::supports_evex()) { 4890 _tuple_type = EVEX_FV; 4891 _input_size_in_bits = EVEX_32bit; 4892 } 4893 emit_vex_arith(0xDB, dst, nds, src, VEX_SIMD_66, vector_len); 4894 } 4895 4896 void Assembler::por(XMMRegister dst, XMMRegister src) { 4897 _instruction_uses_vl = true; 4898 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 4899 emit_simd_arith(0xEB, dst, src, VEX_SIMD_66); 4900 } 4901 4902 void Assembler::vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { 4903 _instruction_uses_vl = true; 4904 assert(UseAVX > 0, "requires some form of AVX"); 4905 emit_vex_arith(0xEB, dst, nds, src, VEX_SIMD_66, vector_len); 4906 } 4907 4908 void Assembler::vpor(XMMRegister dst, XMMRegister nds, Address src, 
int vector_len) { 4909 _instruction_uses_vl = true; 4910 assert(UseAVX > 0, "requires some form of AVX"); 4911 if (VM_Version::supports_evex()) { 4912 _tuple_type = EVEX_FV; 4913 _input_size_in_bits = EVEX_32bit; 4914 } 4915 emit_vex_arith(0xEB, dst, nds, src, VEX_SIMD_66, vector_len); 4916 } 4917 4918 void Assembler::pxor(XMMRegister dst, XMMRegister src) { 4919 _instruction_uses_vl = true; 4920 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 4921 emit_simd_arith(0xEF, dst, src, VEX_SIMD_66); 4922 } 4923 4924 void Assembler::vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { 4925 _instruction_uses_vl = true; 4926 assert(UseAVX > 0, "requires some form of AVX"); 4927 emit_vex_arith(0xEF, dst, nds, src, VEX_SIMD_66, vector_len); 4928 } 4929 4930 void Assembler::vpxor(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { 4931 _instruction_uses_vl = true; 4932 assert(UseAVX > 0, "requires some form of AVX"); 4933 if (VM_Version::supports_evex()) { 4934 _tuple_type = EVEX_FV; 4935 _input_size_in_bits = EVEX_32bit; 4936 } 4937 emit_vex_arith(0xEF, dst, nds, src, VEX_SIMD_66, vector_len); 4938 } 4939 4940 4941 void Assembler::vinsertf128h(XMMRegister dst, XMMRegister nds, XMMRegister src) { 4942 assert(VM_Version::supports_avx(), ""); 4943 int vector_len = AVX_256bit; 4944 if (VM_Version::supports_evex()) { 4945 vector_len = AVX_512bit; 4946 } 4947 int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_3A); 4948 emit_int8(0x18); 4949 emit_int8((unsigned char)(0xC0 | encode)); 4950 // 0x00 - insert into lower 128 bits 4951 // 0x01 - insert into upper 128 bits 4952 emit_int8(0x01); 4953 } 4954 4955 void Assembler::vinsertf64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src) { 4956 assert(VM_Version::supports_evex(), ""); 4957 int vector_len = AVX_512bit; 4958 int src_enc = src->encoding(); 4959 int dst_enc = dst->encoding(); 4960 int nds_enc = nds->is_valid() ? 
nds->encoding() : 0; 4961 int encode = vex_prefix_and_encode(dst_enc, nds_enc, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, 4962 /* vex_w */ true, vector_len, /* legacy_mode */ false, /* no_mask_reg */ false); 4963 emit_int8(0x1A); 4964 emit_int8((unsigned char)(0xC0 | encode)); 4965 // 0x00 - insert into lower 256 bits 4966 // 0x01 - insert into upper 256 bits 4967 emit_int8(0x01); 4968 } 4969 4970 void Assembler::vinsertf64x4h(XMMRegister dst, Address src) { 4971 assert(VM_Version::supports_evex(), ""); 4972 _tuple_type = EVEX_T4; 4973 _input_size_in_bits = EVEX_64bit; 4974 InstructionMark im(this); 4975 int vector_len = AVX_512bit; 4976 assert(dst != xnoreg, "sanity"); 4977 int dst_enc = dst->encoding(); 4978 // swap src<->dst for encoding 4979 vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, /* vex_w */ true, vector_len); 4980 emit_int8(0x1A); 4981 emit_operand(dst, src); 4982 // 0x01 - insert into upper 128 bits 4983 emit_int8(0x01); 4984 } 4985 4986 void Assembler::vinsertf32x4h(XMMRegister dst, XMMRegister nds, XMMRegister src, int value) { 4987 assert(VM_Version::supports_evex(), ""); 4988 int vector_len = AVX_512bit; 4989 int src_enc = src->encoding(); 4990 int dst_enc = dst->encoding(); 4991 int nds_enc = nds->is_valid() ? 
nds->encoding() : 0; 4992 int encode = vex_prefix_and_encode(dst_enc, nds_enc, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, 4993 /* vex_w */ false, vector_len, /* legacy_mode */ false, /* no_mask_reg */ false); 4994 emit_int8(0x18); 4995 emit_int8((unsigned char)(0xC0 | encode)); 4996 // 0x00 - insert into q0 128 bits (0..127) 4997 // 0x01 - insert into q1 128 bits (128..255) 4998 // 0x02 - insert into q2 128 bits (256..383) 4999 // 0x03 - insert into q3 128 bits (384..511) 5000 emit_int8(value & 0x3); 5001 } 5002 5003 void Assembler::vinsertf32x4h(XMMRegister dst, Address src, int value) { 5004 assert(VM_Version::supports_evex(), ""); 5005 _tuple_type = EVEX_T4; 5006 _input_size_in_bits = EVEX_32bit; 5007 InstructionMark im(this); 5008 int vector_len = AVX_512bit; 5009 assert(dst != xnoreg, "sanity"); 5010 int dst_enc = dst->encoding(); 5011 // swap src<->dst for encoding 5012 vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, /* vex_w */ false, vector_len); 5013 emit_int8(0x18); 5014 emit_operand(dst, src); 5015 // 0x00 - insert into q0 128 bits (0..127) 5016 // 0x01 - insert into q1 128 bits (128..255) 5017 // 0x02 - insert into q2 128 bits (256..383) 5018 // 0x03 - insert into q3 128 bits (384..511) 5019 emit_int8(value & 0x3); 5020 } 5021 5022 void Assembler::vinsertf128h(XMMRegister dst, Address src) { 5023 assert(VM_Version::supports_avx(), ""); 5024 int vector_len = AVX_256bit; 5025 if (VM_Version::supports_evex()) { 5026 _tuple_type = EVEX_T4; 5027 _input_size_in_bits = EVEX_32bit; 5028 vector_len = AVX_512bit; 5029 } 5030 InstructionMark im(this); 5031 assert(dst != xnoreg, "sanity"); 5032 int dst_enc = dst->encoding(); 5033 // swap src<->dst for encoding 5034 vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, /* vex_w */ false, vector_len); 5035 emit_int8(0x18); 5036 emit_operand(dst, src); 5037 // 0x01 - insert into upper 128 bits 5038 emit_int8(0x01); 5039 } 5040 5041 void Assembler::vextractf128h(XMMRegister dst, XMMRegister 
src) { 5042 assert(VM_Version::supports_avx(), ""); 5043 int vector_len = AVX_256bit; 5044 if (VM_Version::supports_evex()) { 5045 vector_len = AVX_512bit; 5046 } 5047 int encode = vex_prefix_and_encode(src, xnoreg, dst, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_3A); 5048 emit_int8(0x19); 5049 emit_int8((unsigned char)(0xC0 | encode)); 5050 // 0x00 - insert into lower 128 bits 5051 // 0x01 - insert into upper 128 bits 5052 emit_int8(0x01); 5053 } 5054 5055 void Assembler::vextractf128h(Address dst, XMMRegister src) { 5056 assert(VM_Version::supports_avx(), ""); 5057 int vector_len = AVX_256bit; 5058 if (VM_Version::supports_evex()) { 5059 _tuple_type = EVEX_T4; 5060 _input_size_in_bits = EVEX_32bit; 5061 vector_len = AVX_512bit; 5062 } 5063 InstructionMark im(this); 5064 assert(src != xnoreg, "sanity"); 5065 int src_enc = src->encoding(); 5066 vex_prefix(dst, 0, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, /* vex_w */ false, vector_len); 5067 emit_int8(0x19); 5068 emit_operand(src, dst); 5069 // 0x01 - extract from upper 128 bits 5070 emit_int8(0x01); 5071 } 5072 5073 void Assembler::vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src) { 5074 assert(VM_Version::supports_avx2(), ""); 5075 int vector_len = AVX_256bit; 5076 if (VM_Version::supports_evex()) { 5077 vector_len = AVX_512bit; 5078 } 5079 int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_3A); 5080 emit_int8(0x38); 5081 emit_int8((unsigned char)(0xC0 | encode)); 5082 // 0x00 - insert into lower 128 bits 5083 // 0x01 - insert into upper 128 bits 5084 emit_int8(0x01); 5085 } 5086 5087 void Assembler::vinserti64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src) { 5088 assert(VM_Version::supports_evex(), ""); 5089 int vector_len = AVX_512bit; 5090 int src_enc = src->encoding(); 5091 int dst_enc = dst->encoding(); 5092 int nds_enc = nds->is_valid() ? 
nds->encoding() : 0; 5093 int encode = vex_prefix_and_encode(dst_enc, nds_enc, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, 5094 /* vex_w */ true, vector_len, /* legacy_mode */ false, /* no_reg_mask */ false); 5095 emit_int8(0x38); 5096 emit_int8((unsigned char)(0xC0 | encode)); 5097 // 0x00 - insert into lower 256 bits 5098 // 0x01 - insert into upper 256 bits 5099 emit_int8(0x01); 5100 } 5101 5102 void Assembler::vinserti128h(XMMRegister dst, Address src) { 5103 assert(VM_Version::supports_avx2(), ""); 5104 int vector_len = AVX_256bit; 5105 if (VM_Version::supports_evex()) { 5106 _tuple_type = EVEX_T4; 5107 _input_size_in_bits = EVEX_32bit; 5108 vector_len = AVX_512bit; 5109 } 5110 InstructionMark im(this); 5111 assert(dst != xnoreg, "sanity"); 5112 int dst_enc = dst->encoding(); 5113 // swap src<->dst for encoding 5114 vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, /* vex_w */ false, vector_len); 5115 emit_int8(0x38); 5116 emit_operand(dst, src); 5117 // 0x01 - insert into upper 128 bits 5118 emit_int8(0x01); 5119 } 5120 5121 void Assembler::vextracti128h(XMMRegister dst, XMMRegister src) { 5122 assert(VM_Version::supports_avx(), ""); 5123 int vector_len = AVX_256bit; 5124 if (VM_Version::supports_evex()) { 5125 vector_len = AVX_512bit; 5126 } 5127 int encode = vex_prefix_and_encode(src, xnoreg, dst, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_3A); 5128 emit_int8(0x39); 5129 emit_int8((unsigned char)(0xC0 | encode)); 5130 // 0x00 - insert into lower 128 bits 5131 // 0x01 - insert into upper 128 bits 5132 emit_int8(0x01); 5133 } 5134 5135 void Assembler::vextracti128h(Address dst, XMMRegister src) { 5136 assert(VM_Version::supports_avx2(), ""); 5137 int vector_len = AVX_256bit; 5138 if (VM_Version::supports_evex()) { 5139 _tuple_type = EVEX_T4; 5140 _input_size_in_bits = EVEX_32bit; 5141 vector_len = AVX_512bit; 5142 } 5143 InstructionMark im(this); 5144 assert(src != xnoreg, "sanity"); 5145 int src_enc = src->encoding(); 5146 vex_prefix(dst, 0, src_enc, 
VEX_SIMD_66, VEX_OPCODE_0F_3A, /* vex_w */ false, vector_len); 5147 emit_int8(0x39); 5148 emit_operand(src, dst); 5149 // 0x01 - extract from upper 128 bits 5150 emit_int8(0x01); 5151 } 5152 5153 void Assembler::vextracti64x4h(XMMRegister dst, XMMRegister src) { 5154 assert(VM_Version::supports_evex(), ""); 5155 int vector_len = AVX_512bit; 5156 int src_enc = src->encoding(); 5157 int dst_enc = dst->encoding(); 5158 int encode = vex_prefix_and_encode(src_enc, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, 5159 /* vex_w */ true, vector_len, /* legacy_mode */ false, /* no_mask_reg */ false); 5160 emit_int8(0x3B); 5161 emit_int8((unsigned char)(0xC0 | encode)); 5162 // 0x01 - extract from upper 256 bits 5163 emit_int8(0x01); 5164 } 5165 5166 void Assembler::vextracti64x2h(XMMRegister dst, XMMRegister src, int value) { 5167 assert(VM_Version::supports_evex(), ""); 5168 int vector_len = AVX_512bit; 5169 int src_enc = src->encoding(); 5170 int dst_enc = dst->encoding(); 5171 int encode; 5172 if (VM_Version::supports_avx512dq()) { 5173 encode = vex_prefix_and_encode(src_enc, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, 5174 /* vex_w */ true, vector_len, /* legacy_mode */ false, /* no_mask_reg */ false); 5175 } else { 5176 encode = vex_prefix_and_encode(src_enc, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, 5177 /* vex_w */ false, vector_len, /* legacy_mode */ true, /* no_mask_reg */ false); 5178 } 5179 emit_int8(0x39); 5180 emit_int8((unsigned char)(0xC0 | encode)); 5181 // 0x01 - extract from bits 255:128 5182 // 0x02 - extract from bits 383:256 5183 // 0x03 - extract from bits 511:384 5184 emit_int8(value & 0x3); 5185 } 5186 5187 void Assembler::vextractf64x4h(XMMRegister dst, XMMRegister src) { 5188 assert(VM_Version::supports_evex(), ""); 5189 int vector_len = AVX_512bit; 5190 int src_enc = src->encoding(); 5191 int dst_enc = dst->encoding(); 5192 int encode = vex_prefix_and_encode(src_enc, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, 5193 /* vex_w */ true, vector_len, /* 
legacy_mode */ false, /* no_mask_reg */ false); 5194 emit_int8(0x1B); 5195 emit_int8((unsigned char)(0xC0 | encode)); 5196 // 0x01 - extract from upper 256 bits 5197 emit_int8(0x01); 5198 } 5199 5200 void Assembler::vextractf64x4h(Address dst, XMMRegister src) { 5201 assert(VM_Version::supports_evex(), ""); 5202 _tuple_type = EVEX_T4; 5203 _input_size_in_bits = EVEX_64bit; 5204 InstructionMark im(this); 5205 int vector_len = AVX_512bit; 5206 assert(src != xnoreg, "sanity"); 5207 int src_enc = src->encoding(); 5208 vex_prefix(dst, 0, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, 5209 /* vex_w */ true, vector_len); 5210 emit_int8(0x1B); 5211 emit_operand(src, dst); 5212 // 0x01 - extract from upper 256 bits 5213 emit_int8(0x01); 5214 } 5215 5216 void Assembler::vextractf32x4h(XMMRegister dst, XMMRegister src, int value) { 5217 assert(VM_Version::supports_evex(), ""); 5218 int vector_len = AVX_512bit; 5219 int src_enc = src->encoding(); 5220 int dst_enc = dst->encoding(); 5221 int encode = vex_prefix_and_encode(src_enc, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, 5222 /* vex_w */ false, vector_len, /* legacy_mode */ false, /* no_mask_reg */ false); 5223 emit_int8(0x19); 5224 emit_int8((unsigned char)(0xC0 | encode)); 5225 // 0x00 - extract from bits 127:0 5226 // 0x01 - extract from bits 255:128 5227 // 0x02 - extract from bits 383:256 5228 // 0x03 - extract from bits 511:384 5229 emit_int8(value & 0x3); 5230 } 5231 5232 void Assembler::vextractf32x4h(Address dst, XMMRegister src, int value) { 5233 assert(VM_Version::supports_evex(), ""); 5234 _tuple_type = EVEX_T4; 5235 _input_size_in_bits = EVEX_32bit; 5236 InstructionMark im(this); 5237 int vector_len = AVX_512bit; 5238 assert(src != xnoreg, "sanity"); 5239 int src_enc = src->encoding(); 5240 vex_prefix(dst, 0, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, /* vex_w */ false, vector_len); 5241 emit_int8(0x19); 5242 emit_operand(src, dst); 5243 // 0x00 - extract from bits 127:0 5244 // 0x01 - extract from bits 255:128 5245 // 
0x02 - extract from bits 383:256 5246 // 0x03 - extract from bits 511:384 5247 emit_int8(value & 0x3); 5248 } 5249 5250 void Assembler::vextractf64x2h(XMMRegister dst, XMMRegister src, int value) { 5251 assert(VM_Version::supports_evex(), ""); 5252 int vector_len = AVX_512bit; 5253 int src_enc = src->encoding(); 5254 int dst_enc = dst->encoding(); 5255 int encode = vex_prefix_and_encode(src_enc, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, 5256 /* vex_w */ !_legacy_mode_dq, vector_len, /* legacy_mode */ false, /* no_mask_reg */ false); 5257 emit_int8(0x19); 5258 emit_int8((unsigned char)(0xC0 | encode)); 5259 // 0x01 - extract from bits 255:128 5260 // 0x02 - extract from bits 383:256 5261 // 0x03 - extract from bits 511:384 5262 emit_int8(value & 0x3); 5263 } 5264 5265 // duplicate 4-bytes integer data from src into 8 locations in dest 5266 void Assembler::vpbroadcastd(XMMRegister dst, XMMRegister src) { 5267 _instruction_uses_vl = true; 5268 assert(UseAVX > 1, ""); 5269 int vector_len = AVX_256bit; 5270 int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_38); 5271 emit_int8(0x58); 5272 emit_int8((unsigned char)(0xC0 | encode)); 5273 } 5274 5275 // duplicate 1-byte integer data from src into 16||32|64 locations in dest : requires AVX512BW and AVX512VL 5276 void Assembler::evpbroadcastb(XMMRegister dst, XMMRegister src, int vector_len) { 5277 _instruction_uses_vl = true; 5278 assert(UseAVX > 1, ""); 5279 int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_38); 5280 emit_int8(0x78); 5281 emit_int8((unsigned char)(0xC0 | encode)); 5282 } 5283 5284 void Assembler::evpbroadcastb(XMMRegister dst, Address src, int vector_len) { 5285 _instruction_uses_vl = true; 5286 assert(UseAVX > 1, ""); 5287 _tuple_type = EVEX_T1S; 5288 _input_size_in_bits = EVEX_8bit; 5289 InstructionMark im(this); 5290 assert(dst != xnoreg, "sanity"); 5291 int dst_enc = dst->encoding(); 5292 // swap src<->dst for encoding 
5293 vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, /* vex_w */ false, vector_len); 5294 emit_int8(0x78); 5295 emit_operand(dst, src); 5296 } 5297 5298 // duplicate 2-byte integer data from src into 8|16||32 locations in dest : requires AVX512BW and AVX512VL 5299 void Assembler::evpbroadcastw(XMMRegister dst, XMMRegister src, int vector_len) { 5300 _instruction_uses_vl = true; 5301 assert(UseAVX > 1, ""); 5302 int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_38); 5303 emit_int8(0x79); 5304 emit_int8((unsigned char)(0xC0 | encode)); 5305 } 5306 5307 void Assembler::evpbroadcastw(XMMRegister dst, Address src, int vector_len) { 5308 _instruction_uses_vl = true; 5309 assert(UseAVX > 1, ""); 5310 _tuple_type = EVEX_T1S; 5311 _input_size_in_bits = EVEX_16bit; 5312 InstructionMark im(this); 5313 assert(dst != xnoreg, "sanity"); 5314 int dst_enc = dst->encoding(); 5315 // swap src<->dst for encoding 5316 vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, /* vex_w */ false, vector_len); 5317 emit_int8(0x79); 5318 emit_operand(dst, src); 5319 } 5320 5321 // duplicate 4-byte integer data from src into 4|8|16 locations in dest : requires AVX512VL 5322 void Assembler::evpbroadcastd(XMMRegister dst, XMMRegister src, int vector_len) { 5323 _instruction_uses_vl = true; 5324 assert(UseAVX > 1, ""); 5325 int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_38); 5326 emit_int8(0x58); 5327 emit_int8((unsigned char)(0xC0 | encode)); 5328 } 5329 5330 void Assembler::evpbroadcastd(XMMRegister dst, Address src, int vector_len) { 5331 _instruction_uses_vl = true; 5332 assert(UseAVX > 1, ""); 5333 _tuple_type = EVEX_T1S; 5334 _input_size_in_bits = EVEX_32bit; 5335 InstructionMark im(this); 5336 assert(dst != xnoreg, "sanity"); 5337 int dst_enc = dst->encoding(); 5338 // swap src<->dst for encoding 5339 vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, /* vex_w */ 
false, vector_len); 5340 emit_int8(0x58); 5341 emit_operand(dst, src); 5342 } 5343 5344 // duplicate 8-byte integer data from src into 4|8|16 locations in dest : requires AVX512VL 5345 void Assembler::evpbroadcastq(XMMRegister dst, XMMRegister src, int vector_len) { 5346 _instruction_uses_vl = true; 5347 assert(UseAVX > 1, ""); 5348 int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, 5349 /* vex_w */ true, vector_len, /* legacy_mode */ false, /* no_mask_reg */ false); 5350 emit_int8(0x59); 5351 emit_int8((unsigned char)(0xC0 | encode)); 5352 } 5353 5354 void Assembler::evpbroadcastq(XMMRegister dst, Address src, int vector_len) { 5355 _instruction_uses_vl = true; 5356 assert(UseAVX > 1, ""); 5357 _tuple_type = EVEX_T1S; 5358 _input_size_in_bits = EVEX_64bit; 5359 InstructionMark im(this); 5360 assert(dst != xnoreg, "sanity"); 5361 int dst_enc = dst->encoding(); 5362 // swap src<->dst for encoding 5363 vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, /* vex_w */ true, vector_len); 5364 emit_int8(0x59); 5365 emit_operand(dst, src); 5366 } 5367 5368 // duplicate single precision fp from src into 4|8|16 locations in dest : requires AVX512VL 5369 void Assembler::evpbroadcastss(XMMRegister dst, XMMRegister src, int vector_len) { 5370 _instruction_uses_vl = true; 5371 assert(UseAVX > 1, ""); 5372 int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, 5373 /* vex_w */ false, vector_len, /* legacy_mode */ false, /*no_mask_reg */ false); 5374 emit_int8(0x18); 5375 emit_int8((unsigned char)(0xC0 | encode)); 5376 } 5377 5378 void Assembler::evpbroadcastss(XMMRegister dst, Address src, int vector_len) { 5379 assert(UseAVX > 1, ""); 5380 _tuple_type = EVEX_T1S; 5381 _input_size_in_bits = EVEX_32bit; 5382 InstructionMark im(this); 5383 assert(dst != xnoreg, "sanity"); 5384 int dst_enc = dst->encoding(); 5385 // swap src<->dst for encoding 5386 vex_prefix(src, 0, 
dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, /* vex_w */ false, vector_len); 5387 emit_int8(0x18); 5388 emit_operand(dst, src); 5389 } 5390 5391 // duplicate double precision fp from src into 2|4|8 locations in dest : requires AVX512VL 5392 void Assembler::evpbroadcastsd(XMMRegister dst, XMMRegister src, int vector_len) { 5393 _instruction_uses_vl = true; 5394 assert(UseAVX > 1, ""); 5395 int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, 5396 /*vex_w */ true, vector_len, /* legacy_mode */ false, /*no_mask_reg */ false); 5397 emit_int8(0x19); 5398 emit_int8((unsigned char)(0xC0 | encode)); 5399 } 5400 5401 void Assembler::evpbroadcastsd(XMMRegister dst, Address src, int vector_len) { 5402 _instruction_uses_vl = true; 5403 assert(UseAVX > 1, ""); 5404 _tuple_type = EVEX_T1S; 5405 _input_size_in_bits = EVEX_64bit; 5406 InstructionMark im(this); 5407 assert(dst != xnoreg, "sanity"); 5408 int dst_enc = dst->encoding(); 5409 // swap src<->dst for encoding 5410 vex_prefix(src, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, /* vex_w */ true, vector_len); 5411 emit_int8(0x19); 5412 emit_operand(dst, src); 5413 } 5414 5415 // duplicate 1-byte integer data from src into 16||32|64 locations in dest : requires AVX512BW and AVX512VL 5416 void Assembler::evpbroadcastb(XMMRegister dst, Register src, int vector_len) { 5417 _instruction_uses_vl = true; 5418 assert(VM_Version::supports_evex(), ""); 5419 int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, 5420 /*vex_w */ false, vector_len, /* legacy_mode */ false, /*no_mask_reg */ false); 5421 emit_int8(0x7A); 5422 emit_int8((unsigned char)(0xC0 | encode)); 5423 } 5424 5425 // duplicate 2-byte integer data from src into 8|16||32 locations in dest : requires AVX512BW and AVX512VL 5426 void Assembler::evpbroadcastw(XMMRegister dst, Register src, int vector_len) { 5427 _instruction_uses_vl = true; 5428 assert(VM_Version::supports_evex(), ""); 
5429 int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, 5430 /* vex_w */ false, vector_len, /* legacy_mode */ false, /*no_mask_reg */ false); 5431 emit_int8(0x7B); 5432 emit_int8((unsigned char)(0xC0 | encode)); 5433 } 5434 5435 // duplicate 4-byte integer data from src into 4|8|16 locations in dest : requires AVX512VL 5436 void Assembler::evpbroadcastd(XMMRegister dst, Register src, int vector_len) { 5437 _instruction_uses_vl = true; 5438 assert(VM_Version::supports_evex(), ""); 5439 int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, 5440 /* vex_w */ false, vector_len, /* legacy_mode */ false, /*no_mask_reg */ false); 5441 emit_int8(0x7C); 5442 emit_int8((unsigned char)(0xC0 | encode)); 5443 } 5444 5445 // duplicate 8-byte integer data from src into 4|8|16 locations in dest : requires AVX512VL 5446 void Assembler::evpbroadcastq(XMMRegister dst, Register src, int vector_len) { 5447 _instruction_uses_vl = true; 5448 assert(VM_Version::supports_evex(), ""); 5449 int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, 5450 /* vex_w */ true, vector_len, /* legacy_mode */ false, /*no_mask_reg */ false); 5451 emit_int8(0x7C); 5452 emit_int8((unsigned char)(0xC0 | encode)); 5453 } 5454 5455 // Carry-Less Multiplication Quadword 5456 void Assembler::pclmulqdq(XMMRegister dst, XMMRegister src, int mask) { 5457 assert(VM_Version::supports_clmul(), ""); 5458 int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, 5459 VEX_OPCODE_0F_3A, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true); 5460 emit_int8(0x44); 5461 emit_int8((unsigned char)(0xC0 | encode)); 5462 emit_int8((unsigned char)mask); 5463 } 5464 5465 // Carry-Less Multiplication Quadword 5466 void Assembler::vpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask) { 5467 assert(VM_Version::supports_avx() && 
VM_Version::supports_clmul(), ""); 5468 int vector_len = AVX_128bit; 5469 int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_3A, /* legacy_mode */ true); 5470 emit_int8(0x44); 5471 emit_int8((unsigned char)(0xC0 | encode)); 5472 emit_int8((unsigned char)mask); 5473 } 5474 5475 void Assembler::vzeroupper() { 5476 assert(VM_Version::supports_avx(), ""); 5477 if (UseAVX < 3) 5478 { 5479 (void)vex_prefix_and_encode(xmm0, xmm0, xmm0, VEX_SIMD_NONE); 5480 emit_int8(0x77); 5481 } 5482 } 5483 5484 5485 #ifndef _LP64 5486 // 32bit only pieces of the assembler 5487 5488 void Assembler::cmp_literal32(Register src1, int32_t imm32, RelocationHolder const& rspec) { 5489 // NO PREFIX AS NEVER 64BIT 5490 InstructionMark im(this); 5491 emit_int8((unsigned char)0x81); 5492 emit_int8((unsigned char)(0xF8 | src1->encoding())); 5493 emit_data(imm32, rspec, 0); 5494 } 5495 5496 void Assembler::cmp_literal32(Address src1, int32_t imm32, RelocationHolder const& rspec) { 5497 // NO PREFIX AS NEVER 64BIT (not even 32bit versions of 64bit regs 5498 InstructionMark im(this); 5499 emit_int8((unsigned char)0x81); 5500 emit_operand(rdi, src1); 5501 emit_data(imm32, rspec, 0); 5502 } 5503 5504 // The 64-bit (32bit platform) cmpxchg compares the value at adr with the contents of rdx:rax, 5505 // and stores rcx:rbx into adr if so; otherwise, the value at adr is loaded 5506 // into rdx:rax. The ZF is set if the compared values were equal, and cleared otherwise. 5507 void Assembler::cmpxchg8(Address adr) { 5508 InstructionMark im(this); 5509 emit_int8(0x0F); 5510 emit_int8((unsigned char)0xC7); 5511 emit_operand(rcx, adr); 5512 } 5513 5514 void Assembler::decl(Register dst) { 5515 // Don't use it directly. Use MacroAssembler::decrementl() instead. 
5516 emit_int8(0x48 | dst->encoding()); 5517 } 5518 5519 #endif // _LP64 5520 5521 // 64bit typically doesn't use the x87 but needs to for the trig funcs 5522 5523 void Assembler::fabs() { 5524 emit_int8((unsigned char)0xD9); 5525 emit_int8((unsigned char)0xE1); 5526 } 5527 5528 void Assembler::fadd(int i) { 5529 emit_farith(0xD8, 0xC0, i); 5530 } 5531 5532 void Assembler::fadd_d(Address src) { 5533 InstructionMark im(this); 5534 emit_int8((unsigned char)0xDC); 5535 emit_operand32(rax, src); 5536 } 5537 5538 void Assembler::fadd_s(Address src) { 5539 InstructionMark im(this); 5540 emit_int8((unsigned char)0xD8); 5541 emit_operand32(rax, src); 5542 } 5543 5544 void Assembler::fadda(int i) { 5545 emit_farith(0xDC, 0xC0, i); 5546 } 5547 5548 void Assembler::faddp(int i) { 5549 emit_farith(0xDE, 0xC0, i); 5550 } 5551 5552 void Assembler::fchs() { 5553 emit_int8((unsigned char)0xD9); 5554 emit_int8((unsigned char)0xE0); 5555 } 5556 5557 void Assembler::fcom(int i) { 5558 emit_farith(0xD8, 0xD0, i); 5559 } 5560 5561 void Assembler::fcomp(int i) { 5562 emit_farith(0xD8, 0xD8, i); 5563 } 5564 5565 void Assembler::fcomp_d(Address src) { 5566 InstructionMark im(this); 5567 emit_int8((unsigned char)0xDC); 5568 emit_operand32(rbx, src); 5569 } 5570 5571 void Assembler::fcomp_s(Address src) { 5572 InstructionMark im(this); 5573 emit_int8((unsigned char)0xD8); 5574 emit_operand32(rbx, src); 5575 } 5576 5577 void Assembler::fcompp() { 5578 emit_int8((unsigned char)0xDE); 5579 emit_int8((unsigned char)0xD9); 5580 } 5581 5582 void Assembler::fcos() { 5583 emit_int8((unsigned char)0xD9); 5584 emit_int8((unsigned char)0xFF); 5585 } 5586 5587 void Assembler::fdecstp() { 5588 emit_int8((unsigned char)0xD9); 5589 emit_int8((unsigned char)0xF6); 5590 } 5591 5592 void Assembler::fdiv(int i) { 5593 emit_farith(0xD8, 0xF0, i); 5594 } 5595 5596 void Assembler::fdiv_d(Address src) { 5597 InstructionMark im(this); 5598 emit_int8((unsigned char)0xDC); 5599 emit_operand32(rsi, src); 5600 } 5601 
5602 void Assembler::fdiv_s(Address src) { 5603 InstructionMark im(this); 5604 emit_int8((unsigned char)0xD8); 5605 emit_operand32(rsi, src); 5606 } 5607 5608 void Assembler::fdiva(int i) { 5609 emit_farith(0xDC, 0xF8, i); 5610 } 5611 5612 // Note: The Intel manual (Pentium Processor User's Manual, Vol.3, 1994) 5613 // is erroneous for some of the floating-point instructions below. 5614 5615 void Assembler::fdivp(int i) { 5616 emit_farith(0xDE, 0xF8, i); // ST(0) <- ST(0) / ST(1) and pop (Intel manual wrong) 5617 } 5618 5619 void Assembler::fdivr(int i) { 5620 emit_farith(0xD8, 0xF8, i); 5621 } 5622 5623 void Assembler::fdivr_d(Address src) { 5624 InstructionMark im(this); 5625 emit_int8((unsigned char)0xDC); 5626 emit_operand32(rdi, src); 5627 } 5628 5629 void Assembler::fdivr_s(Address src) { 5630 InstructionMark im(this); 5631 emit_int8((unsigned char)0xD8); 5632 emit_operand32(rdi, src); 5633 } 5634 5635 void Assembler::fdivra(int i) { 5636 emit_farith(0xDC, 0xF0, i); 5637 } 5638 5639 void Assembler::fdivrp(int i) { 5640 emit_farith(0xDE, 0xF0, i); // ST(0) <- ST(1) / ST(0) and pop (Intel manual wrong) 5641 } 5642 5643 void Assembler::ffree(int i) { 5644 emit_farith(0xDD, 0xC0, i); 5645 } 5646 5647 void Assembler::fild_d(Address adr) { 5648 InstructionMark im(this); 5649 emit_int8((unsigned char)0xDF); 5650 emit_operand32(rbp, adr); 5651 } 5652 5653 void Assembler::fild_s(Address adr) { 5654 InstructionMark im(this); 5655 emit_int8((unsigned char)0xDB); 5656 emit_operand32(rax, adr); 5657 } 5658 5659 void Assembler::fincstp() { 5660 emit_int8((unsigned char)0xD9); 5661 emit_int8((unsigned char)0xF7); 5662 } 5663 5664 void Assembler::finit() { 5665 emit_int8((unsigned char)0x9B); 5666 emit_int8((unsigned char)0xDB); 5667 emit_int8((unsigned char)0xE3); 5668 } 5669 5670 void Assembler::fist_s(Address adr) { 5671 InstructionMark im(this); 5672 emit_int8((unsigned char)0xDB); 5673 emit_operand32(rdx, adr); 5674 } 5675 5676 void Assembler::fistp_d(Address adr) { 
5677 InstructionMark im(this); 5678 emit_int8((unsigned char)0xDF); 5679 emit_operand32(rdi, adr); 5680 } 5681 5682 void Assembler::fistp_s(Address adr) { 5683 InstructionMark im(this); 5684 emit_int8((unsigned char)0xDB); 5685 emit_operand32(rbx, adr); 5686 } 5687 5688 void Assembler::fld1() { 5689 emit_int8((unsigned char)0xD9); 5690 emit_int8((unsigned char)0xE8); 5691 } 5692 5693 void Assembler::fld_d(Address adr) { 5694 InstructionMark im(this); 5695 emit_int8((unsigned char)0xDD); 5696 emit_operand32(rax, adr); 5697 } 5698 5699 void Assembler::fld_s(Address adr) { 5700 InstructionMark im(this); 5701 emit_int8((unsigned char)0xD9); 5702 emit_operand32(rax, adr); 5703 } 5704 5705 5706 void Assembler::fld_s(int index) { 5707 emit_farith(0xD9, 0xC0, index); 5708 } 5709 5710 void Assembler::fld_x(Address adr) { 5711 InstructionMark im(this); 5712 emit_int8((unsigned char)0xDB); 5713 emit_operand32(rbp, adr); 5714 } 5715 5716 void Assembler::fldcw(Address src) { 5717 InstructionMark im(this); 5718 emit_int8((unsigned char)0xD9); 5719 emit_operand32(rbp, src); 5720 } 5721 5722 void Assembler::fldenv(Address src) { 5723 InstructionMark im(this); 5724 emit_int8((unsigned char)0xD9); 5725 emit_operand32(rsp, src); 5726 } 5727 5728 void Assembler::fldlg2() { 5729 emit_int8((unsigned char)0xD9); 5730 emit_int8((unsigned char)0xEC); 5731 } 5732 5733 void Assembler::fldln2() { 5734 emit_int8((unsigned char)0xD9); 5735 emit_int8((unsigned char)0xED); 5736 } 5737 5738 void Assembler::fldz() { 5739 emit_int8((unsigned char)0xD9); 5740 emit_int8((unsigned char)0xEE); 5741 } 5742 5743 void Assembler::flog() { 5744 fldln2(); 5745 fxch(); 5746 fyl2x(); 5747 } 5748 5749 void Assembler::flog10() { 5750 fldlg2(); 5751 fxch(); 5752 fyl2x(); 5753 } 5754 5755 void Assembler::fmul(int i) { 5756 emit_farith(0xD8, 0xC8, i); 5757 } 5758 5759 void Assembler::fmul_d(Address src) { 5760 InstructionMark im(this); 5761 emit_int8((unsigned char)0xDC); 5762 emit_operand32(rcx, src); 5763 } 5764 
5765 void Assembler::fmul_s(Address src) { 5766 InstructionMark im(this); 5767 emit_int8((unsigned char)0xD8); 5768 emit_operand32(rcx, src); 5769 } 5770 5771 void Assembler::fmula(int i) { 5772 emit_farith(0xDC, 0xC8, i); 5773 } 5774 5775 void Assembler::fmulp(int i) { 5776 emit_farith(0xDE, 0xC8, i); 5777 } 5778 5779 void Assembler::fnsave(Address dst) { 5780 InstructionMark im(this); 5781 emit_int8((unsigned char)0xDD); 5782 emit_operand32(rsi, dst); 5783 } 5784 5785 void Assembler::fnstcw(Address src) { 5786 InstructionMark im(this); 5787 emit_int8((unsigned char)0x9B); 5788 emit_int8((unsigned char)0xD9); 5789 emit_operand32(rdi, src); 5790 } 5791 5792 void Assembler::fnstsw_ax() { 5793 emit_int8((unsigned char)0xDF); 5794 emit_int8((unsigned char)0xE0); 5795 } 5796 5797 void Assembler::fprem() { 5798 emit_int8((unsigned char)0xD9); 5799 emit_int8((unsigned char)0xF8); 5800 } 5801 5802 void Assembler::fprem1() { 5803 emit_int8((unsigned char)0xD9); 5804 emit_int8((unsigned char)0xF5); 5805 } 5806 5807 void Assembler::frstor(Address src) { 5808 InstructionMark im(this); 5809 emit_int8((unsigned char)0xDD); 5810 emit_operand32(rsp, src); 5811 } 5812 5813 void Assembler::fsin() { 5814 emit_int8((unsigned char)0xD9); 5815 emit_int8((unsigned char)0xFE); 5816 } 5817 5818 void Assembler::fsqrt() { 5819 emit_int8((unsigned char)0xD9); 5820 emit_int8((unsigned char)0xFA); 5821 } 5822 5823 void Assembler::fst_d(Address adr) { 5824 InstructionMark im(this); 5825 emit_int8((unsigned char)0xDD); 5826 emit_operand32(rdx, adr); 5827 } 5828 5829 void Assembler::fst_s(Address adr) { 5830 InstructionMark im(this); 5831 emit_int8((unsigned char)0xD9); 5832 emit_operand32(rdx, adr); 5833 } 5834 5835 void Assembler::fstp_d(Address adr) { 5836 InstructionMark im(this); 5837 emit_int8((unsigned char)0xDD); 5838 emit_operand32(rbx, adr); 5839 } 5840 5841 void Assembler::fstp_d(int index) { 5842 emit_farith(0xDD, 0xD8, index); 5843 } 5844 5845 void Assembler::fstp_s(Address adr) { 
5846 InstructionMark im(this); 5847 emit_int8((unsigned char)0xD9); 5848 emit_operand32(rbx, adr); 5849 } 5850 5851 void Assembler::fstp_x(Address adr) { 5852 InstructionMark im(this); 5853 emit_int8((unsigned char)0xDB); 5854 emit_operand32(rdi, adr); 5855 } 5856 5857 void Assembler::fsub(int i) { 5858 emit_farith(0xD8, 0xE0, i); 5859 } 5860 5861 void Assembler::fsub_d(Address src) { 5862 InstructionMark im(this); 5863 emit_int8((unsigned char)0xDC); 5864 emit_operand32(rsp, src); 5865 } 5866 5867 void Assembler::fsub_s(Address src) { 5868 InstructionMark im(this); 5869 emit_int8((unsigned char)0xD8); 5870 emit_operand32(rsp, src); 5871 } 5872 5873 void Assembler::fsuba(int i) { 5874 emit_farith(0xDC, 0xE8, i); 5875 } 5876 5877 void Assembler::fsubp(int i) { 5878 emit_farith(0xDE, 0xE8, i); // ST(0) <- ST(0) - ST(1) and pop (Intel manual wrong) 5879 } 5880 5881 void Assembler::fsubr(int i) { 5882 emit_farith(0xD8, 0xE8, i); 5883 } 5884 5885 void Assembler::fsubr_d(Address src) { 5886 InstructionMark im(this); 5887 emit_int8((unsigned char)0xDC); 5888 emit_operand32(rbp, src); 5889 } 5890 5891 void Assembler::fsubr_s(Address src) { 5892 InstructionMark im(this); 5893 emit_int8((unsigned char)0xD8); 5894 emit_operand32(rbp, src); 5895 } 5896 5897 void Assembler::fsubra(int i) { 5898 emit_farith(0xDC, 0xE0, i); 5899 } 5900 5901 void Assembler::fsubrp(int i) { 5902 emit_farith(0xDE, 0xE0, i); // ST(0) <- ST(1) - ST(0) and pop (Intel manual wrong) 5903 } 5904 5905 void Assembler::ftan() { 5906 emit_int8((unsigned char)0xD9); 5907 emit_int8((unsigned char)0xF2); 5908 emit_int8((unsigned char)0xDD); 5909 emit_int8((unsigned char)0xD8); 5910 } 5911 5912 void Assembler::ftst() { 5913 emit_int8((unsigned char)0xD9); 5914 emit_int8((unsigned char)0xE4); 5915 } 5916 5917 void Assembler::fucomi(int i) { 5918 // make sure the instruction is supported (introduced for P6, together with cmov) 5919 guarantee(VM_Version::supports_cmov(), "illegal instruction"); 5920 
emit_farith(0xDB, 0xE8, i); 5921 } 5922 5923 void Assembler::fucomip(int i) { 5924 // make sure the instruction is supported (introduced for P6, together with cmov) 5925 guarantee(VM_Version::supports_cmov(), "illegal instruction"); 5926 emit_farith(0xDF, 0xE8, i); 5927 } 5928 5929 void Assembler::fwait() { 5930 emit_int8((unsigned char)0x9B); 5931 } 5932 5933 void Assembler::fxch(int i) { 5934 emit_farith(0xD9, 0xC8, i); 5935 } 5936 5937 void Assembler::fyl2x() { 5938 emit_int8((unsigned char)0xD9); 5939 emit_int8((unsigned char)0xF1); 5940 } 5941 5942 void Assembler::frndint() { 5943 emit_int8((unsigned char)0xD9); 5944 emit_int8((unsigned char)0xFC); 5945 } 5946 5947 void Assembler::f2xm1() { 5948 emit_int8((unsigned char)0xD9); 5949 emit_int8((unsigned char)0xF0); 5950 } 5951 5952 void Assembler::fldl2e() { 5953 emit_int8((unsigned char)0xD9); 5954 emit_int8((unsigned char)0xEA); 5955 } 5956 5957 // SSE SIMD prefix byte values corresponding to VexSimdPrefix encoding. 5958 static int simd_pre[4] = { 0, 0x66, 0xF3, 0xF2 }; 5959 // SSE opcode second byte values (first is 0x0F) corresponding to VexOpcode encoding. 5960 static int simd_opc[4] = { 0, 0, 0x38, 0x3A }; 5961 5962 // Generate SSE legacy REX prefix and SIMD opcode based on VEX encoding. 5963 void Assembler::rex_prefix(Address adr, XMMRegister xreg, VexSimdPrefix pre, VexOpcode opc, bool rex_w) { 5964 if (pre > 0) { 5965 emit_int8(simd_pre[pre]); 5966 } 5967 if (rex_w) { 5968 prefixq(adr, xreg); 5969 } else { 5970 prefix(adr, xreg); 5971 } 5972 if (opc > 0) { 5973 emit_int8(0x0F); 5974 int opc2 = simd_opc[opc]; 5975 if (opc2 > 0) { 5976 emit_int8(opc2); 5977 } 5978 } 5979 } 5980 5981 int Assembler::rex_prefix_and_encode(int dst_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, bool rex_w) { 5982 if (pre > 0) { 5983 emit_int8(simd_pre[pre]); 5984 } 5985 int encode = (rex_w) ? 
prefixq_and_encode(dst_enc, src_enc) : 5986 prefix_and_encode(dst_enc, src_enc); 5987 if (opc > 0) { 5988 emit_int8(0x0F); 5989 int opc2 = simd_opc[opc]; 5990 if (opc2 > 0) { 5991 emit_int8(opc2); 5992 } 5993 } 5994 return encode; 5995 } 5996 5997 5998 void Assembler::vex_prefix(bool vex_r, bool vex_b, bool vex_x, bool vex_w, int nds_enc, VexSimdPrefix pre, VexOpcode opc, int vector_len) { 5999 if (vex_b || vex_x || vex_w || (opc == VEX_OPCODE_0F_38) || (opc == VEX_OPCODE_0F_3A)) { 6000 prefix(VEX_3bytes); 6001 6002 int byte1 = (vex_r ? VEX_R : 0) | (vex_x ? VEX_X : 0) | (vex_b ? VEX_B : 0); 6003 byte1 = (~byte1) & 0xE0; 6004 byte1 |= opc; 6005 emit_int8(byte1); 6006 6007 int byte2 = ((~nds_enc) & 0xf) << 3; 6008 byte2 |= (vex_w ? VEX_W : 0) | ((vector_len > 0) ? 4 : 0) | pre; 6009 emit_int8(byte2); 6010 } else { 6011 prefix(VEX_2bytes); 6012 6013 int byte1 = vex_r ? VEX_R : 0; 6014 byte1 = (~byte1) & 0x80; 6015 byte1 |= ((~nds_enc) & 0xf) << 3; 6016 byte1 |= ((vector_len > 0 ) ? 4 : 0) | pre; 6017 emit_int8(byte1); 6018 } 6019 } 6020 6021 // This is a 4 byte encoding 6022 void Assembler::evex_prefix(bool vex_r, bool vex_b, bool vex_x, bool vex_w, bool evex_r, bool evex_v, 6023 int nds_enc, VexSimdPrefix pre, VexOpcode opc, 6024 bool is_extended_context, bool is_merge_context, 6025 int vector_len, bool no_mask_reg ){ 6026 // EVEX 0x62 prefix 6027 prefix(EVEX_4bytes); 6028 _evex_encoding = (vex_w ? VEX_W : 0) | (evex_r ? EVEX_Rb : 0); 6029 6030 // P0: byte 2, initialized to RXBR`00mm 6031 // instead of not'd 6032 int byte2 = (vex_r ? VEX_R : 0) | (vex_x ? VEX_X : 0) | (vex_b ? VEX_B : 0) | (evex_r ? 
EVEX_Rb : 0); 6033 byte2 = (~byte2) & 0xF0; 6034 // confine opc opcode extensions in mm bits to lower two bits 6035 // of form {0F, 0F_38, 0F_3A} 6036 byte2 |= opc; 6037 emit_int8(byte2); 6038 6039 // P1: byte 3 as Wvvvv1pp 6040 int byte3 = ((~nds_enc) & 0xf) << 3; 6041 // p[10] is always 1 6042 byte3 |= EVEX_F; 6043 byte3 |= (vex_w & 1) << 7; 6044 // confine pre opcode extensions in pp bits to lower two bits 6045 // of form {66, F3, F2} 6046 byte3 |= pre; 6047 emit_int8(byte3); 6048 6049 // P2: byte 4 as zL'Lbv'aaa 6050 int byte4 = (no_mask_reg) ? 0 : 1; // kregs are implemented in the low 3 bits as aaa (hard code k1, it will be initialized for now) 6051 // EVEX.v` for extending EVEX.vvvv or VIDX 6052 byte4 |= (evex_v ? 0: EVEX_V); 6053 // third EXEC.b for broadcast actions 6054 byte4 |= (is_extended_context ? EVEX_Rb : 0); 6055 // fourth EVEX.L'L for vector length : 0 is 128, 1 is 256, 2 is 512, currently we do not support 1024 6056 byte4 |= ((vector_len) & 0x3) << 5; 6057 // last is EVEX.z for zero/merge actions 6058 byte4 |= (is_merge_context ? EVEX_Z : 0); 6059 emit_int8(byte4); 6060 } 6061 6062 void Assembler::vex_prefix(Address adr, int nds_enc, int xreg_enc, VexSimdPrefix pre, 6063 VexOpcode opc, bool vex_w, int vector_len, bool legacy_mode, bool no_mask_reg) { 6064 bool vex_r = ((xreg_enc & 8) == 8) ? 
1 : 0; 6065 bool vex_b = adr.base_needs_rex(); 6066 bool vex_x = adr.index_needs_rex(); 6067 _avx_vector_len = vector_len; 6068 6069 // if vector length is turned off, revert to AVX for vectors smaller than 512-bit 6070 if (_legacy_mode_vl && _instruction_uses_vl) { 6071 switch (vector_len) { 6072 case AVX_128bit: 6073 case AVX_256bit: 6074 legacy_mode = true; 6075 break; 6076 } 6077 } 6078 6079 if ((UseAVX > 2) && (legacy_mode == false)) 6080 { 6081 bool evex_r = (xreg_enc >= 16); 6082 bool evex_v = (nds_enc >= 16); 6083 _is_evex_instruction = true; 6084 evex_prefix(vex_r, vex_b, vex_x, vex_w, evex_r, evex_v, nds_enc, pre, opc, false, false, vector_len, no_mask_reg); 6085 } else { 6086 vex_prefix(vex_r, vex_b, vex_x, vex_w, nds_enc, pre, opc, vector_len); 6087 } 6088 _instruction_uses_vl = false; 6089 } 6090 6091 int Assembler::vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, 6092 bool vex_w, int vector_len, bool legacy_mode, bool no_mask_reg ) { 6093 bool vex_r = ((dst_enc & 8) == 8) ? 1 : 0; 6094 bool vex_b = ((src_enc & 8) == 8) ? 
1 : 0; 6095 bool vex_x = false; 6096 _avx_vector_len = vector_len; 6097 6098 // if vector length is turned off, revert to AVX for vectors smaller than 512-bit 6099 if (_legacy_mode_vl && _instruction_uses_vl) { 6100 switch (vector_len) { 6101 case AVX_128bit: 6102 case AVX_256bit: 6103 legacy_mode = true; 6104 break; 6105 } 6106 } 6107 6108 if ((UseAVX > 2) && (legacy_mode == false)) 6109 { 6110 bool evex_r = (dst_enc >= 16); 6111 bool evex_v = (nds_enc >= 16); 6112 // can use vex_x as bank extender on rm encoding 6113 vex_x = (src_enc >= 16); 6114 evex_prefix(vex_r, vex_b, vex_x, vex_w, evex_r, evex_v, nds_enc, pre, opc, false, false, vector_len, no_mask_reg); 6115 } else { 6116 vex_prefix(vex_r, vex_b, vex_x, vex_w, nds_enc, pre, opc, vector_len); 6117 } 6118 6119 _instruction_uses_vl = false; 6120 6121 // return modrm byte components for operands 6122 return (((dst_enc & 7) << 3) | (src_enc & 7)); 6123 } 6124 6125 6126 void Assembler::simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr, VexSimdPrefix pre, 6127 bool no_mask_reg, VexOpcode opc, bool rex_w, int vector_len, bool legacy_mode) { 6128 if (UseAVX > 0) { 6129 int xreg_enc = xreg->encoding(); 6130 int nds_enc = nds->is_valid() ? nds->encoding() : 0; 6131 vex_prefix(adr, nds_enc, xreg_enc, pre, opc, rex_w, vector_len, legacy_mode, no_mask_reg); 6132 } else { 6133 assert((nds == xreg) || (nds == xnoreg), "wrong sse encoding"); 6134 rex_prefix(adr, xreg, pre, opc, rex_w); 6135 } 6136 } 6137 6138 int Assembler::simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src, VexSimdPrefix pre, 6139 bool no_mask_reg, VexOpcode opc, bool rex_w, int vector_len, bool legacy_mode) { 6140 int dst_enc = dst->encoding(); 6141 int src_enc = src->encoding(); 6142 if (UseAVX > 0) { 6143 int nds_enc = nds->is_valid() ? 
nds->encoding() : 0; 6144 return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, rex_w, vector_len, legacy_mode, no_mask_reg); 6145 } else { 6146 assert((nds == dst) || (nds == src) || (nds == xnoreg), "wrong sse encoding"); 6147 return rex_prefix_and_encode(dst_enc, src_enc, pre, opc, rex_w); 6148 } 6149 } 6150 6151 int Assembler::kreg_prefix_and_encode(KRegister dst, KRegister nds, KRegister src, VexSimdPrefix pre, 6152 bool no_mask_reg, VexOpcode opc, bool rex_w, int vector_len) { 6153 int dst_enc = dst->encoding(); 6154 int src_enc = src->encoding(); 6155 int nds_enc = nds->is_valid() ? nds->encoding() : 0; 6156 return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, rex_w, vector_len, true, no_mask_reg); 6157 } 6158 6159 int Assembler::kreg_prefix_and_encode(KRegister dst, KRegister nds, Register src, VexSimdPrefix pre, 6160 bool no_mask_reg, VexOpcode opc, bool rex_w, int vector_len) { 6161 int dst_enc = dst->encoding(); 6162 int src_enc = src->encoding(); 6163 int nds_enc = nds->is_valid() ? 
nds->encoding() : 0; 6164 return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, rex_w, vector_len, true, no_mask_reg); 6165 } 6166 6167 void Assembler::emit_simd_arith(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool no_mask_reg, bool legacy_mode) { 6168 InstructionMark im(this); 6169 simd_prefix(dst, dst, src, pre, no_mask_reg, VEX_OPCODE_0F, false, AVX_128bit, legacy_mode); 6170 emit_int8(opcode); 6171 emit_operand(dst, src); 6172 } 6173 6174 void Assembler::emit_simd_arith_q(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool no_mask_reg) { 6175 InstructionMark im(this); 6176 simd_prefix_q(dst, dst, src, pre, no_mask_reg); 6177 emit_int8(opcode); 6178 emit_operand(dst, src); 6179 } 6180 6181 void Assembler::emit_simd_arith(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg, bool legacy_mode) { 6182 int encode = simd_prefix_and_encode(dst, dst, src, pre, no_mask_reg, VEX_OPCODE_0F, false, AVX_128bit, legacy_mode); 6183 emit_int8(opcode); 6184 emit_int8((unsigned char)(0xC0 | encode)); 6185 } 6186 6187 void Assembler::emit_simd_arith_q(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg) { 6188 int encode = simd_prefix_and_encode(dst, dst, src, pre, no_mask_reg, VEX_OPCODE_0F, true, AVX_128bit); 6189 emit_int8(opcode); 6190 emit_int8((unsigned char)(0xC0 | encode)); 6191 } 6192 6193 // Versions with no second source register (non-destructive source). 
6194 void Assembler::emit_simd_arith_nonds(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool opNoRegMask) { 6195 InstructionMark im(this); 6196 simd_prefix(dst, xnoreg, src, pre, opNoRegMask); 6197 emit_int8(opcode); 6198 emit_operand(dst, src); 6199 } 6200 6201 void Assembler::emit_simd_arith_nonds_q(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool opNoRegMask) { 6202 InstructionMark im(this); 6203 simd_prefix_q(dst, xnoreg, src, pre, opNoRegMask); 6204 emit_int8(opcode); 6205 emit_operand(dst, src); 6206 } 6207 6208 void Assembler::emit_simd_arith_nonds(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg, bool legacy_mode) { 6209 int encode = simd_prefix_and_encode(dst, xnoreg, src, pre, no_mask_reg, VEX_OPCODE_0F, false, AVX_128bit, legacy_mode); 6210 emit_int8(opcode); 6211 emit_int8((unsigned char)(0xC0 | encode)); 6212 } 6213 6214 void Assembler::emit_simd_arith_nonds_q(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg) { 6215 int encode = simd_prefix_and_encode(dst, xnoreg, src, pre, no_mask_reg, VEX_OPCODE_0F, true); 6216 emit_int8(opcode); 6217 emit_int8((unsigned char)(0xC0 | encode)); 6218 } 6219 6220 // 3-operands AVX instructions 6221 void Assembler::emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds, Address src, 6222 VexSimdPrefix pre, int vector_len, bool no_mask_reg, bool legacy_mode) { 6223 InstructionMark im(this); 6224 vex_prefix(dst, nds, src, pre, vector_len, no_mask_reg, legacy_mode); 6225 emit_int8(opcode); 6226 emit_operand(dst, src); 6227 } 6228 6229 void Assembler::emit_vex_arith_q(int opcode, XMMRegister dst, XMMRegister nds, 6230 Address src, VexSimdPrefix pre, int vector_len, bool no_mask_reg) { 6231 InstructionMark im(this); 6232 vex_prefix_q(dst, nds, src, pre, vector_len, no_mask_reg); 6233 emit_int8(opcode); 6234 emit_operand(dst, src); 6235 } 6236 6237 void Assembler::emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds, 
XMMRegister src, 6238 VexSimdPrefix pre, int vector_len, bool no_mask_reg, bool legacy_mode) { 6239 int encode = vex_prefix_and_encode(dst, nds, src, pre, vector_len, VEX_OPCODE_0F, legacy_mode, no_mask_reg); 6240 emit_int8(opcode); 6241 emit_int8((unsigned char)(0xC0 | encode)); 6242 } 6243 6244 void Assembler::emit_vex_arith_q(int opcode, XMMRegister dst, XMMRegister nds, XMMRegister src, 6245 VexSimdPrefix pre, int vector_len, bool no_mask_reg) { 6246 int src_enc = src->encoding(); 6247 int dst_enc = dst->encoding(); 6248 int nds_enc = nds->is_valid() ? nds->encoding() : 0; 6249 int encode = vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, VEX_OPCODE_0F, true, vector_len, false, no_mask_reg); 6250 emit_int8(opcode); 6251 emit_int8((unsigned char)(0xC0 | encode)); 6252 } 6253 6254 #ifndef _LP64 6255 6256 void Assembler::incl(Register dst) { 6257 // Don't use it directly. Use MacroAssembler::incrementl() instead. 6258 emit_int8(0x40 | dst->encoding()); 6259 } 6260 6261 void Assembler::lea(Register dst, Address src) { 6262 leal(dst, src); 6263 } 6264 6265 void Assembler::mov_literal32(Address dst, int32_t imm32, RelocationHolder const& rspec) { 6266 InstructionMark im(this); 6267 emit_int8((unsigned char)0xC7); 6268 emit_operand(rax, dst); 6269 emit_data((int)imm32, rspec, 0); 6270 } 6271 6272 void Assembler::mov_literal32(Register dst, int32_t imm32, RelocationHolder const& rspec) { 6273 InstructionMark im(this); 6274 int encode = prefix_and_encode(dst->encoding()); 6275 emit_int8((unsigned char)(0xB8 | encode)); 6276 emit_data((int)imm32, rspec, 0); 6277 } 6278 6279 void Assembler::popa() { // 32bit 6280 emit_int8(0x61); 6281 } 6282 6283 void Assembler::push_literal32(int32_t imm32, RelocationHolder const& rspec) { 6284 InstructionMark im(this); 6285 emit_int8(0x68); 6286 emit_data(imm32, rspec, 0); 6287 } 6288 6289 void Assembler::pusha() { // 32bit 6290 emit_int8(0x60); 6291 } 6292 6293 void Assembler::set_byte_if_not_zero(Register dst) { 6294 
emit_int8(0x0F); 6295 emit_int8((unsigned char)0x95); 6296 emit_int8((unsigned char)(0xE0 | dst->encoding())); 6297 } 6298 6299 void Assembler::shldl(Register dst, Register src) { 6300 emit_int8(0x0F); 6301 emit_int8((unsigned char)0xA5); 6302 emit_int8((unsigned char)(0xC0 | src->encoding() << 3 | dst->encoding())); 6303 } 6304 6305 // 0F A4 / r ib 6306 void Assembler::shldl(Register dst, Register src, int8_t imm8) { 6307 emit_int8(0x0F); 6308 emit_int8((unsigned char)0xA4); 6309 emit_int8((unsigned char)(0xC0 | src->encoding() << 3 | dst->encoding())); 6310 emit_int8(imm8); 6311 } 6312 6313 void Assembler::shrdl(Register dst, Register src) { 6314 emit_int8(0x0F); 6315 emit_int8((unsigned char)0xAD); 6316 emit_int8((unsigned char)(0xC0 | src->encoding() << 3 | dst->encoding())); 6317 } 6318 6319 #else // LP64 6320 6321 void Assembler::set_byte_if_not_zero(Register dst) { 6322 int enc = prefix_and_encode(dst->encoding(), true); 6323 emit_int8(0x0F); 6324 emit_int8((unsigned char)0x95); 6325 emit_int8((unsigned char)(0xE0 | enc)); 6326 } 6327 6328 // 64bit only pieces of the assembler 6329 // This should only be used by 64bit instructions that can use rip-relative 6330 // it cannot be used by instructions that want an immediate value. 6331 6332 bool Assembler::reachable(AddressLiteral adr) { 6333 int64_t disp; 6334 // None will force a 64bit literal to the code stream. Likely a placeholder 6335 // for something that will be patched later and we need to certain it will 6336 // always be reachable. 6337 if (adr.reloc() == relocInfo::none) { 6338 return false; 6339 } 6340 if (adr.reloc() == relocInfo::internal_word_type) { 6341 // This should be rip relative and easily reachable. 
6342 return true; 6343 } 6344 if (adr.reloc() == relocInfo::virtual_call_type || 6345 adr.reloc() == relocInfo::opt_virtual_call_type || 6346 adr.reloc() == relocInfo::static_call_type || 6347 adr.reloc() == relocInfo::static_stub_type ) { 6348 // This should be rip relative within the code cache and easily 6349 // reachable until we get huge code caches. (At which point 6350 // ic code is going to have issues). 6351 return true; 6352 } 6353 if (adr.reloc() != relocInfo::external_word_type && 6354 adr.reloc() != relocInfo::poll_return_type && // these are really external_word but need special 6355 adr.reloc() != relocInfo::poll_type && // relocs to identify them 6356 adr.reloc() != relocInfo::runtime_call_type ) { 6357 return false; 6358 } 6359 6360 // Stress the correction code 6361 if (ForceUnreachable) { 6362 // Must be runtimecall reloc, see if it is in the codecache 6363 // Flipping stuff in the codecache to be unreachable causes issues 6364 // with things like inline caches where the additional instructions 6365 // are not handled. 6366 if (CodeCache::find_blob(adr._target) == NULL) { 6367 return false; 6368 } 6369 } 6370 // For external_word_type/runtime_call_type if it is reachable from where we 6371 // are now (possibly a temp buffer) and where we might end up 6372 // anywhere in the codeCache then we are always reachable. 6373 // This would have to change if we ever save/restore shared code 6374 // to be more pessimistic. 
6375 disp = (int64_t)adr._target - ((int64_t)CodeCache::low_bound() + sizeof(int)); 6376 if (!is_simm32(disp)) return false; 6377 disp = (int64_t)adr._target - ((int64_t)CodeCache::high_bound() + sizeof(int)); 6378 if (!is_simm32(disp)) return false; 6379 6380 disp = (int64_t)adr._target - ((int64_t)pc() + sizeof(int)); 6381 6382 // Because rip relative is a disp + address_of_next_instruction and we 6383 // don't know the value of address_of_next_instruction we apply a fudge factor 6384 // to make sure we will be ok no matter the size of the instruction we get placed into. 6385 // We don't have to fudge the checks above here because they are already worst case. 6386 6387 // 12 == override/rex byte, opcode byte, rm byte, sib byte, a 4-byte disp , 4-byte literal 6388 // + 4 because better safe than sorry. 6389 const int fudge = 12 + 4; 6390 if (disp < 0) { 6391 disp -= fudge; 6392 } else { 6393 disp += fudge; 6394 } 6395 return is_simm32(disp); 6396 } 6397 6398 // Check if the polling page is not reachable from the code cache using rip-relative 6399 // addressing. 6400 bool Assembler::is_polling_page_far() { 6401 intptr_t addr = (intptr_t)os::get_polling_page(); 6402 return ForceUnreachable || 6403 !is_simm32(addr - (intptr_t)CodeCache::low_bound()) || 6404 !is_simm32(addr - (intptr_t)CodeCache::high_bound()); 6405 } 6406 6407 void Assembler::emit_data64(jlong data, 6408 relocInfo::relocType rtype, 6409 int format) { 6410 if (rtype == relocInfo::none) { 6411 emit_int64(data); 6412 } else { 6413 emit_data64(data, Relocation::spec_simple(rtype), format); 6414 } 6415 } 6416 6417 void Assembler::emit_data64(jlong data, 6418 RelocationHolder const& rspec, 6419 int format) { 6420 assert(imm_operand == 0, "default format must be immediate in this file"); 6421 assert(imm_operand == format, "must be immediate"); 6422 assert(inst_mark() != NULL, "must be inside InstructionMark"); 6423 // Do not use AbstractAssembler::relocate, which is not intended for 6424 // embedded words. 
Instead, relocate to the enclosing instruction. 6425 code_section()->relocate(inst_mark(), rspec, format); 6426 #ifdef ASSERT 6427 check_relocation(rspec, format); 6428 #endif 6429 emit_int64(data); 6430 } 6431 6432 int Assembler::prefix_and_encode(int reg_enc, bool byteinst) { 6433 if (reg_enc >= 8) { 6434 prefix(REX_B); 6435 reg_enc -= 8; 6436 } else if (byteinst && reg_enc >= 4) { 6437 prefix(REX); 6438 } 6439 return reg_enc; 6440 } 6441 6442 int Assembler::prefixq_and_encode(int reg_enc) { 6443 if (reg_enc < 8) { 6444 prefix(REX_W); 6445 } else { 6446 prefix(REX_WB); 6447 reg_enc -= 8; 6448 } 6449 return reg_enc; 6450 } 6451 6452 int Assembler::prefix_and_encode(int dst_enc, int src_enc, bool byteinst) { 6453 if (dst_enc < 8) { 6454 if (src_enc >= 8) { 6455 prefix(REX_B); 6456 src_enc -= 8; 6457 } else if (byteinst && src_enc >= 4) { 6458 prefix(REX); 6459 } 6460 } else { 6461 if (src_enc < 8) { 6462 prefix(REX_R); 6463 } else { 6464 prefix(REX_RB); 6465 src_enc -= 8; 6466 } 6467 dst_enc -= 8; 6468 } 6469 return dst_enc << 3 | src_enc; 6470 } 6471 6472 int Assembler::prefixq_and_encode(int dst_enc, int src_enc) { 6473 if (dst_enc < 8) { 6474 if (src_enc < 8) { 6475 prefix(REX_W); 6476 } else { 6477 prefix(REX_WB); 6478 src_enc -= 8; 6479 } 6480 } else { 6481 if (src_enc < 8) { 6482 prefix(REX_WR); 6483 } else { 6484 prefix(REX_WRB); 6485 src_enc -= 8; 6486 } 6487 dst_enc -= 8; 6488 } 6489 return dst_enc << 3 | src_enc; 6490 } 6491 6492 void Assembler::prefix(Register reg) { 6493 if (reg->encoding() >= 8) { 6494 prefix(REX_B); 6495 } 6496 } 6497 6498 void Assembler::prefix(Register dst, Register src, Prefix p) { 6499 if (src->encoding() >= 8) { 6500 p = (Prefix)(p | REX_B); 6501 } 6502 if (dst->encoding() >= 8) { 6503 p = (Prefix)( p | REX_R); 6504 } 6505 if (p != Prefix_EMPTY) { 6506 // do not generate an empty prefix 6507 prefix(p); 6508 } 6509 } 6510 6511 void Assembler::prefix(Register dst, Address adr, Prefix p) { 6512 if (adr.base_needs_rex()) { 6513 if 
(adr.index_needs_rex()) { 6514 assert(false, "prefix(Register dst, Address adr, Prefix p) does not support handling of an X"); 6515 } else { 6516 prefix(REX_B); 6517 } 6518 } else { 6519 if (adr.index_needs_rex()) { 6520 assert(false, "prefix(Register dst, Address adr, Prefix p) does not support handling of an X"); 6521 } 6522 } 6523 if (dst->encoding() >= 8) { 6524 p = (Prefix)(p | REX_R); 6525 } 6526 if (p != Prefix_EMPTY) { 6527 // do not generate an empty prefix 6528 prefix(p); 6529 } 6530 } 6531 6532 void Assembler::prefix(Address adr) { 6533 if (adr.base_needs_rex()) { 6534 if (adr.index_needs_rex()) { 6535 prefix(REX_XB); 6536 } else { 6537 prefix(REX_B); 6538 } 6539 } else { 6540 if (adr.index_needs_rex()) { 6541 prefix(REX_X); 6542 } 6543 } 6544 } 6545 6546 void Assembler::prefixq(Address adr) { 6547 if (adr.base_needs_rex()) { 6548 if (adr.index_needs_rex()) { 6549 prefix(REX_WXB); 6550 } else { 6551 prefix(REX_WB); 6552 } 6553 } else { 6554 if (adr.index_needs_rex()) { 6555 prefix(REX_WX); 6556 } else { 6557 prefix(REX_W); 6558 } 6559 } 6560 } 6561 6562 6563 void Assembler::prefix(Address adr, Register reg, bool byteinst) { 6564 if (reg->encoding() < 8) { 6565 if (adr.base_needs_rex()) { 6566 if (adr.index_needs_rex()) { 6567 prefix(REX_XB); 6568 } else { 6569 prefix(REX_B); 6570 } 6571 } else { 6572 if (adr.index_needs_rex()) { 6573 prefix(REX_X); 6574 } else if (byteinst && reg->encoding() >= 4 ) { 6575 prefix(REX); 6576 } 6577 } 6578 } else { 6579 if (adr.base_needs_rex()) { 6580 if (adr.index_needs_rex()) { 6581 prefix(REX_RXB); 6582 } else { 6583 prefix(REX_RB); 6584 } 6585 } else { 6586 if (adr.index_needs_rex()) { 6587 prefix(REX_RX); 6588 } else { 6589 prefix(REX_R); 6590 } 6591 } 6592 } 6593 } 6594 6595 void Assembler::prefixq(Address adr, Register src) { 6596 if (src->encoding() < 8) { 6597 if (adr.base_needs_rex()) { 6598 if (adr.index_needs_rex()) { 6599 prefix(REX_WXB); 6600 } else { 6601 prefix(REX_WB); 6602 } 6603 } else { 6604 if 
(adr.index_needs_rex()) { 6605 prefix(REX_WX); 6606 } else { 6607 prefix(REX_W); 6608 } 6609 } 6610 } else { 6611 if (adr.base_needs_rex()) { 6612 if (adr.index_needs_rex()) { 6613 prefix(REX_WRXB); 6614 } else { 6615 prefix(REX_WRB); 6616 } 6617 } else { 6618 if (adr.index_needs_rex()) { 6619 prefix(REX_WRX); 6620 } else { 6621 prefix(REX_WR); 6622 } 6623 } 6624 } 6625 } 6626 6627 void Assembler::prefix(Address adr, XMMRegister reg) { 6628 if (reg->encoding() < 8) { 6629 if (adr.base_needs_rex()) { 6630 if (adr.index_needs_rex()) { 6631 prefix(REX_XB); 6632 } else { 6633 prefix(REX_B); 6634 } 6635 } else { 6636 if (adr.index_needs_rex()) { 6637 prefix(REX_X); 6638 } 6639 } 6640 } else { 6641 if (adr.base_needs_rex()) { 6642 if (adr.index_needs_rex()) { 6643 prefix(REX_RXB); 6644 } else { 6645 prefix(REX_RB); 6646 } 6647 } else { 6648 if (adr.index_needs_rex()) { 6649 prefix(REX_RX); 6650 } else { 6651 prefix(REX_R); 6652 } 6653 } 6654 } 6655 } 6656 6657 void Assembler::prefixq(Address adr, XMMRegister src) { 6658 if (src->encoding() < 8) { 6659 if (adr.base_needs_rex()) { 6660 if (adr.index_needs_rex()) { 6661 prefix(REX_WXB); 6662 } else { 6663 prefix(REX_WB); 6664 } 6665 } else { 6666 if (adr.index_needs_rex()) { 6667 prefix(REX_WX); 6668 } else { 6669 prefix(REX_W); 6670 } 6671 } 6672 } else { 6673 if (adr.base_needs_rex()) { 6674 if (adr.index_needs_rex()) { 6675 prefix(REX_WRXB); 6676 } else { 6677 prefix(REX_WRB); 6678 } 6679 } else { 6680 if (adr.index_needs_rex()) { 6681 prefix(REX_WRX); 6682 } else { 6683 prefix(REX_WR); 6684 } 6685 } 6686 } 6687 } 6688 6689 void Assembler::adcq(Register dst, int32_t imm32) { 6690 (void) prefixq_and_encode(dst->encoding()); 6691 emit_arith(0x81, 0xD0, dst, imm32); 6692 } 6693 6694 void Assembler::adcq(Register dst, Address src) { 6695 InstructionMark im(this); 6696 prefixq(src, dst); 6697 emit_int8(0x13); 6698 emit_operand(dst, src); 6699 } 6700 6701 void Assembler::adcq(Register dst, Register src) { 6702 (void) 
prefixq_and_encode(dst->encoding(), src->encoding()); 6703 emit_arith(0x13, 0xC0, dst, src); 6704 } 6705 6706 void Assembler::addq(Address dst, int32_t imm32) { 6707 InstructionMark im(this); 6708 prefixq(dst); 6709 emit_arith_operand(0x81, rax, dst,imm32); 6710 } 6711 6712 void Assembler::addq(Address dst, Register src) { 6713 InstructionMark im(this); 6714 prefixq(dst, src); 6715 emit_int8(0x01); 6716 emit_operand(src, dst); 6717 } 6718 6719 void Assembler::addq(Register dst, int32_t imm32) { 6720 (void) prefixq_and_encode(dst->encoding()); 6721 emit_arith(0x81, 0xC0, dst, imm32); 6722 } 6723 6724 void Assembler::addq(Register dst, Address src) { 6725 InstructionMark im(this); 6726 prefixq(src, dst); 6727 emit_int8(0x03); 6728 emit_operand(dst, src); 6729 } 6730 6731 void Assembler::addq(Register dst, Register src) { 6732 (void) prefixq_and_encode(dst->encoding(), src->encoding()); 6733 emit_arith(0x03, 0xC0, dst, src); 6734 } 6735 6736 void Assembler::adcxq(Register dst, Register src) { 6737 //assert(VM_Version::supports_adx(), "adx instructions not supported"); 6738 emit_int8((unsigned char)0x66); 6739 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 6740 emit_int8(0x0F); 6741 emit_int8(0x38); 6742 emit_int8((unsigned char)0xF6); 6743 emit_int8((unsigned char)(0xC0 | encode)); 6744 } 6745 6746 void Assembler::adoxq(Register dst, Register src) { 6747 //assert(VM_Version::supports_adx(), "adx instructions not supported"); 6748 emit_int8((unsigned char)0xF3); 6749 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 6750 emit_int8(0x0F); 6751 emit_int8(0x38); 6752 emit_int8((unsigned char)0xF6); 6753 emit_int8((unsigned char)(0xC0 | encode)); 6754 } 6755 6756 void Assembler::andq(Address dst, int32_t imm32) { 6757 InstructionMark im(this); 6758 prefixq(dst); 6759 emit_int8((unsigned char)0x81); 6760 emit_operand(rsp, dst, 4); 6761 emit_int32(imm32); 6762 } 6763 6764 void Assembler::andq(Register dst, int32_t imm32) { 6765 (void) 
prefixq_and_encode(dst->encoding()); 6766 emit_arith(0x81, 0xE0, dst, imm32); 6767 } 6768 6769 void Assembler::andq(Register dst, Address src) { 6770 InstructionMark im(this); 6771 prefixq(src, dst); 6772 emit_int8(0x23); 6773 emit_operand(dst, src); 6774 } 6775 6776 void Assembler::andq(Register dst, Register src) { 6777 (void) prefixq_and_encode(dst->encoding(), src->encoding()); 6778 emit_arith(0x23, 0xC0, dst, src); 6779 } 6780 6781 void Assembler::andnq(Register dst, Register src1, Register src2) { 6782 assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported"); 6783 int encode = vex_prefix_0F38_and_encode_q_legacy(dst, src1, src2); 6784 emit_int8((unsigned char)0xF2); 6785 emit_int8((unsigned char)(0xC0 | encode)); 6786 } 6787 6788 void Assembler::andnq(Register dst, Register src1, Address src2) { 6789 InstructionMark im(this); 6790 assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported"); 6791 vex_prefix_0F38_q_legacy(dst, src1, src2); 6792 emit_int8((unsigned char)0xF2); 6793 emit_operand(dst, src2); 6794 } 6795 6796 void Assembler::bsfq(Register dst, Register src) { 6797 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 6798 emit_int8(0x0F); 6799 emit_int8((unsigned char)0xBC); 6800 emit_int8((unsigned char)(0xC0 | encode)); 6801 } 6802 6803 void Assembler::bsrq(Register dst, Register src) { 6804 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 6805 emit_int8(0x0F); 6806 emit_int8((unsigned char)0xBD); 6807 emit_int8((unsigned char)(0xC0 | encode)); 6808 } 6809 6810 void Assembler::bswapq(Register reg) { 6811 int encode = prefixq_and_encode(reg->encoding()); 6812 emit_int8(0x0F); 6813 emit_int8((unsigned char)(0xC8 | encode)); 6814 } 6815 6816 void Assembler::blsiq(Register dst, Register src) { 6817 assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported"); 6818 int encode = vex_prefix_0F38_and_encode_q_legacy(rbx, dst, src); 6819 
emit_int8((unsigned char)0xF3); 6820 emit_int8((unsigned char)(0xC0 | encode)); 6821 } 6822 6823 void Assembler::blsiq(Register dst, Address src) { 6824 InstructionMark im(this); 6825 assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported"); 6826 vex_prefix_0F38_q_legacy(rbx, dst, src); 6827 emit_int8((unsigned char)0xF3); 6828 emit_operand(rbx, src); 6829 } 6830 6831 void Assembler::blsmskq(Register dst, Register src) { 6832 assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported"); 6833 int encode = vex_prefix_0F38_and_encode_q_legacy(rdx, dst, src); 6834 emit_int8((unsigned char)0xF3); 6835 emit_int8((unsigned char)(0xC0 | encode)); 6836 } 6837 6838 void Assembler::blsmskq(Register dst, Address src) { 6839 InstructionMark im(this); 6840 assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported"); 6841 vex_prefix_0F38_q_legacy(rdx, dst, src); 6842 emit_int8((unsigned char)0xF3); 6843 emit_operand(rdx, src); 6844 } 6845 6846 void Assembler::blsrq(Register dst, Register src) { 6847 assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported"); 6848 int encode = vex_prefix_0F38_and_encode_q_legacy(rcx, dst, src); 6849 emit_int8((unsigned char)0xF3); 6850 emit_int8((unsigned char)(0xC0 | encode)); 6851 } 6852 6853 void Assembler::blsrq(Register dst, Address src) { 6854 InstructionMark im(this); 6855 assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported"); 6856 vex_prefix_0F38_q_legacy(rcx, dst, src); 6857 emit_int8((unsigned char)0xF3); 6858 emit_operand(rcx, src); 6859 } 6860 6861 void Assembler::cdqq() { 6862 prefix(REX_W); 6863 emit_int8((unsigned char)0x99); 6864 } 6865 6866 void Assembler::clflush(Address adr) { 6867 prefix(adr); 6868 emit_int8(0x0F); 6869 emit_int8((unsigned char)0xAE); 6870 emit_operand(rdi, adr); 6871 } 6872 6873 void Assembler::cmovq(Condition cc, Register dst, Register src) { 6874 int encode = 
prefixq_and_encode(dst->encoding(), src->encoding()); 6875 emit_int8(0x0F); 6876 emit_int8(0x40 | cc); 6877 emit_int8((unsigned char)(0xC0 | encode)); 6878 } 6879 6880 void Assembler::cmovq(Condition cc, Register dst, Address src) { 6881 InstructionMark im(this); 6882 prefixq(src, dst); 6883 emit_int8(0x0F); 6884 emit_int8(0x40 | cc); 6885 emit_operand(dst, src); 6886 } 6887 6888 void Assembler::cmpq(Address dst, int32_t imm32) { 6889 InstructionMark im(this); 6890 prefixq(dst); 6891 emit_int8((unsigned char)0x81); 6892 emit_operand(rdi, dst, 4); 6893 emit_int32(imm32); 6894 } 6895 6896 void Assembler::cmpq(Register dst, int32_t imm32) { 6897 (void) prefixq_and_encode(dst->encoding()); 6898 emit_arith(0x81, 0xF8, dst, imm32); 6899 } 6900 6901 void Assembler::cmpq(Address dst, Register src) { 6902 InstructionMark im(this); 6903 prefixq(dst, src); 6904 emit_int8(0x3B); 6905 emit_operand(src, dst); 6906 } 6907 6908 void Assembler::cmpq(Register dst, Register src) { 6909 (void) prefixq_and_encode(dst->encoding(), src->encoding()); 6910 emit_arith(0x3B, 0xC0, dst, src); 6911 } 6912 6913 void Assembler::cmpq(Register dst, Address src) { 6914 InstructionMark im(this); 6915 prefixq(src, dst); 6916 emit_int8(0x3B); 6917 emit_operand(dst, src); 6918 } 6919 6920 void Assembler::cmpxchgq(Register reg, Address adr) { 6921 InstructionMark im(this); 6922 prefixq(adr, reg); 6923 emit_int8(0x0F); 6924 emit_int8((unsigned char)0xB1); 6925 emit_operand(reg, adr); 6926 } 6927 6928 void Assembler::cvtsi2sdq(XMMRegister dst, Register src) { 6929 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 6930 int encode = simd_prefix_and_encode_q(dst, dst, src, VEX_SIMD_F2, /* no_mask_reg */ true); 6931 emit_int8(0x2A); 6932 emit_int8((unsigned char)(0xC0 | encode)); 6933 } 6934 6935 void Assembler::cvtsi2sdq(XMMRegister dst, Address src) { 6936 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 6937 if (VM_Version::supports_evex()) { 6938 _tuple_type = EVEX_T1S; 6939 _input_size_in_bits = 
EVEX_32bit; 6940 } 6941 InstructionMark im(this); 6942 simd_prefix_q(dst, dst, src, VEX_SIMD_F2, /* no_mask_reg */ true); 6943 emit_int8(0x2A); 6944 emit_operand(dst, src); 6945 } 6946 6947 void Assembler::cvtsi2ssq(XMMRegister dst, Address src) { 6948 NOT_LP64(assert(VM_Version::supports_sse(), "")); 6949 if (VM_Version::supports_evex()) { 6950 _tuple_type = EVEX_T1S; 6951 _input_size_in_bits = EVEX_32bit; 6952 } 6953 InstructionMark im(this); 6954 simd_prefix_q(dst, dst, src, VEX_SIMD_F3, /* no_mask_reg */ true); 6955 emit_int8(0x2A); 6956 emit_operand(dst, src); 6957 } 6958 6959 void Assembler::cvttsd2siq(Register dst, XMMRegister src) { 6960 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 6961 int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, /* no_mask_reg */ true); 6962 emit_int8(0x2C); 6963 emit_int8((unsigned char)(0xC0 | encode)); 6964 } 6965 6966 void Assembler::cvttss2siq(Register dst, XMMRegister src) { 6967 NOT_LP64(assert(VM_Version::supports_sse(), "")); 6968 int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, /* no_mask_reg */ true); 6969 emit_int8(0x2C); 6970 emit_int8((unsigned char)(0xC0 | encode)); 6971 } 6972 6973 void Assembler::decl(Register dst) { 6974 // Don't use it directly. Use MacroAssembler::decrementl() instead. 6975 // Use two-byte form (one-byte form is a REX prefix in 64-bit mode) 6976 int encode = prefix_and_encode(dst->encoding()); 6977 emit_int8((unsigned char)0xFF); 6978 emit_int8((unsigned char)(0xC8 | encode)); 6979 } 6980 6981 void Assembler::decq(Register dst) { 6982 // Don't use it directly. Use MacroAssembler::decrementq() instead. 6983 // Use two-byte form (one-byte from is a REX prefix in 64-bit mode) 6984 int encode = prefixq_and_encode(dst->encoding()); 6985 emit_int8((unsigned char)0xFF); 6986 emit_int8(0xC8 | encode); 6987 } 6988 6989 void Assembler::decq(Address dst) { 6990 // Don't use it directly. Use MacroAssembler::decrementq() instead. 
6991 InstructionMark im(this); 6992 prefixq(dst); 6993 emit_int8((unsigned char)0xFF); 6994 emit_operand(rcx, dst); 6995 } 6996 6997 void Assembler::fxrstor(Address src) { 6998 prefixq(src); 6999 emit_int8(0x0F); 7000 emit_int8((unsigned char)0xAE); 7001 emit_operand(as_Register(1), src); 7002 } 7003 7004 void Assembler::xrstor(Address src) { 7005 prefixq(src); 7006 emit_int8(0x0F); 7007 emit_int8((unsigned char)0xAE); 7008 emit_operand(as_Register(5), src); 7009 } 7010 7011 void Assembler::fxsave(Address dst) { 7012 prefixq(dst); 7013 emit_int8(0x0F); 7014 emit_int8((unsigned char)0xAE); 7015 emit_operand(as_Register(0), dst); 7016 } 7017 7018 void Assembler::xsave(Address dst) { 7019 prefixq(dst); 7020 emit_int8(0x0F); 7021 emit_int8((unsigned char)0xAE); 7022 emit_operand(as_Register(4), dst); 7023 } 7024 7025 void Assembler::idivq(Register src) { 7026 int encode = prefixq_and_encode(src->encoding()); 7027 emit_int8((unsigned char)0xF7); 7028 emit_int8((unsigned char)(0xF8 | encode)); 7029 } 7030 7031 void Assembler::imulq(Register dst, Register src) { 7032 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 7033 emit_int8(0x0F); 7034 emit_int8((unsigned char)0xAF); 7035 emit_int8((unsigned char)(0xC0 | encode)); 7036 } 7037 7038 void Assembler::imulq(Register dst, Register src, int value) { 7039 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 7040 if (is8bit(value)) { 7041 emit_int8(0x6B); 7042 emit_int8((unsigned char)(0xC0 | encode)); 7043 emit_int8(value & 0xFF); 7044 } else { 7045 emit_int8(0x69); 7046 emit_int8((unsigned char)(0xC0 | encode)); 7047 emit_int32(value); 7048 } 7049 } 7050 7051 void Assembler::imulq(Register dst, Address src) { 7052 InstructionMark im(this); 7053 prefixq(src, dst); 7054 emit_int8(0x0F); 7055 emit_int8((unsigned char) 0xAF); 7056 emit_operand(dst, src); 7057 } 7058 7059 void Assembler::incl(Register dst) { 7060 // Don't use it directly. Use MacroAssembler::incrementl() instead. 
7061 // Use two-byte form (one-byte from is a REX prefix in 64-bit mode) 7062 int encode = prefix_and_encode(dst->encoding()); 7063 emit_int8((unsigned char)0xFF); 7064 emit_int8((unsigned char)(0xC0 | encode)); 7065 } 7066 7067 void Assembler::incq(Register dst) { 7068 // Don't use it directly. Use MacroAssembler::incrementq() instead. 7069 // Use two-byte form (one-byte from is a REX prefix in 64-bit mode) 7070 int encode = prefixq_and_encode(dst->encoding()); 7071 emit_int8((unsigned char)0xFF); 7072 emit_int8((unsigned char)(0xC0 | encode)); 7073 } 7074 7075 void Assembler::incq(Address dst) { 7076 // Don't use it directly. Use MacroAssembler::incrementq() instead. 7077 InstructionMark im(this); 7078 prefixq(dst); 7079 emit_int8((unsigned char)0xFF); 7080 emit_operand(rax, dst); 7081 } 7082 7083 void Assembler::lea(Register dst, Address src) { 7084 leaq(dst, src); 7085 } 7086 7087 void Assembler::leaq(Register dst, Address src) { 7088 InstructionMark im(this); 7089 prefixq(src, dst); 7090 emit_int8((unsigned char)0x8D); 7091 emit_operand(dst, src); 7092 } 7093 7094 void Assembler::mov64(Register dst, int64_t imm64) { 7095 InstructionMark im(this); 7096 int encode = prefixq_and_encode(dst->encoding()); 7097 emit_int8((unsigned char)(0xB8 | encode)); 7098 emit_int64(imm64); 7099 } 7100 7101 void Assembler::mov_literal64(Register dst, intptr_t imm64, RelocationHolder const& rspec) { 7102 InstructionMark im(this); 7103 int encode = prefixq_and_encode(dst->encoding()); 7104 emit_int8(0xB8 | encode); 7105 emit_data64(imm64, rspec); 7106 } 7107 7108 void Assembler::mov_narrow_oop(Register dst, int32_t imm32, RelocationHolder const& rspec) { 7109 InstructionMark im(this); 7110 int encode = prefix_and_encode(dst->encoding()); 7111 emit_int8((unsigned char)(0xB8 | encode)); 7112 emit_data((int)imm32, rspec, narrow_oop_operand); 7113 } 7114 7115 void Assembler::mov_narrow_oop(Address dst, int32_t imm32, RelocationHolder const& rspec) { 7116 InstructionMark im(this); 7117 
prefix(dst); 7118 emit_int8((unsigned char)0xC7); 7119 emit_operand(rax, dst, 4); 7120 emit_data((int)imm32, rspec, narrow_oop_operand); 7121 } 7122 7123 void Assembler::cmp_narrow_oop(Register src1, int32_t imm32, RelocationHolder const& rspec) { 7124 InstructionMark im(this); 7125 int encode = prefix_and_encode(src1->encoding()); 7126 emit_int8((unsigned char)0x81); 7127 emit_int8((unsigned char)(0xF8 | encode)); 7128 emit_data((int)imm32, rspec, narrow_oop_operand); 7129 } 7130 7131 void Assembler::cmp_narrow_oop(Address src1, int32_t imm32, RelocationHolder const& rspec) { 7132 InstructionMark im(this); 7133 prefix(src1); 7134 emit_int8((unsigned char)0x81); 7135 emit_operand(rax, src1, 4); 7136 emit_data((int)imm32, rspec, narrow_oop_operand); 7137 } 7138 7139 void Assembler::lzcntq(Register dst, Register src) { 7140 assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR"); 7141 emit_int8((unsigned char)0xF3); 7142 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 7143 emit_int8(0x0F); 7144 emit_int8((unsigned char)0xBD); 7145 emit_int8((unsigned char)(0xC0 | encode)); 7146 } 7147 7148 void Assembler::movdq(XMMRegister dst, Register src) { 7149 // table D-1 says MMX/SSE2 7150 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 7151 int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_66, /* no_mask_reg */ true); 7152 emit_int8(0x6E); 7153 emit_int8((unsigned char)(0xC0 | encode)); 7154 } 7155 7156 void Assembler::movdq(Register dst, XMMRegister src) { 7157 // table D-1 says MMX/SSE2 7158 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 7159 // swap src/dst to get correct prefix 7160 int encode = simd_prefix_and_encode_q(src, dst, VEX_SIMD_66, /* no_mask_reg */ true); 7161 emit_int8(0x7E); 7162 emit_int8((unsigned char)(0xC0 | encode)); 7163 } 7164 7165 void Assembler::movq(Register dst, Register src) { 7166 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 7167 emit_int8((unsigned char)0x8B); 7168 
emit_int8((unsigned char)(0xC0 | encode)); 7169 } 7170 7171 void Assembler::movq(Register dst, Address src) { 7172 InstructionMark im(this); 7173 prefixq(src, dst); 7174 emit_int8((unsigned char)0x8B); 7175 emit_operand(dst, src); 7176 } 7177 7178 void Assembler::movq(Address dst, Register src) { 7179 InstructionMark im(this); 7180 prefixq(dst, src); 7181 emit_int8((unsigned char)0x89); 7182 emit_operand(src, dst); 7183 } 7184 7185 void Assembler::movsbq(Register dst, Address src) { 7186 InstructionMark im(this); 7187 prefixq(src, dst); 7188 emit_int8(0x0F); 7189 emit_int8((unsigned char)0xBE); 7190 emit_operand(dst, src); 7191 } 7192 7193 void Assembler::movsbq(Register dst, Register src) { 7194 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 7195 emit_int8(0x0F); 7196 emit_int8((unsigned char)0xBE); 7197 emit_int8((unsigned char)(0xC0 | encode)); 7198 } 7199 7200 void Assembler::movslq(Register dst, int32_t imm32) { 7201 // dbx shows movslq(rcx, 3) as movq $0x0000000049000000,(%rbx) 7202 // and movslq(r8, 3); as movl $0x0000000048000000,(%rbx) 7203 // as a result we shouldn't use until tested at runtime... 
7204 ShouldNotReachHere(); 7205 InstructionMark im(this); 7206 int encode = prefixq_and_encode(dst->encoding()); 7207 emit_int8((unsigned char)(0xC7 | encode)); 7208 emit_int32(imm32); 7209 } 7210 7211 void Assembler::movslq(Address dst, int32_t imm32) { 7212 assert(is_simm32(imm32), "lost bits"); 7213 InstructionMark im(this); 7214 prefixq(dst); 7215 emit_int8((unsigned char)0xC7); 7216 emit_operand(rax, dst, 4); 7217 emit_int32(imm32); 7218 } 7219 7220 void Assembler::movslq(Register dst, Address src) { 7221 InstructionMark im(this); 7222 prefixq(src, dst); 7223 emit_int8(0x63); 7224 emit_operand(dst, src); 7225 } 7226 7227 void Assembler::movslq(Register dst, Register src) { 7228 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 7229 emit_int8(0x63); 7230 emit_int8((unsigned char)(0xC0 | encode)); 7231 } 7232 7233 void Assembler::movswq(Register dst, Address src) { 7234 InstructionMark im(this); 7235 prefixq(src, dst); 7236 emit_int8(0x0F); 7237 emit_int8((unsigned char)0xBF); 7238 emit_operand(dst, src); 7239 } 7240 7241 void Assembler::movswq(Register dst, Register src) { 7242 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 7243 emit_int8((unsigned char)0x0F); 7244 emit_int8((unsigned char)0xBF); 7245 emit_int8((unsigned char)(0xC0 | encode)); 7246 } 7247 7248 void Assembler::movzbq(Register dst, Address src) { 7249 InstructionMark im(this); 7250 prefixq(src, dst); 7251 emit_int8((unsigned char)0x0F); 7252 emit_int8((unsigned char)0xB6); 7253 emit_operand(dst, src); 7254 } 7255 7256 void Assembler::movzbq(Register dst, Register src) { 7257 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 7258 emit_int8(0x0F); 7259 emit_int8((unsigned char)0xB6); 7260 emit_int8(0xC0 | encode); 7261 } 7262 7263 void Assembler::movzwq(Register dst, Address src) { 7264 InstructionMark im(this); 7265 prefixq(src, dst); 7266 emit_int8((unsigned char)0x0F); 7267 emit_int8((unsigned char)0xB7); 7268 emit_operand(dst, src); 7269 } 
7270 7271 void Assembler::movzwq(Register dst, Register src) { 7272 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 7273 emit_int8((unsigned char)0x0F); 7274 emit_int8((unsigned char)0xB7); 7275 emit_int8((unsigned char)(0xC0 | encode)); 7276 } 7277 7278 void Assembler::mulq(Address src) { 7279 InstructionMark im(this); 7280 prefixq(src); 7281 emit_int8((unsigned char)0xF7); 7282 emit_operand(rsp, src); 7283 } 7284 7285 void Assembler::mulq(Register src) { 7286 int encode = prefixq_and_encode(src->encoding()); 7287 emit_int8((unsigned char)0xF7); 7288 emit_int8((unsigned char)(0xE0 | encode)); 7289 } 7290 7291 void Assembler::mulxq(Register dst1, Register dst2, Register src) { 7292 assert(VM_Version::supports_bmi2(), "bit manipulation instructions not supported"); 7293 int encode = vex_prefix_and_encode(dst1->encoding(), dst2->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_38, 7294 /* vex_w */ true, AVX_128bit, /* legacy_mode */ true, /* no_mask_reg */ false); 7295 emit_int8((unsigned char)0xF6); 7296 emit_int8((unsigned char)(0xC0 | encode)); 7297 } 7298 7299 void Assembler::negq(Register dst) { 7300 int encode = prefixq_and_encode(dst->encoding()); 7301 emit_int8((unsigned char)0xF7); 7302 emit_int8((unsigned char)(0xD8 | encode)); 7303 } 7304 7305 void Assembler::notq(Register dst) { 7306 int encode = prefixq_and_encode(dst->encoding()); 7307 emit_int8((unsigned char)0xF7); 7308 emit_int8((unsigned char)(0xD0 | encode)); 7309 } 7310 7311 void Assembler::orq(Address dst, int32_t imm32) { 7312 InstructionMark im(this); 7313 prefixq(dst); 7314 emit_int8((unsigned char)0x81); 7315 emit_operand(rcx, dst, 4); 7316 emit_int32(imm32); 7317 } 7318 7319 void Assembler::orq(Register dst, int32_t imm32) { 7320 (void) prefixq_and_encode(dst->encoding()); 7321 emit_arith(0x81, 0xC8, dst, imm32); 7322 } 7323 7324 void Assembler::orq(Register dst, Address src) { 7325 InstructionMark im(this); 7326 prefixq(src, dst); 7327 emit_int8(0x0B); 7328 
emit_operand(dst, src); 7329 } 7330 7331 void Assembler::orq(Register dst, Register src) { 7332 (void) prefixq_and_encode(dst->encoding(), src->encoding()); 7333 emit_arith(0x0B, 0xC0, dst, src); 7334 } 7335 7336 void Assembler::popa() { // 64bit 7337 movq(r15, Address(rsp, 0)); 7338 movq(r14, Address(rsp, wordSize)); 7339 movq(r13, Address(rsp, 2 * wordSize)); 7340 movq(r12, Address(rsp, 3 * wordSize)); 7341 movq(r11, Address(rsp, 4 * wordSize)); 7342 movq(r10, Address(rsp, 5 * wordSize)); 7343 movq(r9, Address(rsp, 6 * wordSize)); 7344 movq(r8, Address(rsp, 7 * wordSize)); 7345 movq(rdi, Address(rsp, 8 * wordSize)); 7346 movq(rsi, Address(rsp, 9 * wordSize)); 7347 movq(rbp, Address(rsp, 10 * wordSize)); 7348 // skip rsp 7349 movq(rbx, Address(rsp, 12 * wordSize)); 7350 movq(rdx, Address(rsp, 13 * wordSize)); 7351 movq(rcx, Address(rsp, 14 * wordSize)); 7352 movq(rax, Address(rsp, 15 * wordSize)); 7353 7354 addq(rsp, 16 * wordSize); 7355 } 7356 7357 void Assembler::popcntq(Register dst, Address src) { 7358 assert(VM_Version::supports_popcnt(), "must support"); 7359 InstructionMark im(this); 7360 emit_int8((unsigned char)0xF3); 7361 prefixq(src, dst); 7362 emit_int8((unsigned char)0x0F); 7363 emit_int8((unsigned char)0xB8); 7364 emit_operand(dst, src); 7365 } 7366 7367 void Assembler::popcntq(Register dst, Register src) { 7368 assert(VM_Version::supports_popcnt(), "must support"); 7369 emit_int8((unsigned char)0xF3); 7370 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 7371 emit_int8((unsigned char)0x0F); 7372 emit_int8((unsigned char)0xB8); 7373 emit_int8((unsigned char)(0xC0 | encode)); 7374 } 7375 7376 void Assembler::popq(Address dst) { 7377 InstructionMark im(this); 7378 prefixq(dst); 7379 emit_int8((unsigned char)0x8F); 7380 emit_operand(rax, dst); 7381 } 7382 7383 void Assembler::pusha() { // 64bit 7384 // we have to store original rsp. ABI says that 128 bytes 7385 // below rsp are local scratch. 
7386 movq(Address(rsp, -5 * wordSize), rsp); 7387 7388 subq(rsp, 16 * wordSize); 7389 7390 movq(Address(rsp, 15 * wordSize), rax); 7391 movq(Address(rsp, 14 * wordSize), rcx); 7392 movq(Address(rsp, 13 * wordSize), rdx); 7393 movq(Address(rsp, 12 * wordSize), rbx); 7394 // skip rsp 7395 movq(Address(rsp, 10 * wordSize), rbp); 7396 movq(Address(rsp, 9 * wordSize), rsi); 7397 movq(Address(rsp, 8 * wordSize), rdi); 7398 movq(Address(rsp, 7 * wordSize), r8); 7399 movq(Address(rsp, 6 * wordSize), r9); 7400 movq(Address(rsp, 5 * wordSize), r10); 7401 movq(Address(rsp, 4 * wordSize), r11); 7402 movq(Address(rsp, 3 * wordSize), r12); 7403 movq(Address(rsp, 2 * wordSize), r13); 7404 movq(Address(rsp, wordSize), r14); 7405 movq(Address(rsp, 0), r15); 7406 } 7407 7408 void Assembler::pushq(Address src) { 7409 InstructionMark im(this); 7410 prefixq(src); 7411 emit_int8((unsigned char)0xFF); 7412 emit_operand(rsi, src); 7413 } 7414 7415 void Assembler::rclq(Register dst, int imm8) { 7416 assert(isShiftCount(imm8 >> 1), "illegal shift count"); 7417 int encode = prefixq_and_encode(dst->encoding()); 7418 if (imm8 == 1) { 7419 emit_int8((unsigned char)0xD1); 7420 emit_int8((unsigned char)(0xD0 | encode)); 7421 } else { 7422 emit_int8((unsigned char)0xC1); 7423 emit_int8((unsigned char)(0xD0 | encode)); 7424 emit_int8(imm8); 7425 } 7426 } 7427 7428 void Assembler::rcrq(Register dst, int imm8) { 7429 assert(isShiftCount(imm8 >> 1), "illegal shift count"); 7430 int encode = prefixq_and_encode(dst->encoding()); 7431 if (imm8 == 1) { 7432 emit_int8((unsigned char)0xD1); 7433 emit_int8((unsigned char)(0xD8 | encode)); 7434 } else { 7435 emit_int8((unsigned char)0xC1); 7436 emit_int8((unsigned char)(0xD8 | encode)); 7437 emit_int8(imm8); 7438 } 7439 } 7440 7441 void Assembler::rorq(Register dst, int imm8) { 7442 assert(isShiftCount(imm8 >> 1), "illegal shift count"); 7443 int encode = prefixq_and_encode(dst->encoding()); 7444 if (imm8 == 1) { 7445 emit_int8((unsigned char)0xD1); 7446 
emit_int8((unsigned char)(0xC8 | encode)); 7447 } else { 7448 emit_int8((unsigned char)0xC1); 7449 emit_int8((unsigned char)(0xc8 | encode)); 7450 emit_int8(imm8); 7451 } 7452 } 7453 7454 void Assembler::rorxq(Register dst, Register src, int imm8) { 7455 assert(VM_Version::supports_bmi2(), "bit manipulation instructions not supported"); 7456 int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_3A, 7457 /* vex_w */ true, AVX_128bit, /* legacy_mode */ true, /* no_mask_reg */ false); 7458 emit_int8((unsigned char)0xF0); 7459 emit_int8((unsigned char)(0xC0 | encode)); 7460 emit_int8(imm8); 7461 } 7462 7463 void Assembler::sarq(Register dst, int imm8) { 7464 assert(isShiftCount(imm8 >> 1), "illegal shift count"); 7465 int encode = prefixq_and_encode(dst->encoding()); 7466 if (imm8 == 1) { 7467 emit_int8((unsigned char)0xD1); 7468 emit_int8((unsigned char)(0xF8 | encode)); 7469 } else { 7470 emit_int8((unsigned char)0xC1); 7471 emit_int8((unsigned char)(0xF8 | encode)); 7472 emit_int8(imm8); 7473 } 7474 } 7475 7476 void Assembler::sarq(Register dst) { 7477 int encode = prefixq_and_encode(dst->encoding()); 7478 emit_int8((unsigned char)0xD3); 7479 emit_int8((unsigned char)(0xF8 | encode)); 7480 } 7481 7482 void Assembler::sbbq(Address dst, int32_t imm32) { 7483 InstructionMark im(this); 7484 prefixq(dst); 7485 emit_arith_operand(0x81, rbx, dst, imm32); 7486 } 7487 7488 void Assembler::sbbq(Register dst, int32_t imm32) { 7489 (void) prefixq_and_encode(dst->encoding()); 7490 emit_arith(0x81, 0xD8, dst, imm32); 7491 } 7492 7493 void Assembler::sbbq(Register dst, Address src) { 7494 InstructionMark im(this); 7495 prefixq(src, dst); 7496 emit_int8(0x1B); 7497 emit_operand(dst, src); 7498 } 7499 7500 void Assembler::sbbq(Register dst, Register src) { 7501 (void) prefixq_and_encode(dst->encoding(), src->encoding()); 7502 emit_arith(0x1B, 0xC0, dst, src); 7503 } 7504 7505 void Assembler::shlq(Register dst, int imm8) { 7506 
assert(isShiftCount(imm8 >> 1), "illegal shift count"); 7507 int encode = prefixq_and_encode(dst->encoding()); 7508 if (imm8 == 1) { 7509 emit_int8((unsigned char)0xD1); 7510 emit_int8((unsigned char)(0xE0 | encode)); 7511 } else { 7512 emit_int8((unsigned char)0xC1); 7513 emit_int8((unsigned char)(0xE0 | encode)); 7514 emit_int8(imm8); 7515 } 7516 } 7517 7518 void Assembler::shlq(Register dst) { 7519 int encode = prefixq_and_encode(dst->encoding()); 7520 emit_int8((unsigned char)0xD3); 7521 emit_int8((unsigned char)(0xE0 | encode)); 7522 } 7523 7524 void Assembler::shrq(Register dst, int imm8) { 7525 assert(isShiftCount(imm8 >> 1), "illegal shift count"); 7526 int encode = prefixq_and_encode(dst->encoding()); 7527 emit_int8((unsigned char)0xC1); 7528 emit_int8((unsigned char)(0xE8 | encode)); 7529 emit_int8(imm8); 7530 } 7531 7532 void Assembler::shrq(Register dst) { 7533 int encode = prefixq_and_encode(dst->encoding()); 7534 emit_int8((unsigned char)0xD3); 7535 emit_int8(0xE8 | encode); 7536 } 7537 7538 void Assembler::subq(Address dst, int32_t imm32) { 7539 InstructionMark im(this); 7540 prefixq(dst); 7541 emit_arith_operand(0x81, rbp, dst, imm32); 7542 } 7543 7544 void Assembler::subq(Address dst, Register src) { 7545 InstructionMark im(this); 7546 prefixq(dst, src); 7547 emit_int8(0x29); 7548 emit_operand(src, dst); 7549 } 7550 7551 void Assembler::subq(Register dst, int32_t imm32) { 7552 (void) prefixq_and_encode(dst->encoding()); 7553 emit_arith(0x81, 0xE8, dst, imm32); 7554 } 7555 7556 // Force generation of a 4 byte immediate value even if it fits into 8bit 7557 void Assembler::subq_imm32(Register dst, int32_t imm32) { 7558 (void) prefixq_and_encode(dst->encoding()); 7559 emit_arith_imm32(0x81, 0xE8, dst, imm32); 7560 } 7561 7562 void Assembler::subq(Register dst, Address src) { 7563 InstructionMark im(this); 7564 prefixq(src, dst); 7565 emit_int8(0x2B); 7566 emit_operand(dst, src); 7567 } 7568 7569 void Assembler::subq(Register dst, Register src) { 7570 
(void) prefixq_and_encode(dst->encoding(), src->encoding()); 7571 emit_arith(0x2B, 0xC0, dst, src); 7572 } 7573 7574 void Assembler::testq(Register dst, int32_t imm32) { 7575 // not using emit_arith because test 7576 // doesn't support sign-extension of 7577 // 8bit operands 7578 int encode = dst->encoding(); 7579 if (encode == 0) { 7580 prefix(REX_W); 7581 emit_int8((unsigned char)0xA9); 7582 } else { 7583 encode = prefixq_and_encode(encode); 7584 emit_int8((unsigned char)0xF7); 7585 emit_int8((unsigned char)(0xC0 | encode)); 7586 } 7587 emit_int32(imm32); 7588 } 7589 7590 void Assembler::testq(Register dst, Register src) { 7591 (void) prefixq_and_encode(dst->encoding(), src->encoding()); 7592 emit_arith(0x85, 0xC0, dst, src); 7593 } 7594 7595 void Assembler::xaddq(Address dst, Register src) { 7596 InstructionMark im(this); 7597 prefixq(dst, src); 7598 emit_int8(0x0F); 7599 emit_int8((unsigned char)0xC1); 7600 emit_operand(src, dst); 7601 } 7602 7603 void Assembler::xchgq(Register dst, Address src) { 7604 InstructionMark im(this); 7605 prefixq(src, dst); 7606 emit_int8((unsigned char)0x87); 7607 emit_operand(dst, src); 7608 } 7609 7610 void Assembler::xchgq(Register dst, Register src) { 7611 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 7612 emit_int8((unsigned char)0x87); 7613 emit_int8((unsigned char)(0xc0 | encode)); 7614 } 7615 7616 void Assembler::xorq(Register dst, Register src) { 7617 (void) prefixq_and_encode(dst->encoding(), src->encoding()); 7618 emit_arith(0x33, 0xC0, dst, src); 7619 } 7620 7621 void Assembler::xorq(Register dst, Address src) { 7622 InstructionMark im(this); 7623 prefixq(src, dst); 7624 emit_int8(0x33); 7625 emit_operand(dst, src); 7626 } 7627 7628 #endif // !LP64