/*
 * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "assembler_x86.inline.hpp"
#include "gc_interface/collectedHeap.inline.hpp"
#include "interpreter/interpreter.hpp"
#include "memory/cardTableModRefBS.hpp"
#include "memory/resourceArea.hpp"
#include "prims/methodHandles.hpp"
#include "runtime/biasedLocking.hpp"
#include "runtime/interfaceSupport.hpp"
#include "runtime/objectMonitor.hpp"
#include "runtime/os.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/stubRoutines.hpp"
#ifndef SERIALGC
#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
#include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"
#include "gc_implementation/g1/heapRegion.hpp"
#endif

#ifdef PRODUCT
#define BLOCK_COMMENT(str) /* nothing */
#define STOP(error) stop(error)
#else
#define BLOCK_COMMENT(str) block_comment(str)
#define STOP(error) block_comment(error); stop(error)
#endif

#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")

// Implementation of AddressLiteral

AddressLiteral::AddressLiteral(address target, relocInfo::relocType rtype) {
  _is_lval = false;
  _target = target;
  switch (rtype) {
  case relocInfo::oop_type:
  case relocInfo::metadata_type:
    // Oops are a special case. Normally they would be their own section
    // but in cases like icBuffer they are literals in the code stream that
    // we don't have a section for. We use none so that we get a literal address
    // which is always patchable.
    break;
  case relocInfo::external_word_type:
    _rspec = external_word_Relocation::spec(target);
    break;
  case relocInfo::internal_word_type:
    _rspec = internal_word_Relocation::spec(target);
    break;
  case relocInfo::opt_virtual_call_type:
    _rspec = opt_virtual_call_Relocation::spec();
    break;
  case relocInfo::static_call_type:
    _rspec = static_call_Relocation::spec();
    break;
  case relocInfo::runtime_call_type:
    _rspec = runtime_call_Relocation::spec();
    break;
  case relocInfo::poll_type:
  case relocInfo::poll_return_type:
    _rspec = Relocation::spec_simple(rtype);
    break;
  case relocInfo::none:
    break;
  default:
    ShouldNotReachHere();
    break;
  }
}
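// Illustrative note (added commentary, not in the original source): the
// constructor above only picks which relocation spec travels with the
// literal. For example, a hypothetical caller doing
//
//   AddressLiteral lit((address)some_stub_entry, relocInfo::runtime_call_type);
//
// ends up with _rspec = runtime_call_Relocation::spec(), so the emitted
// address can be patched if the code moves, while relocInfo::none leaves the
// raw address with no relocation record (the icBuffer/oop case above).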
// Implementation of Address

#ifdef _LP64

Address Address::make_array(ArrayAddress adr) {
  // Not implementable on 64bit machines
  // Should have been handled higher up the call chain.
  ShouldNotReachHere();
  return Address();
}

// exceedingly dangerous constructor
Address::Address(int disp, address loc, relocInfo::relocType rtype) {
  _base  = noreg;
  _index = noreg;
  _scale = no_scale;
  _disp  = disp;
  switch (rtype) {
    case relocInfo::external_word_type:
      _rspec = external_word_Relocation::spec(loc);
      break;
    case relocInfo::internal_word_type:
      _rspec = internal_word_Relocation::spec(loc);
      break;
    case relocInfo::runtime_call_type:
      // HMM
      _rspec = runtime_call_Relocation::spec();
      break;
    case relocInfo::poll_type:
    case relocInfo::poll_return_type:
      _rspec = Relocation::spec_simple(rtype);
      break;
    case relocInfo::none:
      break;
    default:
      ShouldNotReachHere();
  }
}
#else // LP64

Address Address::make_array(ArrayAddress adr) {
  AddressLiteral base = adr.base();
  Address index = adr.index();
  assert(index._disp == 0, "must not have disp"); // maybe it can?
  Address array(index._base, index._index, index._scale, (intptr_t) base.target());
  array._rspec = base._rspec;
  return array;
}

// exceedingly dangerous constructor
Address::Address(address loc, RelocationHolder spec) {
  _base  = noreg;
  _index = noreg;
  _scale = no_scale;
  _disp  = (intptr_t) loc;
  _rspec = spec;
}

#endif // _LP64


// Convert the raw encoding form into the form expected by the constructor for
// Address. An index of 4 (rsp) corresponds to having no index, so convert
// that to noreg for the Address constructor.
Address Address::make_raw(int base, int index, int scale, int disp, relocInfo::relocType disp_reloc) {
  RelocationHolder rspec;
  if (disp_reloc != relocInfo::none) {
    rspec = Relocation::spec_simple(disp_reloc);
  }
  bool valid_index = index != rsp->encoding();
  if (valid_index) {
    Address madr(as_Register(base), as_Register(index), (Address::ScaleFactor)scale, in_ByteSize(disp));
    madr._rspec = rspec;
    return madr;
  } else {
    Address madr(as_Register(base), noreg, Address::no_scale, in_ByteSize(disp));
    madr._rspec = rspec;
    return madr;
  }
}
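// Worked example (illustrative only): make_raw(5 /*rbp*/, 4 /*rsp*/, 0, 16,
// relocInfo::none) sees index == rsp->encoding(); since an index encoding of
// 4 means "no index" in the raw form, it returns
// Address(rbp, noreg, Address::no_scale, 16), i.e. plain [rbp + 16].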
// Implementation of Assembler

int AbstractAssembler::code_fill_byte() {
  return (u_char)'\xF4'; // hlt
}

// make this go away someday
void Assembler::emit_data(jint data, relocInfo::relocType rtype, int format) {
  if (rtype == relocInfo::none)
    emit_long(data);
  else
    emit_data(data, Relocation::spec_simple(rtype), format);
}

void Assembler::emit_data(jint data, RelocationHolder const& rspec, int format) {
  assert(imm_operand == 0, "default format must be immediate in this file");
  assert(inst_mark() != NULL, "must be inside InstructionMark");
  if (rspec.type() != relocInfo::none) {
#ifdef ASSERT
    check_relocation(rspec, format);
#endif
    // Do not use AbstractAssembler::relocate, which is not intended for
    // embedded words. Instead, relocate to the enclosing instruction.

    // hack: call32 is too wide for the reloc format mask, so use disp32
    if (format == call32_operand)
      code_section()->relocate(inst_mark(), rspec, disp32_operand);
    else
      code_section()->relocate(inst_mark(), rspec, format);
  }
  emit_long(data);
}

static int encode(Register r) {
  int enc = r->encoding();
  if (enc >= 8) {
    enc -= 8;
  }
  return enc;
}

void Assembler::emit_arith_b(int op1, int op2, Register dst, int imm8) {
  assert(dst->has_byte_register(), "must have byte register");
  assert(isByte(op1) && isByte(op2), "wrong opcode");
  assert(isByte(imm8), "not a byte");
  assert((op1 & 0x01) == 0, "should be 8bit operation");
  emit_byte(op1);
  emit_byte(op2 | encode(dst));
  emit_byte(imm8);
}


void Assembler::emit_arith(int op1, int op2, Register dst, int32_t imm32) {
  assert(isByte(op1) && isByte(op2), "wrong opcode");
  assert((op1 & 0x01) == 1, "should be 32bit operation");
  assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
  if (is8bit(imm32)) {
    emit_byte(op1 | 0x02); // set sign bit
    emit_byte(op2 | encode(dst));
    emit_byte(imm32 & 0xFF);
  } else {
    emit_byte(op1);
    emit_byte(op2 | encode(dst));
    emit_long(imm32);
  }
}

// Force generation of a 4 byte immediate value even if it fits into 8bit
void Assembler::emit_arith_imm32(int op1, int op2, Register dst, int32_t imm32) {
  assert(isByte(op1) && isByte(op2), "wrong opcode");
  assert((op1 & 0x01) == 1, "should be 32bit operation");
  assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
  emit_byte(op1);
  emit_byte(op2 | encode(dst));
  emit_long(imm32);
}

// immediate-to-memory forms
void Assembler::emit_arith_operand(int op1, Register rm, Address adr, int32_t imm32) {
  assert((op1 & 0x01) == 1, "should be 32bit operation");
  assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
  if (is8bit(imm32)) {
    emit_byte(op1 | 0x02); // set sign bit
    emit_operand(rm, adr, 1);
    emit_byte(imm32 & 0xFF);
  } else {
    emit_byte(op1);
    emit_operand(rm, adr, 4);
    emit_long(imm32);
  }
}
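// Worked example (illustrative only): emit_arith(0x81, 0xC0, rbx, 12) takes
// the sign-extended imm8 path because 12 fits in a byte:
//   emit_byte(0x81 | 0x02);         // -> 0x83
//   emit_byte(0xC0 | encode(rbx));  // -> 0xC3
//   emit_byte(12 & 0xFF);           // -> 0x0C
// producing the bytes 83 C3 0C, i.e. "addl $12, %ebx".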
void Assembler::emit_arith(int op1, int op2, Register dst, Register src) {
  assert(isByte(op1) && isByte(op2), "wrong opcode");
  emit_byte(op1);
  emit_byte(op2 | encode(dst) << 3 | encode(src));
}


void Assembler::emit_operand(Register reg, Register base, Register index,
                             Address::ScaleFactor scale, int disp,
                             RelocationHolder const& rspec,
                             int rip_relative_correction) {
  relocInfo::relocType rtype = (relocInfo::relocType) rspec.type();

  // Encode the registers as needed in the fields they are used in

  int regenc = encode(reg) << 3;
  int indexenc = index->is_valid() ? encode(index) << 3 : 0;
  int baseenc = base->is_valid() ? encode(base) : 0;

  if (base->is_valid()) {
    if (index->is_valid()) {
      assert(scale != Address::no_scale, "inconsistent address");
      // [base + index*scale + disp]
      if (disp == 0 && rtype == relocInfo::none &&
          base != rbp LP64_ONLY(&& base != r13)) {
        // [base + index*scale]
        // [00 reg 100][ss index base]
        assert(index != rsp, "illegal addressing mode");
        emit_byte(0x04 | regenc);
        emit_byte(scale << 6 | indexenc | baseenc);
      } else if (is8bit(disp) && rtype == relocInfo::none) {
        // [base + index*scale + imm8]
        // [01 reg 100][ss index base] imm8
        assert(index != rsp, "illegal addressing mode");
        emit_byte(0x44 | regenc);
        emit_byte(scale << 6 | indexenc | baseenc);
        emit_byte(disp & 0xFF);
      } else {
        // [base + index*scale + disp32]
        // [10 reg 100][ss index base] disp32
        assert(index != rsp, "illegal addressing mode");
        emit_byte(0x84 | regenc);
        emit_byte(scale << 6 | indexenc | baseenc);
        emit_data(disp, rspec, disp32_operand);
      }
    } else if (base == rsp LP64_ONLY(|| base == r12)) {
      // [rsp + disp]
      if (disp == 0 && rtype == relocInfo::none) {
        // [rsp]
        // [00 reg 100][00 100 100]
        emit_byte(0x04 | regenc);
        emit_byte(0x24);
      } else if (is8bit(disp) && rtype == relocInfo::none) {
        // [rsp + imm8]
        // [01 reg 100][00 100 100] disp8
        emit_byte(0x44 | regenc);
        emit_byte(0x24);
        emit_byte(disp & 0xFF);
      } else {
        // [rsp + imm32]
        // [10 reg 100][00 100 100] disp32
        emit_byte(0x84 | regenc);
        emit_byte(0x24);
        emit_data(disp, rspec, disp32_operand);
      }
    } else {
      // [base + disp]
      assert(base != rsp LP64_ONLY(&& base != r12), "illegal addressing mode");
      if (disp == 0 && rtype == relocInfo::none &&
          base != rbp LP64_ONLY(&& base != r13)) {
        // [base]
        // [00 reg base]
        emit_byte(0x00 | regenc | baseenc);
      } else if (is8bit(disp) && rtype == relocInfo::none) {
        // [base + disp8]
        // [01 reg base] disp8
        emit_byte(0x40 | regenc | baseenc);
        emit_byte(disp & 0xFF);
      } else {
        // [base + disp32]
        // [10 reg base] disp32
        emit_byte(0x80 | regenc | baseenc);
        emit_data(disp, rspec, disp32_operand);
      }
    }
  } else {
    if (index->is_valid()) {
      assert(scale != Address::no_scale, "inconsistent address");
      // [index*scale + disp]
      // [00 reg 100][ss index 101] disp32
      assert(index != rsp, "illegal addressing mode");
      emit_byte(0x04 | regenc);
      emit_byte(scale << 6 | indexenc | 0x05);
      emit_data(disp, rspec, disp32_operand);
    } else if (rtype != relocInfo::none) {
      // [disp] (64bit) RIP-RELATIVE (32bit) abs
      // [00 000 101] disp32

      emit_byte(0x05 | regenc);
      // Note that the RIP-rel. correction applies to the generated
      // disp field, but _not_ to the target address in the rspec.

      // disp was created by converting the target address minus the pc
      // at the start of the instruction. That needs more correction here.
      // intptr_t disp = target - next_ip;
      assert(inst_mark() != NULL, "must be inside InstructionMark");
      address next_ip = pc() + sizeof(int32_t) + rip_relative_correction;
      int64_t adjusted = disp;
      // Do rip-rel adjustment for 64bit
      LP64_ONLY(adjusted -= (next_ip - inst_mark()));
      assert(is_simm32(adjusted),
             "must be 32bit offset (RIP relative address)");
      emit_data((int32_t) adjusted, rspec, disp32_operand);

    } else {
      // 32bit never did this; it handled everything in the rip-rel/disp code above
      // [disp] ABSOLUTE
      // [00 reg 100][00 100 101] disp32
      emit_byte(0x04 | regenc);
      emit_byte(0x25);
      emit_data(disp, rspec, disp32_operand);
    }
  }
}

void Assembler::emit_operand(XMMRegister reg, Register base, Register index,
                             Address::ScaleFactor scale, int disp,
                             RelocationHolder const& rspec) {
  emit_operand((Register)reg, base, index, scale, disp, rspec);
}
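// Worked example (illustrative only): encoding [rsi + rdi*4 + 8] with
// reg = rax and no relocation takes the disp8 branch above:
//   emit_byte(0x44 | encode(rax) << 3);                                // 0x44: mod=01, rm=100 (SIB follows)
//   emit_byte(Address::times_4 << 6 | encode(rdi) << 3 | encode(rsi)); // 0xBE
//   emit_byte(8 & 0xFF);                                               // 0x08
// so movl(rax, Address(rsi, rdi, Address::times_4, 8)) emits 8B 44 BE 08.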
// Secret local extension to Assembler::WhichOperand:
#define end_pc_operand (_WhichOperand_limit)

address Assembler::locate_operand(address inst, WhichOperand which) {
  // Decode the given instruction, and return the address of
  // an embedded 32-bit operand word.

  // If "which" is disp32_operand, selects the displacement portion
  // of an effective address specifier.
  // If "which" is imm64_operand, selects the trailing immediate constant.
  // If "which" is call32_operand, selects the displacement of a call or jump.
  // Caller is responsible for ensuring that there is such an operand,
  // and that it is 32/64 bits wide.

  // If "which" is end_pc_operand, find the end of the instruction.

  address ip = inst;
  bool is_64bit = false;

  debug_only(bool has_disp32 = false);
  int tail_size = 0; // other random bytes (#32, #16, etc.) at end of insn

  again_after_prefix:
  switch (0xFF & *ip++) {

  // These convenience macros generate groups of "case" labels for the switch.
#define REP4(x) (x)+0: case (x)+1: case (x)+2: case (x)+3
#define REP8(x) (x)+0: case (x)+1: case (x)+2: case (x)+3: \
             case (x)+4: case (x)+5: case (x)+6: case (x)+7
#define REP16(x) REP8((x)+0): \
              case REP8((x)+8)

  case CS_segment:
  case SS_segment:
  case DS_segment:
  case ES_segment:
  case FS_segment:
  case GS_segment:
    // Seems dubious
    LP64_ONLY(assert(false, "shouldn't have that prefix"));
    assert(ip == inst+1, "only one prefix allowed");
    goto again_after_prefix;

  case 0x67:
  case REX:
  case REX_B:
  case REX_X:
  case REX_XB:
  case REX_R:
  case REX_RB:
  case REX_RX:
  case REX_RXB:
    NOT_LP64(assert(false, "64bit prefixes"));
    goto again_after_prefix;

  case REX_W:
  case REX_WB:
  case REX_WX:
  case REX_WXB:
  case REX_WR:
  case REX_WRB:
  case REX_WRX:
  case REX_WRXB:
    NOT_LP64(assert(false, "64bit prefixes"));
    is_64bit = true;
    goto again_after_prefix;

  case 0xFF: // pushq a; decl a; incl a; call a; jmp a
  case 0x88: // movb a, r
  case 0x89: // movl a, r
  case 0x8A: // movb r, a
  case 0x8B: // movl r, a
  case 0x8F: // popl a
    debug_only(has_disp32 = true);
    break;

  case 0x68: // pushq #32
    if (which == end_pc_operand) {
      return ip + 4;
    }
    assert(which == imm_operand && !is_64bit, "pushl has no disp32 or 64bit immediate");
    return ip; // not produced by emit_operand

  case 0x66: // movw ... (size prefix)
    again_after_size_prefix2:
    switch (0xFF & *ip++) {
    case REX:
    case REX_B:
    case REX_X:
    case REX_XB:
    case REX_R:
    case REX_RB:
    case REX_RX:
    case REX_RXB:
    case REX_W:
    case REX_WB:
    case REX_WX:
    case REX_WXB:
    case REX_WR:
    case REX_WRB:
    case REX_WRX:
    case REX_WRXB:
      NOT_LP64(assert(false, "64bit prefix found"));
      goto again_after_size_prefix2;
    case 0x8B: // movw r, a
    case 0x89: // movw a, r
      debug_only(has_disp32 = true);
      break;
    case 0xC7: // movw a, #16
      debug_only(has_disp32 = true);
      tail_size = 2; // the imm16
      break;
    case 0x0F: // several SSE/SSE2 variants
      ip--;    // reparse the 0x0F
      goto again_after_prefix;
    default:
      ShouldNotReachHere();
    }
    break;

  case REP8(0xB8): // movl/q r, #32/#64(oop?)
    if (which == end_pc_operand)  return ip + (is_64bit ? 8 : 4);
    // these asserts are somewhat nonsensical
#ifndef _LP64
    assert(which == imm_operand || which == disp32_operand,
           err_msg("which %d is_64_bit %d ip " INTPTR_FORMAT, which, is_64bit, ip));
#else
    assert((which == call32_operand || which == imm_operand) && is_64bit ||
           which == narrow_oop_operand && !is_64bit,
           err_msg("which %d is_64_bit %d ip " INTPTR_FORMAT, which, is_64bit, ip));
#endif // _LP64
    return ip;

  case 0x69: // imul r, a, #32
  case 0xC7: // movl a, #32(oop?)
    tail_size = 4;
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0x0F: // movx..., etc.
    switch (0xFF & *ip++) {
    case 0x3A: // pcmpestri
      tail_size = 1;
    case 0x38: // ptest, pmovzxbw
      ip++; // skip opcode
      debug_only(has_disp32 = true); // has both kinds of operands!
      break;

    case 0x70: // pshufd r, r/a, #8
      debug_only(has_disp32 = true); // has both kinds of operands!
    case 0x73: // psrldq r, #8
      tail_size = 1;
      break;

    case 0x12: // movlps
    case 0x28: // movaps
    case 0x2E: // ucomiss
    case 0x2F: // comiss
    case 0x54: // andps
    case 0x55: // andnps
    case 0x56: // orps
    case 0x57: // xorps
    case 0x6E: // movd
    case 0x7E: // movd
    case 0xAE: // ldmxcsr, stmxcsr, fxrstor, fxsave, clflush
      debug_only(has_disp32 = true);
      break;

    case 0xAD: // shrd r, a, %cl
    case 0xAF: // imul r, a
    case 0xBE: // movsbl r, a (movsxb)
    case 0xBF: // movswl r, a (movsxw)
    case 0xB6: // movzbl r, a (movzxb)
    case 0xB7: // movzwl r, a (movzxw)
    case REP16(0x40): // cmovl cc, r, a
    case 0xB0: // cmpxchgb
    case 0xB1: // cmpxchg
    case 0xC1: // xaddl
    case 0xC7: // cmpxchg8
    case REP16(0x90): // setcc a
      debug_only(has_disp32 = true);
      // fall out of the switch to decode the address
      break;

    case 0xC4: // pinsrw r, a, #8
      debug_only(has_disp32 = true);
    case 0xC5: // pextrw r, r, #8
      tail_size = 1; // the imm8
      break;

    case 0xAC: // shrd r, a, #8
      debug_only(has_disp32 = true);
      tail_size = 1; // the imm8
      break;

    case REP16(0x80): // jcc rdisp32
      if (which == end_pc_operand)  return ip + 4;
      assert(which == call32_operand, "jcc has no disp32 or imm");
      return ip;
    default:
      ShouldNotReachHere();
    }
    break;
  case 0x81: // addl a, #32; addl r, #32
    // also: orl, adcl, sbbl, andl, subl, xorl, cmpl
    // on 32bit in the case of cmpl, the imm might be an oop
    tail_size = 4;
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0x83: // addl a, #8; addl r, #8
    // also: orl, adcl, sbbl, andl, subl, xorl, cmpl
    debug_only(has_disp32 = true); // has both kinds of operands!
    tail_size = 1;
    break;

  case 0x9B:
    switch (0xFF & *ip++) {
    case 0xD9: // fnstcw a
      debug_only(has_disp32 = true);
      break;
    default:
      ShouldNotReachHere();
    }
    break;

  case REP4(0x00): // addb a, r; addl a, r; addb r, a; addl r, a
  case REP4(0x10): // adc...
  case REP4(0x20): // and...
  case REP4(0x30): // xor...
  case REP4(0x08): // or...
  case REP4(0x18): // sbb...
  case REP4(0x28): // sub...
  case 0xF7: // mull a
  case 0x8D: // lea r, a
  case 0x87: // xchg r, a
  case REP4(0x38): // cmp...
  case 0x85: // test r, a
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0xC1: // sal a, #8; sar a, #8; shl a, #8; shr a, #8
  case 0xC6: // movb a, #8
  case 0x80: // cmpb a, #8
  case 0x6B: // imul r, a, #8
    debug_only(has_disp32 = true); // has both kinds of operands!
    tail_size = 1; // the imm8
    break;

  case 0xC4: // VEX_3bytes
  case 0xC5: // VEX_2bytes
    assert((UseAVX > 0), "shouldn't have VEX prefix");
    assert(ip == inst+1, "no prefixes allowed");
    // C4 and C5 are also used as opcodes for the PINSRW and PEXTRW instructions,
    // but those carry the 0x0F prefix and are handled when 0x0F is processed above.
    //
    // In 32-bit mode the VEX first bytes C4 and C5 alias onto the LDS and LES
    // instructions (these instructions are not supported in 64-bit mode).
    // To distinguish them, bits [7:6] are set in the VEX second byte, since a
    // ModRM byte cannot be of the form 11xxxxxx in 32-bit mode. To set
    // those VEX bits, the REX and vvvv bits are inverted.
    //
    // Fortunately C2 doesn't generate these instructions so we don't need
    // to check for them in the product version.

    // Check second byte
    NOT_LP64(assert((0xC0 & *ip) == 0xC0, "shouldn't have LDS and LES instructions"));

    // First byte
    if ((0xFF & *inst) == VEX_3bytes) {
      ip++; // third byte
      is_64bit = ((VEX_W & *ip) == VEX_W);
    }
    ip++; // opcode
    // To find the end of instruction (which == end_pc_operand).
    switch (0xFF & *ip) {
    case 0x61: // pcmpestri r, r/a, #8
    case 0x70: // pshufd r, r/a, #8
    case 0x73: // psrldq r, #8
      tail_size = 1; // the imm8
      break;
    default:
      break;
    }
    ip++; // skip opcode
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0xD1: // sal a, 1; sar a, 1; shl a, 1; shr a, 1
  case 0xD3: // sal a, %cl; sar a, %cl; shl a, %cl; shr a, %cl
  case 0xD9: // fld_s a; fst_s a; fstp_s a; fldcw a
  case 0xDD: // fld_d a; fst_d a; fstp_d a
  case 0xDB: // fild_s a; fistp_s a; fld_x a; fstp_x a
  case 0xDF: // fild_d a; fistp_d a
  case 0xD8: // fadd_s a; fsubr_s a; fmul_s a; fdivr_s a; fcomp_s a
  case 0xDC: // fadd_d a; fsubr_d a; fmul_d a; fdivr_d a; fcomp_d a
  case 0xDE: // faddp_d a; fsubrp_d a; fmulp_d a; fdivrp_d a; fcompp_d a
    debug_only(has_disp32 = true);
    break;

  case 0xE8: // call rdisp32
  case 0xE9: // jmp  rdisp32
    if (which == end_pc_operand)  return ip + 4;
    assert(which == call32_operand, "call has no disp32 or imm");
    return ip;

  case 0xF0: // Lock
    assert(os::is_MP(), "only on MP");
    goto again_after_prefix;

  case 0xF3: // For SSE
  case 0xF2: // For SSE2
    switch (0xFF & *ip++) {
    case REX:
    case REX_B:
    case REX_X:
    case REX_XB:
    case REX_R:
    case REX_RB:
    case REX_RX:
    case REX_RXB:
    case REX_W:
    case REX_WB:
    case REX_WX:
    case REX_WXB:
    case REX_WR:
    case REX_WRB:
    case REX_WRX:
    case REX_WRXB:
      NOT_LP64(assert(false, "found 64bit prefix"));
      ip++;
    default:
      ip++;
    }
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  default:
    ShouldNotReachHere();

#undef REP8
#undef REP16
  }

  assert(which != call32_operand, "instruction is not a call, jmp, or jcc");
#ifdef _LP64
  assert(which != imm_operand, "instruction is not a movq reg, imm64");
#else
  // assert(which != imm_operand || has_imm32, "instruction has no imm32 field");
  assert(which != imm_operand || has_disp32, "instruction has no imm32 field");
#endif // LP64
  assert(which != disp32_operand || has_disp32, "instruction has no disp32 field");

  // parse the output of emit_operand
  int op2 = 0xFF & *ip++;
  int base = op2 & 0x07;
  int op3 = -1;
  const int b100 = 4;
  const int b101 = 5;
  if (base == b100 && (op2 >> 6) != 3) {
    op3 = 0xFF & *ip++;
    base = op3 & 0x07; // refetch the base
  }
  // now ip points at the disp (if any)

  switch (op2 >> 6) {
  case 0:
    // [00 reg  100][ss index base]
    // [00 reg  100][00   100  esp]
    // [00 reg base]
    // [00 reg  100][ss index  101][disp32]
    // [00 reg  101]               [disp32]

    if (base == b101) {
      if (which == disp32_operand)
        return ip; // caller wants the disp32
      ip += 4;     // skip the disp32
    }
    break;

  case 1:
    // [01 reg  100][ss index base][disp8]
    // [01 reg  100][00   100  esp][disp8]
    // [01 reg base]               [disp8]
    ip += 1; // skip the disp8
    break;

  case 2:
    // [10 reg  100][ss index base][disp32]
    // [10 reg  100][00   100  esp][disp32]
    // [10 reg base]               [disp32]
    if (which == disp32_operand)
      return ip; // caller wants the disp32
    ip += 4;     // skip the disp32
    break;

  case 3:
    // [11 reg base]  (not a memory addressing mode)
    break;
  }

  if (which == end_pc_operand) {
    return ip + tail_size;
  }

#ifdef _LP64
  assert(which == narrow_oop_operand && !is_64bit, "instruction is not a movl adr, imm32");
#else
  assert(which == imm_operand, "instruction has only an imm field");
#endif // LP64
  return ip;
}

address Assembler::locate_next_instruction(address inst) {
  // Secretly share code with locate_operand:
  return locate_operand(inst, end_pc_operand);
}
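// Illustrative use (added commentary, not in the original source): for the
// 5-byte instruction E8 xx xx xx xx ("call rel32"),
// locate_operand(inst, call32_operand) returns inst + 1, the address of the
// 32-bit displacement, while locate_next_instruction(inst) probes with
// end_pc_operand and returns inst + 5.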
#ifdef ASSERT
void Assembler::check_relocation(RelocationHolder const& rspec, int format) {
  address inst = inst_mark();
  assert(inst != NULL && inst < pc(), "must point to beginning of instruction");
  address opnd;

  Relocation* r = rspec.reloc();
  if (r->type() == relocInfo::none) {
    return;
  } else if (r->is_call() || format == call32_operand) {
    // assert(format == imm32_operand, "cannot specify a nonzero format");
    opnd = locate_operand(inst, call32_operand);
  } else if (r->is_data()) {
    assert(format == imm_operand || format == disp32_operand
           LP64_ONLY(|| format == narrow_oop_operand), "format ok");
    opnd = locate_operand(inst, (WhichOperand)format);
  } else {
    assert(format == imm_operand, "cannot specify a format");
    return;
  }
  assert(opnd == pc(), "must put operand where relocs can find it");
}
#endif // ASSERT

void Assembler::emit_operand32(Register reg, Address adr) {
  assert(reg->encoding() < 8, "no extended registers");
  assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
  emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
               adr._rspec);
}

void Assembler::emit_operand(Register reg, Address adr,
                             int rip_relative_correction) {
  emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
               adr._rspec,
               rip_relative_correction);
}

void Assembler::emit_operand(XMMRegister reg, Address adr) {
  emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
               adr._rspec);
}

// MMX operations
void Assembler::emit_operand(MMXRegister reg, Address adr) {
  assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
  emit_operand((Register)reg, adr._base, adr._index, adr._scale, adr._disp, adr._rspec);
}

// work around gcc (3.2.1-7a) bug
void Assembler::emit_operand(Address adr, MMXRegister reg) {
  assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
  emit_operand((Register)reg, adr._base, adr._index, adr._scale, adr._disp, adr._rspec);
}


void Assembler::emit_farith(int b1, int b2, int i) {
  assert(isByte(b1) && isByte(b2), "wrong opcode");
  assert(0 <= i && i < 8, "illegal stack offset");
  emit_byte(b1);
  emit_byte(b2 + i);
}
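// Worked example (illustrative only): emit_farith(0xD8, 0xC0, 2) emits the
// bytes D8 C2, the x87 stack form "fadd %st(2), %st"; the stack slot number
// is simply folded into the second opcode byte.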
// Now the Assembler instructions (identical for 32/64 bits)

void Assembler::adcl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_arith_operand(0x81, rdx, dst, imm32);
}

void Assembler::adcl(Address dst, Register src) {
  InstructionMark im(this);
  prefix(dst, src);
  emit_byte(0x11);
  emit_operand(src, dst);
}

void Assembler::adcl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xD0, dst, imm32);
}

void Assembler::adcl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x13);
  emit_operand(dst, src);
}

void Assembler::adcl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x13, 0xC0, dst, src);
}

void Assembler::addl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_arith_operand(0x81, rax, dst, imm32);
}

void Assembler::addl(Address dst, Register src) {
  InstructionMark im(this);
  prefix(dst, src);
  emit_byte(0x01);
  emit_operand(src, dst);
}

void Assembler::addl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xC0, dst, imm32);
}

void Assembler::addl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x03);
  emit_operand(dst, src);
}

void Assembler::addl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x03, 0xC0, dst, src);
}
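// Worked example (illustrative only, 32-bit/no-REX case):
// addl(Address(rsi, 0), 4) reaches emit_arith_operand, which picks the
// sign-extended imm8 form:
//   0x83   // 0x81 | 0x02
//   0x06   // modrm [00 reg=000 base=rsi]
//   0x04   // the imm8
// i.e. "addl $4, (%esi)".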
void Assembler::addr_nop_4() {
  assert(UseAddressNop, "no CPU support");
  // 4 bytes: NOP DWORD PTR [EAX+0]
  emit_byte(0x0F);
  emit_byte(0x1F);
  emit_byte(0x40); // emit_rm(cbuf, 0x1, EAX_enc, EAX_enc);
  emit_byte(0);    // 8-bits offset (1 byte)
}

void Assembler::addr_nop_5() {
  assert(UseAddressNop, "no CPU support");
  // 5 bytes: NOP DWORD PTR [EAX+EAX*0+0] 8-bits offset
  emit_byte(0x0F);
  emit_byte(0x1F);
  emit_byte(0x44); // emit_rm(cbuf, 0x1, EAX_enc, 0x4);
  emit_byte(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
  emit_byte(0);    // 8-bits offset (1 byte)
}

void Assembler::addr_nop_7() {
  assert(UseAddressNop, "no CPU support");
  // 7 bytes: NOP DWORD PTR [EAX+0] 32-bits offset
  emit_byte(0x0F);
  emit_byte(0x1F);
  emit_byte(0x80); // emit_rm(cbuf, 0x2, EAX_enc, EAX_enc);
  emit_long(0);    // 32-bits offset (4 bytes)
}

void Assembler::addr_nop_8() {
  assert(UseAddressNop, "no CPU support");
  // 8 bytes: NOP DWORD PTR [EAX+EAX*0+0] 32-bits offset
  emit_byte(0x0F);
  emit_byte(0x1F);
  emit_byte(0x84); // emit_rm(cbuf, 0x2, EAX_enc, 0x4);
  emit_byte(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
  emit_long(0);    // 32-bits offset (4 bytes)
}

void Assembler::addsd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x58, dst, src, VEX_SIMD_F2);
}

void Assembler::addsd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x58, dst, src, VEX_SIMD_F2);
}

void Assembler::addss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_simd_arith(0x58, dst, src, VEX_SIMD_F3);
}

void Assembler::addss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_simd_arith(0x58, dst, src, VEX_SIMD_F3);
}

void Assembler::aesdec(XMMRegister dst, Address src) {
  assert(VM_Version::supports_aes(), "");
  InstructionMark im(this);
  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
  emit_byte(0xde);
  emit_operand(dst, src);
}

void Assembler::aesdec(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_aes(), "");
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
  emit_byte(0xde);
  emit_byte(0xC0 | encode);
}

void Assembler::aesdeclast(XMMRegister dst, Address src) {
  assert(VM_Version::supports_aes(), "");
  InstructionMark im(this);
  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
  emit_byte(0xdf);
  emit_operand(dst, src);
}

void Assembler::aesdeclast(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_aes(), "");
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
  emit_byte(0xdf);
  emit_byte(0xC0 | encode);
}

void Assembler::aesenc(XMMRegister dst, Address src) {
  assert(VM_Version::supports_aes(), "");
  InstructionMark im(this);
  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
  emit_byte(0xdc);
  emit_operand(dst, src);
}

void Assembler::aesenc(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_aes(), "");
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
  emit_byte(0xdc);
  emit_byte(0xC0 | encode);
}

void Assembler::aesenclast(XMMRegister dst, Address src) {
  assert(VM_Version::supports_aes(), "");
  InstructionMark im(this);
  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
  emit_byte(0xdd);
  emit_operand(dst, src);
}

void Assembler::aesenclast(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_aes(), "");
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
  emit_byte(0xdd);
  emit_byte(0xC0 | encode);
}
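// Illustrative encoding (assuming UseAVX is off, so simd_prefix_and_encode
// picks the legacy 66 0F 38 escape): aesenc(xmm0, xmm1) emits
//   66 0F 38 DC C1
// With AVX enabled the equivalent VEX-encoded form is produced instead.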
void Assembler::andl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0x81);
  emit_operand(rsp, dst, 4);
  emit_long(imm32);
}

void Assembler::andl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xE0, dst, imm32);
}

void Assembler::andl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x23);
  emit_operand(dst, src);
}

void Assembler::andl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x23, 0xC0, dst, src);
}

void Assembler::bsfl(Register dst, Register src) {
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xBC);
  emit_byte(0xC0 | encode);
}

void Assembler::bsrl(Register dst, Register src) {
  assert(!VM_Version::supports_lzcnt(), "encoding is treated as LZCNT");
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xBD);
  emit_byte(0xC0 | encode);
}

void Assembler::bswapl(Register reg) { // bswap
  int encode = prefix_and_encode(reg->encoding());
  emit_byte(0x0F);
  emit_byte(0xC8 | encode);
}

void Assembler::call(Label& L, relocInfo::relocType rtype) {
  // suspect disp32 is always good
  int operand = LP64_ONLY(disp32_operand) NOT_LP64(imm_operand);

  if (L.is_bound()) {
    const int long_size = 5;
    int offs = (int)( target(L) - pc() );
    assert(offs <= 0, "assembler error");
    InstructionMark im(this);
    // 1110 1000 #32-bit disp
    emit_byte(0xE8);
    emit_data(offs - long_size, rtype, operand);
  } else {
    InstructionMark im(this);
    // 1110 1000 #32-bit disp
    L.add_patch_at(code(), locator());

    emit_byte(0xE8);
    emit_data(int(0), rtype, operand);
  }
}

void Assembler::call(Register dst) {
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xFF);
  emit_byte(0xD0 | encode);
}


void Assembler::call(Address adr) {
  InstructionMark im(this);
  prefix(adr);
  emit_byte(0xFF);
  emit_operand(rdx, adr);
}

void Assembler::call_literal(address entry, RelocationHolder const& rspec) {
  assert(entry != NULL, "call most probably wrong");
  InstructionMark im(this);
  emit_byte(0xE8);
  intptr_t disp = entry - (_code_pos + sizeof(int32_t));
  assert(is_simm32(disp), "must be 32bit offset (call2)");
  // Technically, should use call32_operand, but this format is
  // implied by the fact that we're emitting a call instruction.

  int operand = LP64_ONLY(disp32_operand) NOT_LP64(call32_operand);
  emit_data((int) disp, rspec, operand);
}

void Assembler::cdql() {
  emit_byte(0x99);
}
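// Worked example (illustrative only): a call_literal whose entry lies 0x100
// bytes past the start of this 5-byte call computes
//   disp = entry - (_code_pos + 4) = 0x100 - 5 = 0xFB
// and emits E8 FB 00 00 00; the displacement is relative to the *next*
// instruction, which is why sizeof(int32_t) is added before subtracting.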
void Assembler::cmovl(Condition cc, Register dst, Register src) {
  NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction"));
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0x40 | cc);
  emit_byte(0xC0 | encode);
}


void Assembler::cmovl(Condition cc, Register dst, Address src) {
  NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction"));
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0x40 | cc);
  emit_operand(dst, src);
}

void Assembler::cmpb(Address dst, int imm8) {
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0x80);
  emit_operand(rdi, dst, 1);
  emit_byte(imm8);
}

void Assembler::cmpl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0x81);
  emit_operand(rdi, dst, 4);
  emit_long(imm32);
}

void Assembler::cmpl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xF8, dst, imm32);
}

void Assembler::cmpl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x3B, 0xC0, dst, src);
}


void Assembler::cmpl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x3B);
  emit_operand(dst, src);
}

void Assembler::cmpw(Address dst, int imm16) {
  InstructionMark im(this);
  assert(!dst.base_needs_rex() && !dst.index_needs_rex(), "no extended registers");
  emit_byte(0x66);
  emit_byte(0x81);
  emit_operand(rdi, dst, 2);
  emit_word(imm16);
}

// The 32-bit cmpxchg compares the value at adr with the contents of rax;
// if they are equal, reg is stored into adr, otherwise the value at adr is
// loaded into rax. The ZF is set if the compared values were equal, and
// cleared otherwise.
void Assembler::cmpxchgl(Register reg, Address adr) { // cmpxchg
  InstructionMark im(this);
  prefix(adr, reg);
  emit_byte(0x0F);
  emit_byte(0xB1);
  emit_operand(reg, adr);
}
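// Typical caller pattern (sketch only; higher layers such as MacroAssembler
// are responsible for the LOCK prefix - cmpxchgl by itself is not atomic):
//   movl(rax, expected);          // compare value must sit in rax
//   lock();                       // 0xF0 prefix, see Assembler::lock() below
//   cmpxchgl(new_val, counter);   // ZF set on success
//   jcc(Assembler::notEqual, retry);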
void Assembler::comisd(XMMRegister dst, Address src) {
  // NOTE: dbx seems to decode this as comiss even though the
  // 0x66 is there. Strangely, ucomisd comes out correct.
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_66);
}

void Assembler::comisd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_66);
}

void Assembler::comiss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_NONE);
}

void Assembler::comiss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_NONE);
}

void Assembler::cvtdq2pd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith_nonds(0xE6, dst, src, VEX_SIMD_F3);
}

void Assembler::cvtdq2ps(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith_nonds(0x5B, dst, src, VEX_SIMD_NONE);
}

void Assembler::cvtsd2ss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x5A, dst, src, VEX_SIMD_F2);
}

void Assembler::cvtsd2ss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x5A, dst, src, VEX_SIMD_F2);
}

void Assembler::cvtsi2sdl(XMMRegister dst, Register src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2);
  emit_byte(0x2A);
  emit_byte(0xC0 | encode);
}

void Assembler::cvtsi2sdl(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x2A, dst, src, VEX_SIMD_F2);
}

void Assembler::cvtsi2ssl(XMMRegister dst, Register src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3);
  emit_byte(0x2A);
  emit_byte(0xC0 | encode);
}

void Assembler::cvtsi2ssl(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_simd_arith(0x2A, dst, src, VEX_SIMD_F3);
}

void Assembler::cvtss2sd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x5A, dst, src, VEX_SIMD_F3);
}

void Assembler::cvtss2sd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x5A, dst, src, VEX_SIMD_F3);
}


void Assembler::cvttsd2sil(Register dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F2);
  emit_byte(0x2C);
  emit_byte(0xC0 | encode);
}

void Assembler::cvttss2sil(Register dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F3);
  emit_byte(0x2C);
  emit_byte(0xC0 | encode);
}
void Assembler::decl(Address dst) {
  // Don't use it directly. Use MacroAssembler::decrement() instead.
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0xFF);
  emit_operand(rcx, dst);
}

void Assembler::divsd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x5E, dst, src, VEX_SIMD_F2);
}

void Assembler::divsd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x5E, dst, src, VEX_SIMD_F2);
}

void Assembler::divss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_simd_arith(0x5E, dst, src, VEX_SIMD_F3);
}

void Assembler::divss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_simd_arith(0x5E, dst, src, VEX_SIMD_F3);
}

void Assembler::emms() {
  NOT_LP64(assert(VM_Version::supports_mmx(), ""));
  emit_byte(0x0F);
  emit_byte(0x77);
}

void Assembler::hlt() {
  emit_byte(0xF4);
}

void Assembler::idivl(Register src) {
  int encode = prefix_and_encode(src->encoding());
  emit_byte(0xF7);
  emit_byte(0xF8 | encode);
}

void Assembler::divl(Register src) { // Unsigned
  int encode = prefix_and_encode(src->encoding());
  emit_byte(0xF7);
  emit_byte(0xF0 | encode);
}

void Assembler::imull(Register dst, Register src) {
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xAF);
  emit_byte(0xC0 | encode);
}


void Assembler::imull(Register dst, Register src, int value) {
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  if (is8bit(value)) {
    emit_byte(0x6B);
    emit_byte(0xC0 | encode);
    emit_byte(value & 0xFF);
  } else {
    emit_byte(0x69);
    emit_byte(0xC0 | encode);
    emit_long(value);
  }
}

void Assembler::incl(Address dst) {
  // Don't use it directly. Use MacroAssembler::increment() instead.
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0xFF);
  emit_operand(rax, dst);
}

void Assembler::jcc(Condition cc, Label& L, bool maybe_short) {
  InstructionMark im(this);
  assert((0 <= cc) && (cc < 16), "illegal cc");
  if (L.is_bound()) {
    address dst = target(L);
    assert(dst != NULL, "jcc most probably wrong");

    const int short_size = 2;
    const int long_size = 6;
    intptr_t offs = (intptr_t)dst - (intptr_t)_code_pos;
    if (maybe_short && is8bit(offs - short_size)) {
      // 0111 tttn #8-bit disp
      emit_byte(0x70 | cc);
      emit_byte((offs - short_size) & 0xFF);
    } else {
      // 0000 1111 1000 tttn #32-bit disp
      assert(is_simm32(offs - long_size),
             "must be 32bit offset (call4)");
      emit_byte(0x0F);
      emit_byte(0x80 | cc);
      emit_long(offs - long_size);
    }
  } else {
    // Note: we could eliminate conditional jumps to this jump if the
    // condition is the same; however, that seems to be a rather unlikely case.
    // Note: use jccb() if the label to be bound is very close, to get
    // an 8-bit displacement
    L.add_patch_at(code(), locator());
    emit_byte(0x0F);
    emit_byte(0x80 | cc);
    emit_long(0);
  }
}

void Assembler::jccb(Condition cc, Label& L) {
  if (L.is_bound()) {
    const int short_size = 2;
    address entry = target(L);
#ifdef ASSERT
    intptr_t dist = (intptr_t)entry - ((intptr_t)_code_pos + short_size);
    intptr_t delta = short_branch_delta();
    if (delta != 0) {
      dist += (dist < 0 ? (-delta) : delta);
    }
    assert(is8bit(dist), "Displacement too large for a short jmp");
#endif
    intptr_t offs = (intptr_t)entry - (intptr_t)_code_pos;
    // 0111 tttn #8-bit disp
    emit_byte(0x70 | cc);
    emit_byte((offs - short_size) & 0xFF);
  } else {
    InstructionMark im(this);
    L.add_patch_at(code(), locator());
    emit_byte(0x70 | cc);
    emit_byte(0);
  }
}
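// Worked example (illustrative only): with a label bound 10 bytes behind the
// current position, jccb(equal, L) computes offs = -10 and emits
//   74 F4    // 0x70 | equal, then (offs - short_size) & 0xFF
// whereas jcc() falls back to the 6-byte 0F 8x rel32 form whenever the
// displacement does not fit in 8 bits.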
void Assembler::jmp(Address adr) {
  InstructionMark im(this);
  prefix(adr);
  emit_byte(0xFF);
  emit_operand(rsp, adr);
}

void Assembler::jmp(Label& L, bool maybe_short) {
  if (L.is_bound()) {
    address entry = target(L);
    assert(entry != NULL, "jmp most probably wrong");
    InstructionMark im(this);
    const int short_size = 2;
    const int long_size = 5;
    intptr_t offs = entry - _code_pos;
    if (maybe_short && is8bit(offs - short_size)) {
      emit_byte(0xEB);
      emit_byte((offs - short_size) & 0xFF);
    } else {
      emit_byte(0xE9);
      emit_long(offs - long_size);
    }
  } else {
    // By default, forward jumps are always 32-bit displacements, since
    // we can't yet know where the label will be bound. If you're sure that
    // the forward jump will not run beyond 256 bytes, use jmpb to
    // force an 8-bit displacement.
    InstructionMark im(this);
    L.add_patch_at(code(), locator());
    emit_byte(0xE9);
    emit_long(0);
  }
}

void Assembler::jmp(Register entry) {
  int encode = prefix_and_encode(entry->encoding());
  emit_byte(0xFF);
  emit_byte(0xE0 | encode);
}

void Assembler::jmp_literal(address dest, RelocationHolder const& rspec) {
  InstructionMark im(this);
  emit_byte(0xE9);
  assert(dest != NULL, "must have a target");
  intptr_t disp = dest - (_code_pos + sizeof(int32_t));
  assert(is_simm32(disp), "must be 32bit offset (jmp)");
  emit_data(disp, rspec.reloc(), call32_operand);
}

void Assembler::jmpb(Label& L) {
  if (L.is_bound()) {
    const int short_size = 2;
    address entry = target(L);
    assert(entry != NULL, "jmp most probably wrong");
#ifdef ASSERT
    intptr_t dist = (intptr_t)entry - ((intptr_t)_code_pos + short_size);
    intptr_t delta = short_branch_delta();
    if (delta != 0) {
      dist += (dist < 0 ? (-delta) : delta);
    }
    assert(is8bit(dist), "Displacement too large for a short jmp");
#endif
    intptr_t offs = entry - _code_pos;
    emit_byte(0xEB);
    emit_byte((offs - short_size) & 0xFF);
  } else {
    InstructionMark im(this);
    L.add_patch_at(code(), locator());
    emit_byte(0xEB);
    emit_byte(0);
  }
}
void Assembler::ldmxcsr( Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  prefix(src);
  emit_byte(0x0F);
  emit_byte(0xAE);
  emit_operand(as_Register(2), src);
}

void Assembler::leal(Register dst, Address src) {
  InstructionMark im(this);
#ifdef _LP64
  emit_byte(0x67); // addr32
  prefix(src, dst);
#endif // LP64
  emit_byte(0x8D);
  emit_operand(dst, src);
}

void Assembler::lock() {
  emit_byte(0xF0);
}

void Assembler::lzcntl(Register dst, Register src) {
  assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR");
  emit_byte(0xF3);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xBD);
  emit_byte(0xC0 | encode);
}

// Emit mfence instruction
void Assembler::mfence() {
  NOT_LP64(assert(VM_Version::supports_sse2(), "unsupported");)
  emit_byte( 0x0F );
  emit_byte( 0xAE );
  emit_byte( 0xF0 );
}

void Assembler::mov(Register dst, Register src) {
  LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
}

void Assembler::movapd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith_nonds(0x28, dst, src, VEX_SIMD_66);
}

void Assembler::movaps(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_simd_arith_nonds(0x28, dst, src, VEX_SIMD_NONE);
}

void Assembler::movlhps(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  int encode = simd_prefix_and_encode(dst, src, src, VEX_SIMD_NONE);
  emit_byte(0x16);
  emit_byte(0xC0 | encode);
}

void Assembler::movb(Register dst, Address src) {
  NOT_LP64(assert(dst->has_byte_register(), "must have byte register"));
  InstructionMark im(this);
  prefix(src, dst, true);
  emit_byte(0x8A);
  emit_operand(dst, src);
}
void Assembler::movb(Address dst, int imm8) {
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0xC6);
  emit_operand(rax, dst, 1);
  emit_byte(imm8);
}


void Assembler::movb(Address dst, Register src) {
  assert(src->has_byte_register(), "must have byte register");
  InstructionMark im(this);
  prefix(dst, src, true);
  emit_byte(0x88);
  emit_operand(src, dst);
}

void Assembler::movdl(XMMRegister dst, Register src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66);
  emit_byte(0x6E);
  emit_byte(0xC0 | encode);
}

void Assembler::movdl(Register dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  // swap src/dst to get correct prefix
  int encode = simd_prefix_and_encode(src, dst, VEX_SIMD_66);
  emit_byte(0x7E);
  emit_byte(0xC0 | encode);
}

void Assembler::movdl(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  simd_prefix(dst, src, VEX_SIMD_66);
  emit_byte(0x6E);
  emit_operand(dst, src);
}

void Assembler::movdl(Address dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  simd_prefix(dst, src, VEX_SIMD_66);
  emit_byte(0x7E);
  emit_operand(src, dst);
}

void Assembler::movdqa(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_66);
}

void Assembler::movdqu(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_F3);
}

void Assembler::movdqu(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_F3);
}

void Assembler::movdqu(Address dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  simd_prefix(dst, src, VEX_SIMD_F3);
  emit_byte(0x7F);
  emit_operand(src, dst);
}
// Move Unaligned 256bit Vector
void Assembler::vmovdqu(XMMRegister dst, XMMRegister src) {
  assert(UseAVX, "");
  bool vector256 = true;
  int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F3, vector256);
  emit_byte(0x6F);
  emit_byte(0xC0 | encode);
}

void Assembler::vmovdqu(XMMRegister dst, Address src) {
  assert(UseAVX, "");
  InstructionMark im(this);
  bool vector256 = true;
  vex_prefix(dst, xnoreg, src, VEX_SIMD_F3, vector256);
  emit_byte(0x6F);
  emit_operand(dst, src);
}

void Assembler::vmovdqu(Address dst, XMMRegister src) {
  assert(UseAVX, "");
  InstructionMark im(this);
  bool vector256 = true;
  // swap src<->dst for encoding
  assert(src != xnoreg, "sanity");
  vex_prefix(src, xnoreg, dst, VEX_SIMD_F3, vector256);
  emit_byte(0x7F);
  emit_operand(src, dst);
}

// Uses zero extension on 64bit

void Assembler::movl(Register dst, int32_t imm32) {
  int encode = prefix_and_encode(dst->encoding());
  emit_byte(0xB8 | encode);
  emit_long(imm32);
}

void Assembler::movl(Register dst, Register src) {
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x8B);
  emit_byte(0xC0 | encode);
}

void Assembler::movl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x8B);
  emit_operand(dst, src);
}

void Assembler::movl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_byte(0xC7);
  emit_operand(rax, dst, 4);
  emit_long(imm32);
}

void Assembler::movl(Address dst, Register src) {
  InstructionMark im(this);
  prefix(dst, src);
  emit_byte(0x89);
  emit_operand(src, dst);
}

// Newer CPUs require the use of movsd and movss to avoid a partial register
// stall when loading from memory. But for old Opterons, use movlpd instead
// of movsd. The selection is done in MacroAssembler::movdbl() and movflt().
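// Sketch of the selection described above (assumed shape of
// MacroAssembler::movdbl, simplified for illustration):
//   if (UseXmmLoadAndClearUpper) movsd (dst, src);  // newer CPUs: no partial stall
//   else                         movlpd(dst, src);  // old Opteron: movlpd is faster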
void Assembler::movlpd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x12, dst, src, VEX_SIMD_66);
}

void Assembler::movq( MMXRegister dst, Address src ) {
  assert( VM_Version::supports_mmx(), "" );
  emit_byte(0x0F);
  emit_byte(0x6F);
  emit_operand(dst, src);
}

void Assembler::movq( Address dst, MMXRegister src ) {
  assert( VM_Version::supports_mmx(), "" );
  emit_byte(0x0F);
  emit_byte(0x7F);
  // workaround gcc (3.2.1-7a) bug
  // In that version of gcc, with only an emit_operand(MMX, Address) available,
  // gcc will tail jump and try to reverse the parameters, completely
  // obliterating dst in the process. By having a version available
  // that doesn't need to swap the args at the tail jump the bug is
  // avoided.
  emit_operand(dst, src);
}

void Assembler::movq(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  simd_prefix(dst, src, VEX_SIMD_F3);
  emit_byte(0x7E);
  emit_operand(dst, src);
}

void Assembler::movq(Address dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  simd_prefix(dst, src, VEX_SIMD_66);
  emit_byte(0xD6);
  emit_operand(src, dst);
}

void Assembler::movsbl(Register dst, Address src) { // movsxb
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0xBE);
  emit_operand(dst, src);
}

void Assembler::movsbl(Register dst, Register src) { // movsxb
  NOT_LP64(assert(src->has_byte_register(), "must have byte register"));
  int encode = prefix_and_encode(dst->encoding(), src->encoding(), true);
  emit_byte(0x0F);
  emit_byte(0xBE);
  emit_byte(0xC0 | encode);
}

void Assembler::movsd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith(0x10, dst, src, VEX_SIMD_F2);
}

void Assembler::movsd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  emit_simd_arith_nonds(0x10, dst, src, VEX_SIMD_F2);
}

void Assembler::movsd(Address dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  simd_prefix(dst, src, VEX_SIMD_F2);
  emit_byte(0x11);
  emit_operand(src, dst);
}

void Assembler::movss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_simd_arith(0x10, dst, src, VEX_SIMD_F3);
}

void Assembler::movss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  emit_simd_arith_nonds(0x10, dst, src, VEX_SIMD_F3);
}

void Assembler::movss(Address dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  simd_prefix(dst, src, VEX_SIMD_F3);
  emit_byte(0x11);
  emit_operand(src, dst);
}

void Assembler::movswl(Register dst, Address src) { // movsxw
  InstructionMark im(this);
  prefix(src, dst);
  emit_byte(0x0F);
  emit_byte(0xBF);
  emit_operand(dst, src);
}

void Assembler::movswl(Register dst, Register src) { // movsxw
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_byte(0x0F);
  emit_byte(0xBF);
  emit_byte(0xC0 | encode);
}
| encode); 1849 } 1850 1851 void Assembler::movw(Address dst, int imm16) { 1852 InstructionMark im(this); 1853 1854 emit_byte(0x66); // switch to 16-bit mode 1855 prefix(dst); 1856 emit_byte(0xC7); 1857 emit_operand(rax, dst, 2); 1858 emit_word(imm16); 1859 } 1860 1861 void Assembler::movw(Register dst, Address src) { 1862 InstructionMark im(this); 1863 emit_byte(0x66); 1864 prefix(src, dst); 1865 emit_byte(0x8B); 1866 emit_operand(dst, src); 1867 } 1868 1869 void Assembler::movw(Address dst, Register src) { 1870 InstructionMark im(this); 1871 emit_byte(0x66); 1872 prefix(dst, src); 1873 emit_byte(0x89); 1874 emit_operand(src, dst); 1875 } 1876 1877 void Assembler::movzbl(Register dst, Address src) { // movzxb 1878 InstructionMark im(this); 1879 prefix(src, dst); 1880 emit_byte(0x0F); 1881 emit_byte(0xB6); 1882 emit_operand(dst, src); 1883 } 1884 1885 void Assembler::movzbl(Register dst, Register src) { // movzxb 1886 NOT_LP64(assert(src->has_byte_register(), "must have byte register")); 1887 int encode = prefix_and_encode(dst->encoding(), src->encoding(), true); 1888 emit_byte(0x0F); 1889 emit_byte(0xB6); 1890 emit_byte(0xC0 | encode); 1891 } 1892 1893 void Assembler::movzwl(Register dst, Address src) { // movzxw 1894 InstructionMark im(this); 1895 prefix(src, dst); 1896 emit_byte(0x0F); 1897 emit_byte(0xB7); 1898 emit_operand(dst, src); 1899 } 1900 1901 void Assembler::movzwl(Register dst, Register src) { // movzxw 1902 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 1903 emit_byte(0x0F); 1904 emit_byte(0xB7); 1905 emit_byte(0xC0 | encode); 1906 } 1907 1908 void Assembler::mull(Address src) { 1909 InstructionMark im(this); 1910 prefix(src); 1911 emit_byte(0xF7); 1912 emit_operand(rsp, src); 1913 } 1914 1915 void Assembler::mull(Register src) { 1916 int encode = prefix_and_encode(src->encoding()); 1917 emit_byte(0xF7); 1918 emit_byte(0xE0 | encode); 1919 } 1920 1921 void Assembler::mulsd(XMMRegister dst, Address src) { 1922 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1923 emit_simd_arith(0x59, dst, src, VEX_SIMD_F2); 1924 } 1925 1926 void Assembler::mulsd(XMMRegister dst, XMMRegister src) { 1927 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 1928 emit_simd_arith(0x59, dst, src, VEX_SIMD_F2); 1929 } 1930 1931 void Assembler::mulss(XMMRegister dst, Address src) { 1932 NOT_LP64(assert(VM_Version::supports_sse(), "")); 1933 emit_simd_arith(0x59, dst, src, VEX_SIMD_F3); 1934 } 1935 1936 void Assembler::mulss(XMMRegister dst, XMMRegister src) { 1937 NOT_LP64(assert(VM_Version::supports_sse(), "")); 1938 emit_simd_arith(0x59, dst, src, VEX_SIMD_F3); 1939 } 1940 1941 void Assembler::negl(Register dst) { 1942 int encode = prefix_and_encode(dst->encoding()); 1943 emit_byte(0xF7); 1944 emit_byte(0xD8 | encode); 1945 } 1946 1947 void Assembler::nop(int i) { 1948 #ifdef ASSERT 1949 assert(i > 0, " "); 1950 // The fancy nops aren't currently recognized by debuggers, making it a 1951 // pain to disassemble code while debugging. If asserts are on, speed is 1952 // clearly not an issue, so simply use the traditional single-byte nop 1953 // for alignment.
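// (Editor's note, illustrative: in a debug build the loop below therefore
// makes nop(3) emit 0x90 0x90 0x90 - three classic one-byte NOPs - rather
// than one of the multi-byte forms tabulated for product builds below.)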
1954 1955 for (; i > 0 ; i--) emit_byte(0x90); 1956 return; 1957 1958 #endif // ASSERT 1959 1960 if (UseAddressNop && VM_Version::is_intel()) { 1961 // 1962 // Using multi-byte nops "0x0F 0x1F [address]" for Intel 1963 // 1: 0x90 1964 // 2: 0x66 0x90 1965 // 3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching-safe padding) 1966 // 4: 0x0F 0x1F 0x40 0x00 1967 // 5: 0x0F 0x1F 0x44 0x00 0x00 1968 // 6: 0x66 0x0F 0x1F 0x44 0x00 0x00 1969 // 7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 1970 // 8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 1971 // 9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 1972 // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 1973 // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 1974 1975 // The rest of the encoding is Intel-specific - don't use consecutive address nops 1976 1977 // 12: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90 1978 // 13: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90 1979 // 14: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90 1980 // 15: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90 1981 1982 while(i >= 15) { 1983 // For Intel don't generate consecutive address nops (mix with regular nops) 1984 i -= 15; 1985 emit_byte(0x66); // size prefix 1986 emit_byte(0x66); // size prefix 1987 emit_byte(0x66); // size prefix 1988 addr_nop_8(); 1989 emit_byte(0x66); // size prefix 1990 emit_byte(0x66); // size prefix 1991 emit_byte(0x66); // size prefix 1992 emit_byte(0x90); // nop 1993 } 1994 switch (i) { 1995 case 14: 1996 emit_byte(0x66); // size prefix 1997 case 13: 1998 emit_byte(0x66); // size prefix 1999 case 12: 2000 addr_nop_8(); 2001 emit_byte(0x66); // size prefix 2002 emit_byte(0x66); // size prefix 2003 emit_byte(0x66); // size prefix 2004 emit_byte(0x90); // nop 2005 break; 2006 case 11: 2007 emit_byte(0x66); // size prefix 2008 case 10: 2009 emit_byte(0x66); // size prefix 2010 case 9: 2011 emit_byte(0x66); // size prefix 2012 case 8: 2013 addr_nop_8(); 2014 break; 2015 case 7: 2016 addr_nop_7(); 2017 break; 2018 case 6: 2019 emit_byte(0x66); // size prefix 2020 case 5: 2021 addr_nop_5(); 2022 break; 2023 case 4: 2024 addr_nop_4(); 2025 break; 2026 case 3: 2027 // Don't use "0x0F 0x1F 0x00" - need patching-safe padding 2028 emit_byte(0x66); // size prefix 2029 case 2: 2030 emit_byte(0x66); // size prefix 2031 case 1: 2032 emit_byte(0x90); // nop 2033 break; 2034 default: 2035 assert(i == 0, " "); 2036 } 2037 return; 2038 } 2039 if (UseAddressNop && VM_Version::is_amd()) { 2040 // 2041 // Using multi-byte nops "0x0F 0x1F [address]" for AMD.
2042 // 1: 0x90 2043 // 2: 0x66 0x90 2044 // 3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching-safe padding) 2045 // 4: 0x0F 0x1F 0x40 0x00 2046 // 5: 0x0F 0x1F 0x44 0x00 0x00 2047 // 6: 0x66 0x0F 0x1F 0x44 0x00 0x00 2048 // 7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 2049 // 8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 2050 // 9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 2051 // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 2052 // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 2053 2054 // The rest of the encoding is AMD-specific - use consecutive address nops 2055 2056 // 12: 0x66 0x0F 0x1F 0x44 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00 2057 // 13: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00 2058 // 14: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 2059 // 15: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 2060 // 16: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 2061 // Size prefixes (0x66) are added for larger sizes 2062 2063 while(i >= 22) { 2064 i -= 11; 2065 emit_byte(0x66); // size prefix 2066 emit_byte(0x66); // size prefix 2067 emit_byte(0x66); // size prefix 2068 addr_nop_8(); 2069 } 2070 // Generate the first nop for sizes between 12 and 21 2071 switch (i) { 2072 case 21: 2073 i -= 1; 2074 emit_byte(0x66); // size prefix 2075 case 20: 2076 case 19: 2077 i -= 1; 2078 emit_byte(0x66); // size prefix 2079 case 18: 2080 case 17: 2081 i -= 1; 2082 emit_byte(0x66); // size prefix 2083 case 16: 2084 case 15: 2085 i -= 8; 2086 addr_nop_8(); 2087 break; 2088 case 14: 2089 case 13: 2090 i -= 7; 2091 addr_nop_7(); 2092 break; 2093 case 12: 2094 i -= 6; 2095 emit_byte(0x66); // size prefix 2096 addr_nop_5(); 2097 break; 2098 default: 2099 assert(i < 12, " "); 2100 } 2101 2102 // Generate the second nop for sizes between 1 and 11 2103 switch (i) { 2104 case 11: 2105 emit_byte(0x66); // size prefix 2106 case 10: 2107 emit_byte(0x66); // size prefix 2108 case 9: 2109 emit_byte(0x66); // size prefix 2110 case 8: 2111 addr_nop_8(); 2112 break; 2113 case 7: 2114 addr_nop_7(); 2115 break; 2116 case 6: 2117 emit_byte(0x66); // size prefix 2118 case 5: 2119 addr_nop_5(); 2120 break; 2121 case 4: 2122 addr_nop_4(); 2123 break; 2124 case 3: 2125 // Don't use "0x0F 0x1F 0x00" - need patching-safe padding 2126 emit_byte(0x66); // size prefix 2127 case 2: 2128 emit_byte(0x66); // size prefix 2129 case 1: 2130 emit_byte(0x90); // nop 2131 break; 2132 default: 2133 assert(i == 0, " "); 2134 } 2135 return; 2136 } 2137 2138 // Using nops with size prefixes "0x66 0x90".
2139 // From AMD Optimization Guide: 2140 // 1: 0x90 2141 // 2: 0x66 0x90 2142 // 3: 0x66 0x66 0x90 2143 // 4: 0x66 0x66 0x66 0x90 2144 // 5: 0x66 0x66 0x90 0x66 0x90 2145 // 6: 0x66 0x66 0x90 0x66 0x66 0x90 2146 // 7: 0x66 0x66 0x66 0x90 0x66 0x66 0x90 2147 // 8: 0x66 0x66 0x66 0x90 0x66 0x66 0x66 0x90 2148 // 9: 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90 2149 // 10: 0x66 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90 2150 // 2151 while(i > 12) { 2152 i -= 4; 2153 emit_byte(0x66); // size prefix 2154 emit_byte(0x66); 2155 emit_byte(0x66); 2156 emit_byte(0x90); // nop 2157 } 2158 // 1 - 12 nops 2159 if(i > 8) { 2160 if(i > 9) { 2161 i -= 1; 2162 emit_byte(0x66); 2163 } 2164 i -= 3; 2165 emit_byte(0x66); 2166 emit_byte(0x66); 2167 emit_byte(0x90); 2168 } 2169 // 1 - 8 nops 2170 if(i > 4) { 2171 if(i > 6) { 2172 i -= 1; 2173 emit_byte(0x66); 2174 } 2175 i -= 3; 2176 emit_byte(0x66); 2177 emit_byte(0x66); 2178 emit_byte(0x90); 2179 } 2180 switch (i) { 2181 case 4: 2182 emit_byte(0x66); 2183 case 3: 2184 emit_byte(0x66); 2185 case 2: 2186 emit_byte(0x66); 2187 case 1: 2188 emit_byte(0x90); 2189 break; 2190 default: 2191 assert(i == 0, " "); 2192 } 2193 } 2194 2195 void Assembler::notl(Register dst) { 2196 int encode = prefix_and_encode(dst->encoding()); 2197 emit_byte(0xF7); 2198 emit_byte(0xD0 | encode ); 2199 } 2200 2201 void Assembler::orl(Address dst, int32_t imm32) { 2202 InstructionMark im(this); 2203 prefix(dst); 2204 emit_arith_operand(0x81, rcx, dst, imm32); 2205 } 2206 2207 void Assembler::orl(Register dst, int32_t imm32) { 2208 prefix(dst); 2209 emit_arith(0x81, 0xC8, dst, imm32); 2210 } 2211 2212 void Assembler::orl(Register dst, Address src) { 2213 InstructionMark im(this); 2214 prefix(src, dst); 2215 emit_byte(0x0B); 2216 emit_operand(dst, src); 2217 } 2218 2219 void Assembler::orl(Register dst, Register src) { 2220 (void) prefix_and_encode(dst->encoding(), src->encoding()); 2221 emit_arith(0x0B, 0xC0, dst, src); 2222 } 2223 2224 void Assembler::packuswb(XMMRegister dst, Address src) { 2225 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2226 assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); 2227 emit_simd_arith(0x67, dst, src, VEX_SIMD_66); 2228 } 2229 2230 void Assembler::packuswb(XMMRegister dst, XMMRegister src) { 2231 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2232 emit_simd_arith(0x67, dst, src, VEX_SIMD_66); 2233 } 2234 2235 void Assembler::pcmpestri(XMMRegister dst, Address src, int imm8) { 2236 assert(VM_Version::supports_sse4_2(), ""); 2237 InstructionMark im(this); 2238 simd_prefix(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A); 2239 emit_byte(0x61); 2240 emit_operand(dst, src); 2241 emit_byte(imm8); 2242 } 2243 2244 void Assembler::pcmpestri(XMMRegister dst, XMMRegister src, int imm8) { 2245 assert(VM_Version::supports_sse4_2(), ""); 2246 int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A); 2247 emit_byte(0x61); 2248 emit_byte(0xC0 | encode); 2249 emit_byte(imm8); 2250 } 2251 2252 void Assembler::pmovzxbw(XMMRegister dst, Address src) { 2253 assert(VM_Version::supports_sse4_1(), ""); 2254 InstructionMark im(this); 2255 simd_prefix(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38); 2256 emit_byte(0x30); 2257 emit_operand(dst, src); 2258 } 2259 2260 void Assembler::pmovzxbw(XMMRegister dst, XMMRegister src) { 2261 assert(VM_Version::supports_sse4_1(), ""); 2262 int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38); 2263 emit_byte(0x30); 2264 emit_byte(0xC0 | encode); 2265 } 2266 2267 // 
generic 2268 void Assembler::pop(Register dst) { 2269 int encode = prefix_and_encode(dst->encoding()); 2270 emit_byte(0x58 | encode); 2271 } 2272 2273 void Assembler::popcntl(Register dst, Address src) { 2274 assert(VM_Version::supports_popcnt(), "must support"); 2275 InstructionMark im(this); 2276 emit_byte(0xF3); 2277 prefix(src, dst); 2278 emit_byte(0x0F); 2279 emit_byte(0xB8); 2280 emit_operand(dst, src); 2281 } 2282 2283 void Assembler::popcntl(Register dst, Register src) { 2284 assert(VM_Version::supports_popcnt(), "must support"); 2285 emit_byte(0xF3); 2286 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 2287 emit_byte(0x0F); 2288 emit_byte(0xB8); 2289 emit_byte(0xC0 | encode); 2290 } 2291 2292 void Assembler::popf() { 2293 emit_byte(0x9D); 2294 } 2295 2296 #ifndef _LP64 // no 32bit push/pop on amd64 2297 void Assembler::popl(Address dst) { 2298 // NOTE: this would adjust the stack by 8 bytes on 64-bit 2299 InstructionMark im(this); 2300 prefix(dst); 2301 emit_byte(0x8F); 2302 emit_operand(rax, dst); 2303 } 2304 #endif 2305 2306 void Assembler::prefetch_prefix(Address src) { 2307 prefix(src); 2308 emit_byte(0x0F); 2309 } 2310 2311 void Assembler::prefetchnta(Address src) { 2312 NOT_LP64(assert(VM_Version::supports_sse(), "must support")); 2313 InstructionMark im(this); 2314 prefetch_prefix(src); 2315 emit_byte(0x18); 2316 emit_operand(rax, src); // 0, src 2317 } 2318 2319 void Assembler::prefetchr(Address src) { 2320 assert(VM_Version::supports_3dnow_prefetch(), "must support"); 2321 InstructionMark im(this); 2322 prefetch_prefix(src); 2323 emit_byte(0x0D); 2324 emit_operand(rax, src); // 0, src 2325 } 2326 2327 void Assembler::prefetcht0(Address src) { 2328 NOT_LP64(assert(VM_Version::supports_sse(), "must support")); 2329 InstructionMark im(this); 2330 prefetch_prefix(src); 2331 emit_byte(0x18); 2332 emit_operand(rcx, src); // 1, src 2333 } 2334 2335 void Assembler::prefetcht1(Address src) { 2336 NOT_LP64(assert(VM_Version::supports_sse(), "must support")); 2337 InstructionMark im(this); 2338 prefetch_prefix(src); 2339 emit_byte(0x18); 2340 emit_operand(rdx, src); // 2, src 2341 } 2342 2343 void Assembler::prefetcht2(Address src) { 2344 NOT_LP64(assert(VM_Version::supports_sse(), "must support")); 2345 InstructionMark im(this); 2346 prefetch_prefix(src); 2347 emit_byte(0x18); 2348 emit_operand(rbx, src); // 3, src 2349 } 2350 2351 void Assembler::prefetchw(Address src) { 2352 assert(VM_Version::supports_3dnow_prefetch(), "must support"); 2353 InstructionMark im(this); 2354 prefetch_prefix(src); 2355 emit_byte(0x0D); 2356 emit_operand(rcx, src); // 1, src 2357 } 2358 2359 void Assembler::prefix(Prefix p) { 2360 a_byte(p); 2361 } 2362 2363 void Assembler::pshufb(XMMRegister dst, XMMRegister src) { 2364 assert(VM_Version::supports_ssse3(), ""); 2365 int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38); 2366 emit_byte(0x00); 2367 emit_byte(0xC0 | encode); 2368 } 2369 2370 void Assembler::pshufb(XMMRegister dst, Address src) { 2371 assert(VM_Version::supports_ssse3(), ""); 2372 assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); 2373 InstructionMark im(this); 2374 simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38); 2375 emit_byte(0x00); 2376 emit_operand(dst, src); 2377 } 2378 2379 void Assembler::pshufd(XMMRegister dst, XMMRegister src, int mode) { 2380 assert(isByte(mode), "invalid value"); 2381 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2382 emit_simd_arith_nonds(0x70, dst, src, VEX_SIMD_66); 2383
emit_byte(mode & 0xFF); 2384 2385 } 2386 2387 void Assembler::pshufd(XMMRegister dst, Address src, int mode) { 2388 assert(isByte(mode), "invalid value"); 2389 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2390 assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); 2391 InstructionMark im(this); 2392 simd_prefix(dst, src, VEX_SIMD_66); 2393 emit_byte(0x70); 2394 emit_operand(dst, src); 2395 emit_byte(mode & 0xFF); 2396 } 2397 2398 void Assembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) { 2399 assert(isByte(mode), "invalid value"); 2400 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2401 emit_simd_arith_nonds(0x70, dst, src, VEX_SIMD_F2); 2402 emit_byte(mode & 0xFF); 2403 } 2404 2405 void Assembler::pshuflw(XMMRegister dst, Address src, int mode) { 2406 assert(isByte(mode), "invalid value"); 2407 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2408 assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); 2409 InstructionMark im(this); 2410 simd_prefix(dst, src, VEX_SIMD_F2); 2411 emit_byte(0x70); 2412 emit_operand(dst, src); 2413 emit_byte(mode & 0xFF); 2414 } 2415 2416 void Assembler::psrldq(XMMRegister dst, int shift) { 2417 // Shift 128 bit value in xmm register by number of bytes. 2418 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2419 int encode = simd_prefix_and_encode(xmm3, dst, dst, VEX_SIMD_66); 2420 emit_byte(0x73); 2421 emit_byte(0xC0 | encode); 2422 emit_byte(shift); 2423 } 2424 2425 void Assembler::ptest(XMMRegister dst, Address src) { 2426 assert(VM_Version::supports_sse4_1(), ""); 2427 assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); 2428 InstructionMark im(this); 2429 simd_prefix(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38); 2430 emit_byte(0x17); 2431 emit_operand(dst, src); 2432 } 2433 2434 void Assembler::ptest(XMMRegister dst, XMMRegister src) { 2435 assert(VM_Version::supports_sse4_1(), ""); 2436 int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38); 2437 emit_byte(0x17); 2438 emit_byte(0xC0 | encode); 2439 } 2440 2441 void Assembler::punpcklbw(XMMRegister dst, Address src) { 2442 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2443 assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); 2444 emit_simd_arith(0x60, dst, src, VEX_SIMD_66); 2445 } 2446 2447 void Assembler::punpcklbw(XMMRegister dst, XMMRegister src) { 2448 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2449 emit_simd_arith(0x60, dst, src, VEX_SIMD_66); 2450 } 2451 2452 void Assembler::punpckldq(XMMRegister dst, Address src) { 2453 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2454 assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); 2455 emit_simd_arith(0x62, dst, src, VEX_SIMD_66); 2456 } 2457 2458 void Assembler::punpckldq(XMMRegister dst, XMMRegister src) { 2459 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2460 emit_simd_arith(0x62, dst, src, VEX_SIMD_66); 2461 } 2462 2463 void Assembler::punpcklqdq(XMMRegister dst, XMMRegister src) { 2464 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2465 emit_simd_arith(0x6C, dst, src, VEX_SIMD_66); 2466 } 2467 2468 void Assembler::push(int32_t imm32) { 2469 // in 64bits we push 64bits onto the stack but only 2470 // take a 32bit immediate 2471 emit_byte(0x68); 2472 emit_long(imm32); 2473 } 2474 2475 void Assembler::push(Register src) { 2476 int encode = prefix_and_encode(src->encoding()); 2477 2478 emit_byte(0x50 | encode); 2479 } 2480 2481 void Assembler::pushf() { 2482 emit_byte(0x9C); 2483 } 2484 2485 #ifndef 
_LP64 // no 32bit push/pop on amd64 2486 void Assembler::pushl(Address src) { 2487 // Note: this will push 64 bits on 64-bit 2488 InstructionMark im(this); 2489 prefix(src); 2490 emit_byte(0xFF); 2491 emit_operand(rsi, src); 2492 } 2493 #endif 2494 2495 void Assembler::rcll(Register dst, int imm8) { 2496 assert(isShiftCount(imm8), "illegal shift count"); 2497 int encode = prefix_and_encode(dst->encoding()); 2498 if (imm8 == 1) { 2499 emit_byte(0xD1); 2500 emit_byte(0xD0 | encode); 2501 } else { 2502 emit_byte(0xC1); 2503 emit_byte(0xD0 | encode); 2504 emit_byte(imm8); 2505 } 2506 } 2507 2508 // copies data from [esi] to [edi] using rcx pointer-sized words 2509 // generic 2510 void Assembler::rep_mov() { 2511 emit_byte(0xF3); 2512 // MOVSQ 2513 LP64_ONLY(prefix(REX_W)); 2514 emit_byte(0xA5); 2515 } 2516 2517 // stores rax into rcx pointer-sized words starting at [edi] 2518 // generic 2519 void Assembler::rep_set() { // rep_set 2520 emit_byte(0xF3); 2521 // STOSQ 2522 LP64_ONLY(prefix(REX_W)); 2523 emit_byte(0xAB); 2524 } 2525 2526 // scans rcx pointer-sized words at [edi] for an occurrence of rax 2527 // generic 2528 void Assembler::repne_scan() { // repne_scan 2529 emit_byte(0xF2); 2530 // SCASQ 2531 LP64_ONLY(prefix(REX_W)); 2532 emit_byte(0xAF); 2533 } 2534 2535 #ifdef _LP64 2536 // scans rcx 4-byte words at [edi] for an occurrence of rax 2537 // generic 2538 void Assembler::repne_scanl() { // repne_scan 2539 emit_byte(0xF2); 2540 // SCASL 2541 emit_byte(0xAF); 2542 } 2543 #endif 2544 2545 void Assembler::ret(int imm16) { 2546 if (imm16 == 0) { 2547 emit_byte(0xC3); 2548 } else { 2549 emit_byte(0xC2); 2550 emit_word(imm16); 2551 } 2552 } 2553 2554 void Assembler::sahf() { 2555 #ifdef _LP64 2556 // Not supported in 64bit mode 2557 ShouldNotReachHere(); 2558 #endif 2559 emit_byte(0x9E); 2560 } 2561 2562 void Assembler::sarl(Register dst, int imm8) { 2563 int encode = prefix_and_encode(dst->encoding()); 2564 assert(isShiftCount(imm8), "illegal shift count"); 2565 if (imm8 == 1) { 2566 emit_byte(0xD1); 2567 emit_byte(0xF8 | encode); 2568 } else { 2569 emit_byte(0xC1); 2570 emit_byte(0xF8 | encode); 2571 emit_byte(imm8); 2572 } 2573 } 2574 2575 void Assembler::sarl(Register dst) { 2576 int encode = prefix_and_encode(dst->encoding()); 2577 emit_byte(0xD3); 2578 emit_byte(0xF8 | encode); 2579 } 2580 2581 void Assembler::sbbl(Address dst, int32_t imm32) { 2582 InstructionMark im(this); 2583 prefix(dst); 2584 emit_arith_operand(0x81, rbx, dst, imm32); 2585 } 2586 2587 void Assembler::sbbl(Register dst, int32_t imm32) { 2588 prefix(dst); 2589 emit_arith(0x81, 0xD8, dst, imm32); 2590 } 2591 2592 2593 void Assembler::sbbl(Register dst, Address src) { 2594 InstructionMark im(this); 2595 prefix(src, dst); 2596 emit_byte(0x1B); 2597 emit_operand(dst, src); 2598 } 2599 2600 void Assembler::sbbl(Register dst, Register src) { 2601 (void) prefix_and_encode(dst->encoding(), src->encoding()); 2602 emit_arith(0x1B, 0xC0, dst, src); 2603 } 2604 2605 void Assembler::setb(Condition cc, Register dst) { 2606 assert(0 <= cc && cc < 16, "illegal cc"); 2607 int encode = prefix_and_encode(dst->encoding(), true); 2608 emit_byte(0x0F); 2609 emit_byte(0x90 | cc); 2610 emit_byte(0xC0 | encode); 2611 } 2612 2613 void Assembler::shll(Register dst, int imm8) { 2614 assert(isShiftCount(imm8), "illegal shift count"); 2615 int encode = prefix_and_encode(dst->encoding()); 2616 if (imm8 == 1 ) { 2617 emit_byte(0xD1); 2618 emit_byte(0xE0 | encode); 2619 } else { 2620 emit_byte(0xC1); 2621 emit_byte(0xE0 | encode); 2622 emit_byte(imm8); 2623 }
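// (Editor's sketch, hypothetical call sites: shll(rax, 1) takes the one-bit
// branch above and emits D1 E0, while shll(rax, 3) emits C1 E0 03,
// i.e. rax <<= 3.)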
2624 } 2625 2626 void Assembler::shll(Register dst) { 2627 int encode = prefix_and_encode(dst->encoding()); 2628 emit_byte(0xD3); 2629 emit_byte(0xE0 | encode); 2630 } 2631 2632 void Assembler::shrl(Register dst, int imm8) { 2633 assert(isShiftCount(imm8), "illegal shift count"); 2634 int encode = prefix_and_encode(dst->encoding()); 2635 emit_byte(0xC1); 2636 emit_byte(0xE8 | encode); 2637 emit_byte(imm8); 2638 } 2639 2640 void Assembler::shrl(Register dst) { 2641 int encode = prefix_and_encode(dst->encoding()); 2642 emit_byte(0xD3); 2643 emit_byte(0xE8 | encode); 2644 } 2645 2646 // copies a single word from [esi] to [edi] 2647 void Assembler::smovl() { 2648 emit_byte(0xA5); 2649 } 2650 2651 void Assembler::sqrtsd(XMMRegister dst, XMMRegister src) { 2652 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2653 emit_simd_arith(0x51, dst, src, VEX_SIMD_F2); 2654 } 2655 2656 void Assembler::sqrtsd(XMMRegister dst, Address src) { 2657 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2658 emit_simd_arith(0x51, dst, src, VEX_SIMD_F2); 2659 } 2660 2661 void Assembler::sqrtss(XMMRegister dst, XMMRegister src) { 2662 NOT_LP64(assert(VM_Version::supports_sse(), "")); 2663 emit_simd_arith(0x51, dst, src, VEX_SIMD_F3); 2664 } 2665 2666 void Assembler::sqrtss(XMMRegister dst, Address src) { 2667 NOT_LP64(assert(VM_Version::supports_sse(), "")); 2668 emit_simd_arith(0x51, dst, src, VEX_SIMD_F3); 2669 } 2670 2671 void Assembler::stmxcsr( Address dst) { 2672 NOT_LP64(assert(VM_Version::supports_sse(), "")); 2673 InstructionMark im(this); 2674 prefix(dst); 2675 emit_byte(0x0F); 2676 emit_byte(0xAE); 2677 emit_operand(as_Register(3), dst); 2678 } 2679 2680 void Assembler::subl(Address dst, int32_t imm32) { 2681 InstructionMark im(this); 2682 prefix(dst); 2683 emit_arith_operand(0x81, rbp, dst, imm32); 2684 } 2685 2686 void Assembler::subl(Address dst, Register src) { 2687 InstructionMark im(this); 2688 prefix(dst, src); 2689 emit_byte(0x29); 2690 emit_operand(src, dst); 2691 } 2692 2693 void Assembler::subl(Register dst, int32_t imm32) { 2694 prefix(dst); 2695 emit_arith(0x81, 0xE8, dst, imm32); 2696 } 2697 2698 // Force generation of a 4 byte immediate value even if it fits into 8bit 2699 void Assembler::subl_imm32(Register dst, int32_t imm32) { 2700 prefix(dst); 2701 emit_arith_imm32(0x81, 0xE8, dst, imm32); 2702 } 2703 2704 void Assembler::subl(Register dst, Address src) { 2705 InstructionMark im(this); 2706 prefix(src, dst); 2707 emit_byte(0x2B); 2708 emit_operand(dst, src); 2709 } 2710 2711 void Assembler::subl(Register dst, Register src) { 2712 (void) prefix_and_encode(dst->encoding(), src->encoding()); 2713 emit_arith(0x2B, 0xC0, dst, src); 2714 } 2715 2716 void Assembler::subsd(XMMRegister dst, XMMRegister src) { 2717 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2718 emit_simd_arith(0x5C, dst, src, VEX_SIMD_F2); 2719 } 2720 2721 void Assembler::subsd(XMMRegister dst, Address src) { 2722 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2723 emit_simd_arith(0x5C, dst, src, VEX_SIMD_F2); 2724 } 2725 2726 void Assembler::subss(XMMRegister dst, XMMRegister src) { 2727 NOT_LP64(assert(VM_Version::supports_sse(), "")); 2728 emit_simd_arith(0x5C, dst, src, VEX_SIMD_F3); 2729 } 2730 2731 void Assembler::subss(XMMRegister dst, Address src) { 2732 NOT_LP64(assert(VM_Version::supports_sse(), "")); 2733 emit_simd_arith(0x5C, dst, src, VEX_SIMD_F3); 2734 } 2735 2736 void Assembler::testb(Register dst, int imm8) { 2737 NOT_LP64(assert(dst->has_byte_register(), "must have byte register")); 2738 (void) 
prefix_and_encode(dst->encoding(), true); 2739 emit_arith_b(0xF6, 0xC0, dst, imm8); 2740 } 2741 2742 void Assembler::testl(Register dst, int32_t imm32) { 2743 // not using emit_arith because test 2744 // doesn't support sign-extension of 2745 // 8bit operands 2746 int encode = dst->encoding(); 2747 if (encode == 0) { 2748 emit_byte(0xA9); 2749 } else { 2750 encode = prefix_and_encode(encode); 2751 emit_byte(0xF7); 2752 emit_byte(0xC0 | encode); 2753 } 2754 emit_long(imm32); 2755 } 2756 2757 void Assembler::testl(Register dst, Register src) { 2758 (void) prefix_and_encode(dst->encoding(), src->encoding()); 2759 emit_arith(0x85, 0xC0, dst, src); 2760 } 2761 2762 void Assembler::testl(Register dst, Address src) { 2763 InstructionMark im(this); 2764 prefix(src, dst); 2765 emit_byte(0x85); 2766 emit_operand(dst, src); 2767 } 2768 2769 void Assembler::ucomisd(XMMRegister dst, Address src) { 2770 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2771 emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_66); 2772 } 2773 2774 void Assembler::ucomisd(XMMRegister dst, XMMRegister src) { 2775 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2776 emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_66); 2777 } 2778 2779 void Assembler::ucomiss(XMMRegister dst, Address src) { 2780 NOT_LP64(assert(VM_Version::supports_sse(), "")); 2781 emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_NONE); 2782 } 2783 2784 void Assembler::ucomiss(XMMRegister dst, XMMRegister src) { 2785 NOT_LP64(assert(VM_Version::supports_sse(), "")); 2786 emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_NONE); 2787 } 2788 2789 2790 void Assembler::xaddl(Address dst, Register src) { 2791 InstructionMark im(this); 2792 prefix(dst, src); 2793 emit_byte(0x0F); 2794 emit_byte(0xC1); 2795 emit_operand(src, dst); 2796 } 2797 2798 void Assembler::xchgl(Register dst, Address src) { // xchg 2799 InstructionMark im(this); 2800 prefix(src, dst); 2801 emit_byte(0x87); 2802 emit_operand(dst, src); 2803 } 2804 2805 void Assembler::xchgl(Register dst, Register src) { 2806 int encode = prefix_and_encode(dst->encoding(), src->encoding()); 2807 emit_byte(0x87); 2808 emit_byte(0xC0 | encode); 2809 } 2810 2811 void Assembler::xorl(Register dst, int32_t imm32) { 2812 prefix(dst); 2813 emit_arith(0x81, 0xF0, dst, imm32); 2814 } 2815 2816 void Assembler::xorl(Register dst, Address src) { 2817 InstructionMark im(this); 2818 prefix(src, dst); 2819 emit_byte(0x33); 2820 emit_operand(dst, src); 2821 } 2822 2823 void Assembler::xorl(Register dst, Register src) { 2824 (void) prefix_and_encode(dst->encoding(), src->encoding()); 2825 emit_arith(0x33, 0xC0, dst, src); 2826 } 2827 2828 2829 // AVX 3-operand scalar floating-point arithmetic instructions 2830 2831 void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, Address src) { 2832 assert(VM_Version::supports_avx(), ""); 2833 emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false); 2834 } 2835 2836 void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { 2837 assert(VM_Version::supports_avx(), ""); 2838 emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false); 2839 } 2840 2841 void Assembler::vaddss(XMMRegister dst, XMMRegister nds, Address src) { 2842 assert(VM_Version::supports_avx(), ""); 2843 emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false); 2844 } 2845 2846 void Assembler::vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src) { 2847 assert(VM_Version::supports_avx(), ""); 2848 emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F3, /* vector256 */
false); 2849 } 2850 2851 void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, Address src) { 2852 assert(VM_Version::supports_avx(), ""); 2853 emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false); 2854 } 2855 2856 void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { 2857 assert(VM_Version::supports_avx(), ""); 2858 emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false); 2859 } 2860 2861 void Assembler::vdivss(XMMRegister dst, XMMRegister nds, Address src) { 2862 assert(VM_Version::supports_avx(), ""); 2863 emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false); 2864 } 2865 2866 void Assembler::vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src) { 2867 assert(VM_Version::supports_avx(), ""); 2868 emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false); 2869 } 2870 2871 void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, Address src) { 2872 assert(VM_Version::supports_avx(), ""); 2873 emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false); 2874 } 2875 2876 void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { 2877 assert(VM_Version::supports_avx(), ""); 2878 emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false); 2879 } 2880 2881 void Assembler::vmulss(XMMRegister dst, XMMRegister nds, Address src) { 2882 assert(VM_Version::supports_avx(), ""); 2883 emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false); 2884 } 2885 2886 void Assembler::vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src) { 2887 assert(VM_Version::supports_avx(), ""); 2888 emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false); 2889 } 2890 2891 void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, Address src) { 2892 assert(VM_Version::supports_avx(), ""); 2893 emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false); 2894 } 2895 2896 void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { 2897 assert(VM_Version::supports_avx(), ""); 2898 emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false); 2899 } 2900 2901 void Assembler::vsubss(XMMRegister dst, XMMRegister nds, Address src) { 2902 assert(VM_Version::supports_avx(), ""); 2903 emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false); 2904 } 2905 2906 void Assembler::vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src) { 2907 assert(VM_Version::supports_avx(), ""); 2908 emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false); 2909 } 2910 2911 //====================VECTOR ARITHMETIC===================================== 2912 2913 // Floating-point vector arithmetic 2914 2915 void Assembler::addpd(XMMRegister dst, XMMRegister src) { 2916 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2917 emit_simd_arith(0x58, dst, src, VEX_SIMD_66); 2918 } 2919 2920 void Assembler::addps(XMMRegister dst, XMMRegister src) { 2921 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2922 emit_simd_arith(0x58, dst, src, VEX_SIMD_NONE); 2923 } 2924 2925 void Assembler::vaddpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { 2926 assert(VM_Version::supports_avx(), ""); 2927 emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_66, vector256); 2928 } 2929 2930 void Assembler::vaddps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { 2931 assert(VM_Version::supports_avx(), ""); 2932 emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_NONE, vector256); 2933 } 2934 2935 void
Assembler::vaddpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { 2936 assert(VM_Version::supports_avx(), ""); 2937 emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_66, vector256); 2938 } 2939 2940 void Assembler::vaddps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { 2941 assert(VM_Version::supports_avx(), ""); 2942 emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_NONE, vector256); 2943 } 2944 2945 void Assembler::subpd(XMMRegister dst, XMMRegister src) { 2946 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2947 emit_simd_arith(0x5C, dst, src, VEX_SIMD_66); 2948 } 2949 2950 void Assembler::subps(XMMRegister dst, XMMRegister src) { 2951 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2952 emit_simd_arith(0x5C, dst, src, VEX_SIMD_NONE); 2953 } 2954 2955 void Assembler::vsubpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { 2956 assert(VM_Version::supports_avx(), ""); 2957 emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_66, vector256); 2958 } 2959 2960 void Assembler::vsubps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { 2961 assert(VM_Version::supports_avx(), ""); 2962 emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_NONE, vector256); 2963 } 2964 2965 void Assembler::vsubpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { 2966 assert(VM_Version::supports_avx(), ""); 2967 emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_66, vector256); 2968 } 2969 2970 void Assembler::vsubps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { 2971 assert(VM_Version::supports_avx(), ""); 2972 emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_NONE, vector256); 2973 } 2974 2975 void Assembler::mulpd(XMMRegister dst, XMMRegister src) { 2976 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2977 emit_simd_arith(0x59, dst, src, VEX_SIMD_66); 2978 } 2979 2980 void Assembler::mulps(XMMRegister dst, XMMRegister src) { 2981 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2982 emit_simd_arith(0x59, dst, src, VEX_SIMD_NONE); 2983 } 2984 2985 void Assembler::vmulpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { 2986 assert(VM_Version::supports_avx(), ""); 2987 emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_66, vector256); 2988 } 2989 2990 void Assembler::vmulps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { 2991 assert(VM_Version::supports_avx(), ""); 2992 emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_NONE, vector256); 2993 } 2994 2995 void Assembler::vmulpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { 2996 assert(VM_Version::supports_avx(), ""); 2997 emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_66, vector256); 2998 } 2999 3000 void Assembler::vmulps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { 3001 assert(VM_Version::supports_avx(), ""); 3002 emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_NONE, vector256); 3003 } 3004 3005 void Assembler::divpd(XMMRegister dst, XMMRegister src) { 3006 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3007 emit_simd_arith(0x5E, dst, src, VEX_SIMD_66); 3008 } 3009 3010 void Assembler::divps(XMMRegister dst, XMMRegister src) { 3011 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3012 emit_simd_arith(0x5E, dst, src, VEX_SIMD_NONE); 3013 } 3014 3015 void Assembler::vdivpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { 3016 assert(VM_Version::supports_avx(), ""); 3017 emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_66, vector256); 3018 } 3019 3020 void Assembler::vdivps(XMMRegister dst, XMMRegister 
nds, XMMRegister src, bool vector256) { 3021 assert(VM_Version::supports_avx(), ""); 3022 emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_NONE, vector256); 3023 } 3024 3025 void Assembler::vdivpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { 3026 assert(VM_Version::supports_avx(), ""); 3027 emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_66, vector256); 3028 } 3029 3030 void Assembler::vdivps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { 3031 assert(VM_Version::supports_avx(), ""); 3032 emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_NONE, vector256); 3033 } 3034 3035 void Assembler::andpd(XMMRegister dst, XMMRegister src) { 3036 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3037 emit_simd_arith(0x54, dst, src, VEX_SIMD_66); 3038 } 3039 3040 void Assembler::andps(XMMRegister dst, XMMRegister src) { 3041 NOT_LP64(assert(VM_Version::supports_sse(), "")); 3042 emit_simd_arith(0x54, dst, src, VEX_SIMD_NONE); 3043 } 3044 3045 void Assembler::andps(XMMRegister dst, Address src) { 3046 NOT_LP64(assert(VM_Version::supports_sse(), "")); 3047 emit_simd_arith(0x54, dst, src, VEX_SIMD_NONE); 3048 } 3049 3050 void Assembler::andpd(XMMRegister dst, Address src) { 3051 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3052 emit_simd_arith(0x54, dst, src, VEX_SIMD_66); 3053 } 3054 3055 void Assembler::vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { 3056 assert(VM_Version::supports_avx(), ""); 3057 emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_66, vector256); 3058 } 3059 3060 void Assembler::vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { 3061 assert(VM_Version::supports_avx(), ""); 3062 emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_NONE, vector256); 3063 } 3064 3065 void Assembler::vandpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { 3066 assert(VM_Version::supports_avx(), ""); 3067 emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_66, vector256); 3068 } 3069 3070 void Assembler::vandps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { 3071 assert(VM_Version::supports_avx(), ""); 3072 emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_NONE, vector256); 3073 } 3074 3075 void Assembler::xorpd(XMMRegister dst, XMMRegister src) { 3076 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3077 emit_simd_arith(0x57, dst, src, VEX_SIMD_66); 3078 } 3079 3080 void Assembler::xorps(XMMRegister dst, XMMRegister src) { 3081 NOT_LP64(assert(VM_Version::supports_sse(), "")); 3082 emit_simd_arith(0x57, dst, src, VEX_SIMD_NONE); 3083 } 3084 3085 void Assembler::xorpd(XMMRegister dst, Address src) { 3086 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3087 emit_simd_arith(0x57, dst, src, VEX_SIMD_66); 3088 } 3089 3090 void Assembler::xorps(XMMRegister dst, Address src) { 3091 NOT_LP64(assert(VM_Version::supports_sse(), "")); 3092 emit_simd_arith(0x57, dst, src, VEX_SIMD_NONE); 3093 } 3094 3095 void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { 3096 assert(VM_Version::supports_avx(), ""); 3097 emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_66, vector256); 3098 } 3099 3100 void Assembler::vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { 3101 assert(VM_Version::supports_avx(), ""); 3102 emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_NONE, vector256); 3103 } 3104 3105 void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { 3106 assert(VM_Version::supports_avx(), ""); 3107 emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_66, 
vector256); 3108 } 3109 3110 void Assembler::vxorps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { 3111 assert(VM_Version::supports_avx(), ""); 3112 emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_NONE, vector256); 3113 } 3114 3115 3116 // Integer vector arithmetic 3117 void Assembler::paddb(XMMRegister dst, XMMRegister src) { 3118 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3119 emit_simd_arith(0xFC, dst, src, VEX_SIMD_66); 3120 } 3121 3122 void Assembler::paddw(XMMRegister dst, XMMRegister src) { 3123 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3124 emit_simd_arith(0xFD, dst, src, VEX_SIMD_66); 3125 } 3126 3127 void Assembler::paddd(XMMRegister dst, XMMRegister src) { 3128 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3129 emit_simd_arith(0xFE, dst, src, VEX_SIMD_66); 3130 } 3131 3132 void Assembler::paddq(XMMRegister dst, XMMRegister src) { 3133 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3134 emit_simd_arith(0xD4, dst, src, VEX_SIMD_66); 3135 } 3136 3137 void Assembler::vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { 3138 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3139 emit_vex_arith(0xFC, dst, nds, src, VEX_SIMD_66, vector256); 3140 } 3141 3142 void Assembler::vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { 3143 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3144 emit_vex_arith(0xFD, dst, nds, src, VEX_SIMD_66, vector256); 3145 } 3146 3147 void Assembler::vpaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { 3148 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3149 emit_vex_arith(0xFE, dst, nds, src, VEX_SIMD_66, vector256); 3150 } 3151 3152 void Assembler::vpaddq(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { 3153 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3154 emit_vex_arith(0xD4, dst, nds, src, VEX_SIMD_66, vector256); 3155 } 3156 3157 void Assembler::vpaddb(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { 3158 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3159 emit_vex_arith(0xFC, dst, nds, src, VEX_SIMD_66, vector256); 3160 } 3161 3162 void Assembler::vpaddw(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { 3163 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3164 emit_vex_arith(0xFD, dst, nds, src, VEX_SIMD_66, vector256); 3165 } 3166 3167 void Assembler::vpaddd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { 3168 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3169 emit_vex_arith(0xFE, dst, nds, src, VEX_SIMD_66, vector256); 3170 } 3171 3172 void Assembler::vpaddq(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { 3173 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3174 emit_vex_arith(0xD4, dst, nds, src, VEX_SIMD_66, vector256); 3175 } 3176 3177 void Assembler::psubb(XMMRegister dst, XMMRegister src) { 3178 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3179 emit_simd_arith(0xF8, 
dst, src, VEX_SIMD_66); 3180 } 3181 3182 void Assembler::psubw(XMMRegister dst, XMMRegister src) { 3183 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3184 emit_simd_arith(0xF9, dst, src, VEX_SIMD_66); 3185 } 3186 3187 void Assembler::psubd(XMMRegister dst, XMMRegister src) { 3188 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3189 emit_simd_arith(0xFA, dst, src, VEX_SIMD_66); 3190 } 3191 3192 void Assembler::psubq(XMMRegister dst, XMMRegister src) { 3193 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3194 emit_simd_arith(0xFB, dst, src, VEX_SIMD_66); 3195 } 3196 3197 void Assembler::vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { 3198 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3199 emit_vex_arith(0xF8, dst, nds, src, VEX_SIMD_66, vector256); 3200 } 3201 3202 void Assembler::vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { 3203 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3204 emit_vex_arith(0xF9, dst, nds, src, VEX_SIMD_66, vector256); 3205 } 3206 3207 void Assembler::vpsubd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { 3208 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3209 emit_vex_arith(0xFA, dst, nds, src, VEX_SIMD_66, vector256); 3210 } 3211 3212 void Assembler::vpsubq(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { 3213 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3214 emit_vex_arith(0xFB, dst, nds, src, VEX_SIMD_66, vector256); 3215 } 3216 3217 void Assembler::vpsubb(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { 3218 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3219 emit_vex_arith(0xF8, dst, nds, src, VEX_SIMD_66, vector256); 3220 } 3221 3222 void Assembler::vpsubw(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { 3223 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3224 emit_vex_arith(0xF9, dst, nds, src, VEX_SIMD_66, vector256); 3225 } 3226 3227 void Assembler::vpsubd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { 3228 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3229 emit_vex_arith(0xFA, dst, nds, src, VEX_SIMD_66, vector256); 3230 } 3231 3232 void Assembler::vpsubq(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { 3233 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3234 emit_vex_arith(0xFB, dst, nds, src, VEX_SIMD_66, vector256); 3235 } 3236 3237 void Assembler::pmullw(XMMRegister dst, XMMRegister src) { 3238 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3239 emit_simd_arith(0xD5, dst, src, VEX_SIMD_66); 3240 } 3241 3242 void Assembler::pmulld(XMMRegister dst, XMMRegister src) { 3243 assert(VM_Version::supports_sse4_1(), ""); 3244 int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38); 3245 emit_byte(0x40); 3246 emit_byte(0xC0 | encode); 3247 } 3248 3249 void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { 3250 
assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3251 emit_vex_arith(0xD5, dst, nds, src, VEX_SIMD_66, vector256); 3252 } 3253 3254 void Assembler::vpmulld(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { 3255 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3256 int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_38); 3257 emit_byte(0x40); 3258 emit_byte(0xC0 | encode); 3259 } 3260 3261 void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { 3262 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3263 emit_vex_arith(0xD5, dst, nds, src, VEX_SIMD_66, vector256); 3264 } 3265 3266 void Assembler::vpmulld(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { 3267 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3268 InstructionMark im(this); 3269 int dst_enc = dst->encoding(); 3270 int nds_enc = nds->is_valid() ? nds->encoding() : 0; 3271 vex_prefix(src, nds_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, false, vector256); 3272 emit_byte(0x40); 3273 emit_operand(dst, src); 3274 } 3275 3276 // Shift packed integers left by specified number of bits. 3277 void Assembler::psllw(XMMRegister dst, int shift) { 3278 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3279 // XMM6 is for /6 encoding: 66 0F 71 /6 ib 3280 int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66); 3281 emit_byte(0x71); 3282 emit_byte(0xC0 | encode); 3283 emit_byte(shift & 0xFF); 3284 } 3285 3286 void Assembler::pslld(XMMRegister dst, int shift) { 3287 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3288 // XMM6 is for /6 encoding: 66 0F 72 /6 ib 3289 int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66); 3290 emit_byte(0x72); 3291 emit_byte(0xC0 | encode); 3292 emit_byte(shift & 0xFF); 3293 } 3294 3295 void Assembler::psllq(XMMRegister dst, int shift) { 3296 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3297 // XMM6 is for /6 encoding: 66 0F 73 /6 ib 3298 int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66); 3299 emit_byte(0x73); 3300 emit_byte(0xC0 | encode); 3301 emit_byte(shift & 0xFF); 3302 } 3303 3304 void Assembler::psllw(XMMRegister dst, XMMRegister shift) { 3305 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3306 emit_simd_arith(0xF1, dst, shift, VEX_SIMD_66); 3307 } 3308 3309 void Assembler::pslld(XMMRegister dst, XMMRegister shift) { 3310 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3311 emit_simd_arith(0xF2, dst, shift, VEX_SIMD_66); 3312 } 3313 3314 void Assembler::psllq(XMMRegister dst, XMMRegister shift) { 3315 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3316 emit_simd_arith(0xF3, dst, shift, VEX_SIMD_66); 3317 } 3318 3319 void Assembler::vpsllw(XMMRegister dst, XMMRegister src, int shift, bool vector256) { 3320 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3321 // XMM6 is for /6 encoding: 66 0F 71 /6 ib 3322 emit_vex_arith(0x71, xmm6, dst, src, VEX_SIMD_66, vector256); 3323 emit_byte(shift & 0xFF); 3324 } 3325 3326 void Assembler::vpslld(XMMRegister dst, XMMRegister src, int shift, bool vector256) { 3327 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit 
integer vectors requires AVX2"); 3328 // XMM6 is for /6 encoding: 66 0F 72 /6 ib 3329 emit_vex_arith(0x72, xmm6, dst, src, VEX_SIMD_66, vector256); 3330 emit_byte(shift & 0xFF); 3331 } 3332 3333 void Assembler::vpsllq(XMMRegister dst, XMMRegister src, int shift, bool vector256) { 3334 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3335 // XMM6 is for /6 encoding: 66 0F 73 /6 ib 3336 emit_vex_arith(0x73, xmm6, dst, src, VEX_SIMD_66, vector256); 3337 emit_byte(shift & 0xFF); 3338 } 3339 3340 void Assembler::vpsllw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) { 3341 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3342 emit_vex_arith(0xF1, dst, src, shift, VEX_SIMD_66, vector256); 3343 } 3344 3345 void Assembler::vpslld(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) { 3346 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3347 emit_vex_arith(0xF2, dst, src, shift, VEX_SIMD_66, vector256); 3348 } 3349 3350 void Assembler::vpsllq(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) { 3351 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3352 emit_vex_arith(0xF3, dst, src, shift, VEX_SIMD_66, vector256); 3353 } 3354 3355 // Shift packed integers logically right by specified number of bits. 3356 void Assembler::psrlw(XMMRegister dst, int shift) { 3357 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3358 // XMM2 is for /2 encoding: 66 0F 71 /2 ib 3359 int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66); 3360 emit_byte(0x71); 3361 emit_byte(0xC0 | encode); 3362 emit_byte(shift & 0xFF); 3363 } 3364 3365 void Assembler::psrld(XMMRegister dst, int shift) { 3366 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3367 // XMM2 is for /2 encoding: 66 0F 72 /2 ib 3368 int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66); 3369 emit_byte(0x72); 3370 emit_byte(0xC0 | encode); 3371 emit_byte(shift & 0xFF); 3372 } 3373 3374 void Assembler::psrlq(XMMRegister dst, int shift) { 3375 // Do not confuse it with psrldq SSE2 instruction which 3376 // shifts 128 bit value in xmm register by number of bytes. 
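// (Editor's note, illustrative: psrlq shifts each of the two 64-bit lanes
// right by a bit count, so no bits cross the lane boundary, whereas psrldq
// moves the whole 128-bit register right by a byte count - psrlq(dst, 16)
// and psrldq(dst, 2) therefore produce different results.)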
3377 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3378 // XMM2 is for /2 encoding: 66 0F 73 /2 ib 3379 int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66); 3380 emit_byte(0x73); 3381 emit_byte(0xC0 | encode); 3382 emit_byte(shift & 0xFF); 3383 } 3384 3385 void Assembler::psrlw(XMMRegister dst, XMMRegister shift) { 3386 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3387 emit_simd_arith(0xD1, dst, shift, VEX_SIMD_66); 3388 } 3389 3390 void Assembler::psrld(XMMRegister dst, XMMRegister shift) { 3391 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3392 emit_simd_arith(0xD2, dst, shift, VEX_SIMD_66); 3393 } 3394 3395 void Assembler::psrlq(XMMRegister dst, XMMRegister shift) { 3396 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3397 emit_simd_arith(0xD3, dst, shift, VEX_SIMD_66); 3398 } 3399 3400 void Assembler::vpsrlw(XMMRegister dst, XMMRegister src, int shift, bool vector256) { 3401 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3402 // XMM2 is for /2 encoding: 66 0F 71 /2 ib 3403 emit_vex_arith(0x71, xmm2, dst, src, VEX_SIMD_66, vector256); 3404 emit_byte(shift & 0xFF); 3405 } 3406 3407 void Assembler::vpsrld(XMMRegister dst, XMMRegister src, int shift, bool vector256) { 3408 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3409 // XMM2 is for /2 encoding: 66 0F 72 /2 ib 3410 emit_vex_arith(0x72, xmm2, dst, src, VEX_SIMD_66, vector256); 3411 emit_byte(shift & 0xFF); 3412 } 3413 3414 void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, int shift, bool vector256) { 3415 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3416 // XMM2 is for /2 encoding: 66 0F 73 /2 ib 3417 emit_vex_arith(0x73, xmm2, dst, src, VEX_SIMD_66, vector256); 3418 emit_byte(shift & 0xFF); 3419 } 3420 3421 void Assembler::vpsrlw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) { 3422 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3423 emit_vex_arith(0xD1, dst, src, shift, VEX_SIMD_66, vector256); 3424 } 3425 3426 void Assembler::vpsrld(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) { 3427 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3428 emit_vex_arith(0xD2, dst, src, shift, VEX_SIMD_66, vector256); 3429 } 3430 3431 void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) { 3432 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3433 emit_vex_arith(0xD3, dst, src, shift, VEX_SIMD_66, vector256); 3434 } 3435 3436 // Shift packed integers arithmetically right by specified number of bits.
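// (Editor's note, illustrative: unlike the logical shifts above, these copy
// the sign bit into the vacated positions - psraw on a 16-bit lane holding
// 0x8000 with a shift of 4 yields 0xF800, where psrlw would yield 0x0800.
// Only word and dword forms exist here; SSE2/AVX2 provide no packed 64-bit
// arithmetic right shift.)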
3437 void Assembler::psraw(XMMRegister dst, int shift) { 3438 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3439 // XMM4 is for /4 encoding: 66 0F 71 /4 ib 3440 int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66); 3441 emit_byte(0x71); 3442 emit_byte(0xC0 | encode); 3443 emit_byte(shift & 0xFF); 3444 } 3445 3446 void Assembler::psrad(XMMRegister dst, int shift) { 3447 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3448 // XMM4 is for /4 encoding: 66 0F 72 /4 ib 3449 int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66); 3450 emit_byte(0x72); 3451 emit_byte(0xC0 | encode); 3452 emit_byte(shift & 0xFF); 3453 } 3454 3455 void Assembler::psraw(XMMRegister dst, XMMRegister shift) { 3456 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3457 emit_simd_arith(0xE1, dst, shift, VEX_SIMD_66); 3458 } 3459 3460 void Assembler::psrad(XMMRegister dst, XMMRegister shift) { 3461 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3462 emit_simd_arith(0xE2, dst, shift, VEX_SIMD_66); 3463 } 3464 3465 void Assembler::vpsraw(XMMRegister dst, XMMRegister src, int shift, bool vector256) { 3466 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3467 // XMM4 is for /4 encoding: 66 0F 71 /4 ib 3468 emit_vex_arith(0x71, xmm4, dst, src, VEX_SIMD_66, vector256); 3469 emit_byte(shift & 0xFF); 3470 } 3471 3472 void Assembler::vpsrad(XMMRegister dst, XMMRegister src, int shift, bool vector256) { 3473 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3474 // XMM4 is for /4 encoding: 66 0F 72 /4 ib 3475 emit_vex_arith(0x72, xmm4, dst, src, VEX_SIMD_66, vector256); 3476 emit_byte(shift & 0xFF); 3477 } 3478 3479 void Assembler::vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) { 3480 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3481 emit_vex_arith(0xE1, dst, src, shift, VEX_SIMD_66, vector256); 3482 } 3483 3484 void Assembler::vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) { 3485 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3486 emit_vex_arith(0xE2, dst, src, shift, VEX_SIMD_66, vector256); 3487 } 3488 3489 3490 // AND packed integers 3491 void Assembler::pand(XMMRegister dst, XMMRegister src) { 3492 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3493 emit_simd_arith(0xDB, dst, src, VEX_SIMD_66); 3494 } 3495 3496 void Assembler::vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { 3497 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3498 emit_vex_arith(0xDB, dst, nds, src, VEX_SIMD_66, vector256); 3499 } 3500 3501 void Assembler::vpand(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { 3502 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3503 emit_vex_arith(0xDB, dst, nds, src, VEX_SIMD_66, vector256); 3504 } 3505 3506 void Assembler::por(XMMRegister dst, XMMRegister src) { 3507 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3508 emit_simd_arith(0xEB, dst, src, VEX_SIMD_66); 3509 } 3510 3511 void Assembler::vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { 3512 assert(VM_Version::supports_avx() && !vector256 ||
VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3513 emit_vex_arith(0xEB, dst, nds, src, VEX_SIMD_66, vector256); 3514 } 3515 3516 void Assembler::vpor(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { 3517 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3518 emit_vex_arith(0xEB, dst, nds, src, VEX_SIMD_66, vector256); 3519 } 3520 3521 void Assembler::pxor(XMMRegister dst, XMMRegister src) { 3522 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 3523 emit_simd_arith(0xEF, dst, src, VEX_SIMD_66); 3524 } 3525 3526 void Assembler::vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { 3527 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3528 emit_vex_arith(0xEF, dst, nds, src, VEX_SIMD_66, vector256); 3529 } 3530 3531 void Assembler::vpxor(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { 3532 assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2"); 3533 emit_vex_arith(0xEF, dst, nds, src, VEX_SIMD_66, vector256); 3534 } 3535 3536 3537 void Assembler::vinsertf128h(XMMRegister dst, XMMRegister nds, XMMRegister src) { 3538 assert(VM_Version::supports_avx(), ""); 3539 bool vector256 = true; 3540 int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_3A); 3541 emit_byte(0x18); 3542 emit_byte(0xC0 | encode); 3543 // 0x00 - insert into lower 128 bits 3544 // 0x01 - insert into upper 128 bits 3545 emit_byte(0x01); 3546 } 3547 3548 void Assembler::vinsertf128h(XMMRegister dst, Address src) { 3549 assert(VM_Version::supports_avx(), ""); 3550 InstructionMark im(this); 3551 bool vector256 = true; 3552 assert(dst != xnoreg, "sanity"); 3553 int dst_enc = dst->encoding(); 3554 // swap src<->dst for encoding 3555 vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector256); 3556 emit_byte(0x18); 3557 emit_operand(dst, src); 3558 // 0x01 - insert into upper 128 bits 3559 emit_byte(0x01); 3560 } 3561 3562 void Assembler::vextractf128h(Address dst, XMMRegister src) { 3563 assert(VM_Version::supports_avx(), ""); 3564 InstructionMark im(this); 3565 bool vector256 = true; 3566 assert(src != xnoreg, "sanity"); 3567 int src_enc = src->encoding(); 3568 vex_prefix(dst, 0, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector256); 3569 emit_byte(0x19); 3570 emit_operand(src, dst); 3571 // 0x01 - extract from upper 128 bits 3572 emit_byte(0x01); 3573 } 3574 3575 void Assembler::vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src) { 3576 assert(VM_Version::supports_avx2(), ""); 3577 bool vector256 = true; 3578 int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_3A); 3579 emit_byte(0x38); 3580 emit_byte(0xC0 | encode); 3581 // 0x00 - insert into lower 128 bits 3582 // 0x01 - insert into upper 128 bits 3583 emit_byte(0x01); 3584 } 3585 3586 void Assembler::vinserti128h(XMMRegister dst, Address src) { 3587 assert(VM_Version::supports_avx2(), ""); 3588 InstructionMark im(this); 3589 bool vector256 = true; 3590 assert(dst != xnoreg, "sanity"); 3591 int dst_enc = dst->encoding(); 3592 // swap src<->dst for encoding 3593 vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector256); 3594 emit_byte(0x38); 3595 emit_operand(dst, src); 3596 // 0x01 - insert into upper 128 bits 3597 emit_byte(0x01); 3598 } 3599 3600 void 
Assembler::vextracti128h(Address dst, XMMRegister src) { 3601 assert(VM_Version::supports_avx2(), ""); 3602 InstructionMark im(this); 3603 bool vector256 = true; 3604 assert(src != xnoreg, "sanity"); 3605 int src_enc = src->encoding(); 3606 vex_prefix(dst, 0, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector256); 3607 emit_byte(0x39); 3608 emit_operand(src, dst); 3609 // 0x01 - extract from upper 128 bits 3610 emit_byte(0x01); 3611 } 3612 3613 void Assembler::vzeroupper() { 3614 assert(VM_Version::supports_avx(), ""); 3615 (void)vex_prefix_and_encode(xmm0, xmm0, xmm0, VEX_SIMD_NONE); 3616 emit_byte(0x77); 3617 } 3618 3619 3620 #ifndef _LP64 3621 // 32bit only pieces of the assembler 3622 3623 void Assembler::cmp_literal32(Register src1, int32_t imm32, RelocationHolder const& rspec) { 3624 // NO PREFIX AS NEVER 64BIT 3625 InstructionMark im(this); 3626 emit_byte(0x81); 3627 emit_byte(0xF8 | src1->encoding()); 3628 emit_data(imm32, rspec, 0); 3629 } 3630 3631 void Assembler::cmp_literal32(Address src1, int32_t imm32, RelocationHolder const& rspec) { 3632 // NO PREFIX AS NEVER 64BIT (not even 32-bit versions of 64-bit regs) 3633 InstructionMark im(this); 3634 emit_byte(0x81); 3635 emit_operand(rdi, src1); 3636 emit_data(imm32, rspec, 0); 3637 } 3638 3639 // The 64-bit cmpxchg (on a 32-bit platform) compares the value at adr with the contents of rdx:rax, 3640 // and stores rcx:rbx into adr if they are equal; otherwise, the value at adr is loaded 3641 // into rdx:rax. The ZF is set if the compared values were equal, and cleared otherwise. 3642 void Assembler::cmpxchg8(Address adr) { 3643 InstructionMark im(this); 3644 emit_byte(0x0F); 3645 emit_byte(0xC7); 3646 emit_operand(rcx, adr); 3647 } 3648 3649 void Assembler::decl(Register dst) { 3650 // Don't use it directly. Use MacroAssembler::decrementl() instead.
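// One-byte IA-32 form: 0x48 + rd is "dec r32", so decl(rcx) assembles to the
// single byte 0x49. In 64-bit mode these opcodes became REX prefixes, which
// is why the LP64 version of decl() further down uses the two-byte 0xFF /1 form.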
3651 emit_byte(0x48 | dst->encoding()); 3652 } 3653 3654 #endif // _LP64 3655 3656 // 64bit typically doesn't use the x87 but needs to for the trig funcs 3657 3658 void Assembler::fabs() { 3659 emit_byte(0xD9); 3660 emit_byte(0xE1); 3661 } 3662 3663 void Assembler::fadd(int i) { 3664 emit_farith(0xD8, 0xC0, i); 3665 } 3666 3667 void Assembler::fadd_d(Address src) { 3668 InstructionMark im(this); 3669 emit_byte(0xDC); 3670 emit_operand32(rax, src); 3671 } 3672 3673 void Assembler::fadd_s(Address src) { 3674 InstructionMark im(this); 3675 emit_byte(0xD8); 3676 emit_operand32(rax, src); 3677 } 3678 3679 void Assembler::fadda(int i) { 3680 emit_farith(0xDC, 0xC0, i); 3681 } 3682 3683 void Assembler::faddp(int i) { 3684 emit_farith(0xDE, 0xC0, i); 3685 } 3686 3687 void Assembler::fchs() { 3688 emit_byte(0xD9); 3689 emit_byte(0xE0); 3690 } 3691 3692 void Assembler::fcom(int i) { 3693 emit_farith(0xD8, 0xD0, i); 3694 } 3695 3696 void Assembler::fcomp(int i) { 3697 emit_farith(0xD8, 0xD8, i); 3698 } 3699 3700 void Assembler::fcomp_d(Address src) { 3701 InstructionMark im(this); 3702 emit_byte(0xDC); 3703 emit_operand32(rbx, src); 3704 } 3705 3706 void Assembler::fcomp_s(Address src) { 3707 InstructionMark im(this); 3708 emit_byte(0xD8); 3709 emit_operand32(rbx, src); 3710 } 3711 3712 void Assembler::fcompp() { 3713 emit_byte(0xDE); 3714 emit_byte(0xD9); 3715 } 3716 3717 void Assembler::fcos() { 3718 emit_byte(0xD9); 3719 emit_byte(0xFF); 3720 } 3721 3722 void Assembler::fdecstp() { 3723 emit_byte(0xD9); 3724 emit_byte(0xF6); 3725 } 3726 3727 void Assembler::fdiv(int i) { 3728 emit_farith(0xD8, 0xF0, i); 3729 } 3730 3731 void Assembler::fdiv_d(Address src) { 3732 InstructionMark im(this); 3733 emit_byte(0xDC); 3734 emit_operand32(rsi, src); 3735 } 3736 3737 void Assembler::fdiv_s(Address src) { 3738 InstructionMark im(this); 3739 emit_byte(0xD8); 3740 emit_operand32(rsi, src); 3741 } 3742 3743 void Assembler::fdiva(int i) { 3744 emit_farith(0xDC, 0xF8, i); 3745 } 3746 3747 // Note: The Intel manual (Pentium Processor User's Manual, Vol.3, 1994) 3748 // is erroneous for some of the floating-point instructions below. 
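// Throughout this x87 section, emit_farith(b1, b2, i) emits the two bytes b1
// and b2 + i, where i names the x87 stack register: e.g. fadd(2) above
// assembles to D8 C2, i.e. FADD ST(0), ST(2).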
3749 3750 void Assembler::fdivp(int i) { 3751 emit_farith(0xDE, 0xF8, i); // ST(0) <- ST(0) / ST(1) and pop (Intel manual wrong) 3752 } 3753 3754 void Assembler::fdivr(int i) { 3755 emit_farith(0xD8, 0xF8, i); 3756 } 3757 3758 void Assembler::fdivr_d(Address src) { 3759 InstructionMark im(this); 3760 emit_byte(0xDC); 3761 emit_operand32(rdi, src); 3762 } 3763 3764 void Assembler::fdivr_s(Address src) { 3765 InstructionMark im(this); 3766 emit_byte(0xD8); 3767 emit_operand32(rdi, src); 3768 } 3769 3770 void Assembler::fdivra(int i) { 3771 emit_farith(0xDC, 0xF0, i); 3772 } 3773 3774 void Assembler::fdivrp(int i) { 3775 emit_farith(0xDE, 0xF0, i); // ST(0) <- ST(1) / ST(0) and pop (Intel manual wrong) 3776 } 3777 3778 void Assembler::ffree(int i) { 3779 emit_farith(0xDD, 0xC0, i); 3780 } 3781 3782 void Assembler::fild_d(Address adr) { 3783 InstructionMark im(this); 3784 emit_byte(0xDF); 3785 emit_operand32(rbp, adr); 3786 } 3787 3788 void Assembler::fild_s(Address adr) { 3789 InstructionMark im(this); 3790 emit_byte(0xDB); 3791 emit_operand32(rax, adr); 3792 } 3793 3794 void Assembler::fincstp() { 3795 emit_byte(0xD9); 3796 emit_byte(0xF7); 3797 } 3798 3799 void Assembler::finit() { 3800 emit_byte(0x9B); 3801 emit_byte(0xDB); 3802 emit_byte(0xE3); 3803 } 3804 3805 void Assembler::fist_s(Address adr) { 3806 InstructionMark im(this); 3807 emit_byte(0xDB); 3808 emit_operand32(rdx, adr); 3809 } 3810 3811 void Assembler::fistp_d(Address adr) { 3812 InstructionMark im(this); 3813 emit_byte(0xDF); 3814 emit_operand32(rdi, adr); 3815 } 3816 3817 void Assembler::fistp_s(Address adr) { 3818 InstructionMark im(this); 3819 emit_byte(0xDB); 3820 emit_operand32(rbx, adr); 3821 } 3822 3823 void Assembler::fld1() { 3824 emit_byte(0xD9); 3825 emit_byte(0xE8); 3826 } 3827 3828 void Assembler::fld_d(Address adr) { 3829 InstructionMark im(this); 3830 emit_byte(0xDD); 3831 emit_operand32(rax, adr); 3832 } 3833 3834 void Assembler::fld_s(Address adr) { 3835 InstructionMark im(this); 3836 emit_byte(0xD9); 3837 emit_operand32(rax, adr); 3838 } 3839 3840 3841 void Assembler::fld_s(int index) { 3842 emit_farith(0xD9, 0xC0, index); 3843 } 3844 3845 void Assembler::fld_x(Address adr) { 3846 InstructionMark im(this); 3847 emit_byte(0xDB); 3848 emit_operand32(rbp, adr); 3849 } 3850 3851 void Assembler::fldcw(Address src) { 3852 InstructionMark im(this); 3853 emit_byte(0xd9); 3854 emit_operand32(rbp, src); 3855 } 3856 3857 void Assembler::fldenv(Address src) { 3858 InstructionMark im(this); 3859 emit_byte(0xD9); 3860 emit_operand32(rsp, src); 3861 } 3862 3863 void Assembler::fldlg2() { 3864 emit_byte(0xD9); 3865 emit_byte(0xEC); 3866 } 3867 3868 void Assembler::fldln2() { 3869 emit_byte(0xD9); 3870 emit_byte(0xED); 3871 } 3872 3873 void Assembler::fldz() { 3874 emit_byte(0xD9); 3875 emit_byte(0xEE); 3876 } 3877 3878 void Assembler::flog() { 3879 fldln2(); 3880 fxch(); 3881 fyl2x(); 3882 } 3883 3884 void Assembler::flog10() { 3885 fldlg2(); 3886 fxch(); 3887 fyl2x(); 3888 } 3889 3890 void Assembler::fmul(int i) { 3891 emit_farith(0xD8, 0xC8, i); 3892 } 3893 3894 void Assembler::fmul_d(Address src) { 3895 InstructionMark im(this); 3896 emit_byte(0xDC); 3897 emit_operand32(rcx, src); 3898 } 3899 3900 void Assembler::fmul_s(Address src) { 3901 InstructionMark im(this); 3902 emit_byte(0xD8); 3903 emit_operand32(rcx, src); 3904 } 3905 3906 void Assembler::fmula(int i) { 3907 emit_farith(0xDC, 0xC8, i); 3908 } 3909 3910 void Assembler::fmulp(int i) { 3911 emit_farith(0xDE, 0xC8, i); 3912 } 3913 3914 void 
Assembler::fnsave(Address dst) { 3915 InstructionMark im(this); 3916 emit_byte(0xDD); 3917 emit_operand32(rsi, dst); 3918 } 3919 3920 void Assembler::fnstcw(Address src) { 3921 InstructionMark im(this); 3922 emit_byte(0x9B); 3923 emit_byte(0xD9); 3924 emit_operand32(rdi, src); 3925 } 3926 3927 void Assembler::fnstsw_ax() { 3928 emit_byte(0xdF); 3929 emit_byte(0xE0); 3930 } 3931 3932 void Assembler::fprem() { 3933 emit_byte(0xD9); 3934 emit_byte(0xF8); 3935 } 3936 3937 void Assembler::fprem1() { 3938 emit_byte(0xD9); 3939 emit_byte(0xF5); 3940 } 3941 3942 void Assembler::frstor(Address src) { 3943 InstructionMark im(this); 3944 emit_byte(0xDD); 3945 emit_operand32(rsp, src); 3946 } 3947 3948 void Assembler::fsin() { 3949 emit_byte(0xD9); 3950 emit_byte(0xFE); 3951 } 3952 3953 void Assembler::fsqrt() { 3954 emit_byte(0xD9); 3955 emit_byte(0xFA); 3956 } 3957 3958 void Assembler::fst_d(Address adr) { 3959 InstructionMark im(this); 3960 emit_byte(0xDD); 3961 emit_operand32(rdx, adr); 3962 } 3963 3964 void Assembler::fst_s(Address adr) { 3965 InstructionMark im(this); 3966 emit_byte(0xD9); 3967 emit_operand32(rdx, adr); 3968 } 3969 3970 void Assembler::fstp_d(Address adr) { 3971 InstructionMark im(this); 3972 emit_byte(0xDD); 3973 emit_operand32(rbx, adr); 3974 } 3975 3976 void Assembler::fstp_d(int index) { 3977 emit_farith(0xDD, 0xD8, index); 3978 } 3979 3980 void Assembler::fstp_s(Address adr) { 3981 InstructionMark im(this); 3982 emit_byte(0xD9); 3983 emit_operand32(rbx, adr); 3984 } 3985 3986 void Assembler::fstp_x(Address adr) { 3987 InstructionMark im(this); 3988 emit_byte(0xDB); 3989 emit_operand32(rdi, adr); 3990 } 3991 3992 void Assembler::fsub(int i) { 3993 emit_farith(0xD8, 0xE0, i); 3994 } 3995 3996 void Assembler::fsub_d(Address src) { 3997 InstructionMark im(this); 3998 emit_byte(0xDC); 3999 emit_operand32(rsp, src); 4000 } 4001 4002 void Assembler::fsub_s(Address src) { 4003 InstructionMark im(this); 4004 emit_byte(0xD8); 4005 emit_operand32(rsp, src); 4006 } 4007 4008 void Assembler::fsuba(int i) { 4009 emit_farith(0xDC, 0xE8, i); 4010 } 4011 4012 void Assembler::fsubp(int i) { 4013 emit_farith(0xDE, 0xE8, i); // ST(0) <- ST(0) - ST(1) and pop (Intel manual wrong) 4014 } 4015 4016 void Assembler::fsubr(int i) { 4017 emit_farith(0xD8, 0xE8, i); 4018 } 4019 4020 void Assembler::fsubr_d(Address src) { 4021 InstructionMark im(this); 4022 emit_byte(0xDC); 4023 emit_operand32(rbp, src); 4024 } 4025 4026 void Assembler::fsubr_s(Address src) { 4027 InstructionMark im(this); 4028 emit_byte(0xD8); 4029 emit_operand32(rbp, src); 4030 } 4031 4032 void Assembler::fsubra(int i) { 4033 emit_farith(0xDC, 0xE0, i); 4034 } 4035 4036 void Assembler::fsubrp(int i) { 4037 emit_farith(0xDE, 0xE0, i); // ST(0) <- ST(1) - ST(0) and pop (Intel manual wrong) 4038 } 4039 4040 void Assembler::ftan() { 4041 emit_byte(0xD9); 4042 emit_byte(0xF2); 4043 emit_byte(0xDD); 4044 emit_byte(0xD8); 4045 } 4046 4047 void Assembler::ftst() { 4048 emit_byte(0xD9); 4049 emit_byte(0xE4); 4050 } 4051 4052 void Assembler::fucomi(int i) { 4053 // make sure the instruction is supported (introduced for P6, together with cmov) 4054 guarantee(VM_Version::supports_cmov(), "illegal instruction"); 4055 emit_farith(0xDB, 0xE8, i); 4056 } 4057 4058 void Assembler::fucomip(int i) { 4059 // make sure the instruction is supported (introduced for P6, together with cmov) 4060 guarantee(VM_Version::supports_cmov(), "illegal instruction"); 4061 emit_farith(0xDF, 0xE8, i); 4062 } 4063 4064 void Assembler::fwait() { 4065 emit_byte(0x9B); 4066 
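  // 0x9B is the WAIT/FWAIT opcode: it checks for, and services, any pending
  // unmasked x87 floating-point exceptions before continuing.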
} 4067 4068 void Assembler::fxch(int i) { 4069 emit_farith(0xD9, 0xC8, i); 4070 } 4071 4072 void Assembler::fyl2x() { 4073 emit_byte(0xD9); 4074 emit_byte(0xF1); 4075 } 4076 4077 void Assembler::frndint() { 4078 emit_byte(0xD9); 4079 emit_byte(0xFC); 4080 } 4081 4082 void Assembler::f2xm1() { 4083 emit_byte(0xD9); 4084 emit_byte(0xF0); 4085 } 4086 4087 void Assembler::fldl2e() { 4088 emit_byte(0xD9); 4089 emit_byte(0xEA); 4090 } 4091 4092 // SSE SIMD prefix byte values corresponding to VexSimdPrefix encoding. 4093 static int simd_pre[4] = { 0, 0x66, 0xF3, 0xF2 }; 4094 // SSE opcode second byte values (first is 0x0F) corresponding to VexOpcode encoding. 4095 static int simd_opc[4] = { 0, 0, 0x38, 0x3A }; 4096 4097 // Generate SSE legacy REX prefix and SIMD opcode based on VEX encoding. 4098 void Assembler::rex_prefix(Address adr, XMMRegister xreg, VexSimdPrefix pre, VexOpcode opc, bool rex_w) { 4099 if (pre > 0) { 4100 emit_byte(simd_pre[pre]); 4101 } 4102 if (rex_w) { 4103 prefixq(adr, xreg); 4104 } else { 4105 prefix(adr, xreg); 4106 } 4107 if (opc > 0) { 4108 emit_byte(0x0F); 4109 int opc2 = simd_opc[opc]; 4110 if (opc2 > 0) { 4111 emit_byte(opc2); 4112 } 4113 } 4114 } 4115 4116 int Assembler::rex_prefix_and_encode(int dst_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, bool rex_w) { 4117 if (pre > 0) { 4118 emit_byte(simd_pre[pre]); 4119 } 4120 int encode = (rex_w) ? prefixq_and_encode(dst_enc, src_enc) : 4121 prefix_and_encode(dst_enc, src_enc); 4122 if (opc > 0) { 4123 emit_byte(0x0F); 4124 int opc2 = simd_opc[opc]; 4125 if (opc2 > 0) { 4126 emit_byte(opc2); 4127 } 4128 } 4129 return encode; 4130 } 4131 4132 4133 void Assembler::vex_prefix(bool vex_r, bool vex_b, bool vex_x, bool vex_w, int nds_enc, VexSimdPrefix pre, VexOpcode opc, bool vector256) { 4134 if (vex_b || vex_x || vex_w || (opc == VEX_OPCODE_0F_38) || (opc == VEX_OPCODE_0F_3A)) { 4135 prefix(VEX_3bytes); 4136 4137 int byte1 = (vex_r ? VEX_R : 0) | (vex_x ? VEX_X : 0) | (vex_b ? VEX_B : 0); 4138 byte1 = (~byte1) & 0xE0; 4139 byte1 |= opc; 4140 a_byte(byte1); 4141 4142 int byte2 = ((~nds_enc) & 0xf) << 3; 4143 byte2 |= (vex_w ? VEX_W : 0) | (vector256 ? 4 : 0) | pre; 4144 emit_byte(byte2); 4145 } else { 4146 prefix(VEX_2bytes); 4147 4148 int byte1 = vex_r ? VEX_R : 0; 4149 byte1 = (~byte1) & 0x80; 4150 byte1 |= ((~nds_enc) & 0xf) << 3; 4151 byte1 |= (vector256 ? 4 : 0) | pre; 4152 emit_byte(byte1); 4153 } 4154 } 4155 4156 void Assembler::vex_prefix(Address adr, int nds_enc, int xreg_enc, VexSimdPrefix pre, VexOpcode opc, bool vex_w, bool vector256){ 4157 bool vex_r = (xreg_enc >= 8); 4158 bool vex_b = adr.base_needs_rex(); 4159 bool vex_x = adr.index_needs_rex(); 4160 vex_prefix(vex_r, vex_b, vex_x, vex_w, nds_enc, pre, opc, vector256); 4161 } 4162 4163 int Assembler::vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, bool vex_w, bool vector256) { 4164 bool vex_r = (dst_enc >= 8); 4165 bool vex_b = (src_enc >= 8); 4166 bool vex_x = false; 4167 vex_prefix(vex_r, vex_b, vex_x, vex_w, nds_enc, pre, opc, vector256); 4168 return (((dst_enc & 7) << 3) | (src_enc & 7)); 4169 } 4170 4171 4172 void Assembler::simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr, VexSimdPrefix pre, VexOpcode opc, bool rex_w, bool vector256) { 4173 if (UseAVX > 0) { 4174 int xreg_enc = xreg->encoding(); 4175 int nds_enc = nds->is_valid() ? 
nds->encoding() : 0; 4176 vex_prefix(adr, nds_enc, xreg_enc, pre, opc, rex_w, vector256); 4177 } else { 4178 assert((nds == xreg) || (nds == xnoreg), "wrong sse encoding"); 4179 rex_prefix(adr, xreg, pre, opc, rex_w); 4180 } 4181 } 4182 4183 int Assembler::simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src, VexSimdPrefix pre, VexOpcode opc, bool rex_w, bool vector256) { 4184 int dst_enc = dst->encoding(); 4185 int src_enc = src->encoding(); 4186 if (UseAVX > 0) { 4187 int nds_enc = nds->is_valid() ? nds->encoding() : 0; 4188 return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, rex_w, vector256); 4189 } else { 4190 assert((nds == dst) || (nds == src) || (nds == xnoreg), "wrong sse encoding"); 4191 return rex_prefix_and_encode(dst_enc, src_enc, pre, opc, rex_w); 4192 } 4193 } 4194 4195 void Assembler::emit_simd_arith(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre) { 4196 InstructionMark im(this); 4197 simd_prefix(dst, dst, src, pre); 4198 emit_byte(opcode); 4199 emit_operand(dst, src); 4200 } 4201 4202 void Assembler::emit_simd_arith(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre) { 4203 int encode = simd_prefix_and_encode(dst, dst, src, pre); 4204 emit_byte(opcode); 4205 emit_byte(0xC0 | encode); 4206 } 4207 4208 // Versions with no second source register (non-destructive source). 4209 void Assembler::emit_simd_arith_nonds(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre) { 4210 InstructionMark im(this); 4211 simd_prefix(dst, xnoreg, src, pre); 4212 emit_byte(opcode); 4213 emit_operand(dst, src); 4214 } 4215 4216 void Assembler::emit_simd_arith_nonds(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre) { 4217 int encode = simd_prefix_and_encode(dst, xnoreg, src, pre); 4218 emit_byte(opcode); 4219 emit_byte(0xC0 | encode); 4220 } 4221 4222 // 3-operands AVX instructions 4223 void Assembler::emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds, 4224 Address src, VexSimdPrefix pre, bool vector256) { 4225 InstructionMark im(this); 4226 vex_prefix(dst, nds, src, pre, vector256); 4227 emit_byte(opcode); 4228 emit_operand(dst, src); 4229 } 4230 4231 void Assembler::emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds, 4232 XMMRegister src, VexSimdPrefix pre, bool vector256) { 4233 int encode = vex_prefix_and_encode(dst, nds, src, pre, vector256); 4234 emit_byte(opcode); 4235 emit_byte(0xC0 | encode); 4236 } 4237 4238 #ifndef _LP64 4239 4240 void Assembler::incl(Register dst) { 4241 // Don't use it directly. Use MacroAssembler::incrementl() instead. 
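  // One-byte IA-32 form mirroring decl() above: 0x40 + rd is "inc r32", so
  // incl(rdx) assembles to 0x42; on x86-64 these bytes are REX prefixes.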
4242 emit_byte(0x40 | dst->encoding()); 4243 } 4244 4245 void Assembler::lea(Register dst, Address src) { 4246 leal(dst, src); 4247 } 4248 4249 void Assembler::mov_literal32(Address dst, int32_t imm32, RelocationHolder const& rspec) { 4250 InstructionMark im(this); 4251 emit_byte(0xC7); 4252 emit_operand(rax, dst); 4253 emit_data((int)imm32, rspec, 0); 4254 } 4255 4256 void Assembler::mov_literal32(Register dst, int32_t imm32, RelocationHolder const& rspec) { 4257 InstructionMark im(this); 4258 int encode = prefix_and_encode(dst->encoding()); 4259 emit_byte(0xB8 | encode); 4260 emit_data((int)imm32, rspec, 0); 4261 } 4262 4263 void Assembler::popa() { // 32bit 4264 emit_byte(0x61); 4265 } 4266 4267 void Assembler::push_literal32(int32_t imm32, RelocationHolder const& rspec) { 4268 InstructionMark im(this); 4269 emit_byte(0x68); 4270 emit_data(imm32, rspec, 0); 4271 } 4272 4273 void Assembler::pusha() { // 32bit 4274 emit_byte(0x60); 4275 } 4276 4277 void Assembler::set_byte_if_not_zero(Register dst) { 4278 emit_byte(0x0F); 4279 emit_byte(0x95); 4280 emit_byte(0xE0 | dst->encoding()); 4281 } 4282 4283 void Assembler::shldl(Register dst, Register src) { 4284 emit_byte(0x0F); 4285 emit_byte(0xA5); 4286 emit_byte(0xC0 | src->encoding() << 3 | dst->encoding()); 4287 } 4288 4289 void Assembler::shrdl(Register dst, Register src) { 4290 emit_byte(0x0F); 4291 emit_byte(0xAD); 4292 emit_byte(0xC0 | src->encoding() << 3 | dst->encoding()); 4293 } 4294 4295 #else // LP64 4296 4297 void Assembler::set_byte_if_not_zero(Register dst) { 4298 int enc = prefix_and_encode(dst->encoding(), true); 4299 emit_byte(0x0F); 4300 emit_byte(0x95); 4301 emit_byte(0xE0 | enc); 4302 } 4303 4304 // 64bit only pieces of the assembler 4305 // This should only be used by 64-bit instructions that can use rip-relative addressing; 4306 // it cannot be used by instructions that want an immediate value. 4307 4308 bool Assembler::reachable(AddressLiteral adr) { 4309 int64_t disp; 4310 // A reloc type of none will force a 64-bit literal into the code stream. It is likely a placeholder 4311 // for something that will be patched later, and we need to be certain it will 4312 // always be reachable. 4313 if (adr.reloc() == relocInfo::none) { 4314 return false; 4315 } 4316 if (adr.reloc() == relocInfo::internal_word_type) { 4317 // This should be rip relative and easily reachable. 4318 return true; 4319 } 4320 if (adr.reloc() == relocInfo::virtual_call_type || 4321 adr.reloc() == relocInfo::opt_virtual_call_type || 4322 adr.reloc() == relocInfo::static_call_type || 4323 adr.reloc() == relocInfo::static_stub_type ) { 4324 // This should be rip relative within the code cache and easily 4325 // reachable until we get huge code caches. (At which point 4326 // ic code is going to have issues). 4327 return true; 4328 } 4329 if (adr.reloc() != relocInfo::external_word_type && 4330 adr.reloc() != relocInfo::poll_return_type && // these are really external_word but need special 4331 adr.reloc() != relocInfo::poll_type && // relocs to identify them 4332 adr.reloc() != relocInfo::runtime_call_type ) { 4333 return false; 4334 } 4335 4336 // Stress the correction code 4337 if (ForceUnreachable) { 4338 // Must be a runtime call reloc; see if it is in the code cache. 4339 // Flipping stuff in the code cache to be unreachable causes issues 4340 // with things like inline caches where the additional instructions 4341 // are not handled.
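    // (Context for the displacement checks below: a rip-relative displacement
    // is a signed 32-bit quantity, so the target must lie within roughly
    // +/-2GB of the instruction that follows the access.)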
4342 if (CodeCache::find_blob(adr._target) == NULL) { 4343 return false; 4344 } 4345 } 4346 // For external_word_type/runtime_call_type, if the target is reachable both from where we 4347 // are now (possibly a temp buffer) and from anywhere we might end up 4348 // in the codeCache, then it is always reachable. 4349 // This would have to become more pessimistic if we ever 4350 // save/restore shared code. 4351 disp = (int64_t)adr._target - ((int64_t)CodeCache::low_bound() + sizeof(int)); 4352 if (!is_simm32(disp)) return false; 4353 disp = (int64_t)adr._target - ((int64_t)CodeCache::high_bound() + sizeof(int)); 4354 if (!is_simm32(disp)) return false; 4355 4356 disp = (int64_t)adr._target - ((int64_t)_code_pos + sizeof(int)); 4357 4358 // Because a rip-relative target is disp + address_of_next_instruction and we 4359 // don't know the value of address_of_next_instruction, we apply a fudge factor 4360 // to make sure we will be ok no matter the size of the instruction this displacement is placed into. 4361 // We don't have to fudge the checks above here because they are already worst case. 4362 4363 // 12 == override/rex byte, opcode byte, rm byte, sib byte, a 4-byte disp, a 4-byte literal 4364 // + 4 because better safe than sorry. 4365 const int fudge = 12 + 4; 4366 if (disp < 0) { 4367 disp -= fudge; 4368 } else { 4369 disp += fudge; 4370 } 4371 return is_simm32(disp); 4372 } 4373 4374 // Return true if the polling page is not reachable from the code cache using rip-relative 4375 // addressing. 4376 bool Assembler::is_polling_page_far() { 4377 intptr_t addr = (intptr_t)os::get_polling_page(); 4378 return ForceUnreachable || 4379 !is_simm32(addr - (intptr_t)CodeCache::low_bound()) || 4380 !is_simm32(addr - (intptr_t)CodeCache::high_bound()); 4381 } 4382 4383 void Assembler::emit_data64(jlong data, 4384 relocInfo::relocType rtype, 4385 int format) { 4386 if (rtype == relocInfo::none) { 4387 emit_long64(data); 4388 } else { 4389 emit_data64(data, Relocation::spec_simple(rtype), format); 4390 } 4391 } 4392 4393 void Assembler::emit_data64(jlong data, 4394 RelocationHolder const& rspec, 4395 int format) { 4396 assert(imm_operand == 0, "default format must be immediate in this file"); 4397 assert(imm_operand == format, "must be immediate"); 4398 assert(inst_mark() != NULL, "must be inside InstructionMark"); 4399 // Do not use AbstractAssembler::relocate, which is not intended for 4400 // embedded words. Instead, relocate to the enclosing instruction.
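  // Callers wrap the whole instruction in an InstructionMark for this reason;
  // e.g. mov_literal64() below opens one before emitting its REX prefix and
  // opcode, so inst_mark() here points at the start of the enclosing MOV.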
4401 code_section()->relocate(inst_mark(), rspec, format); 4402 #ifdef ASSERT 4403 check_relocation(rspec, format); 4404 #endif 4405 emit_long64(data); 4406 } 4407 4408 int Assembler::prefix_and_encode(int reg_enc, bool byteinst) { 4409 if (reg_enc >= 8) { 4410 prefix(REX_B); 4411 reg_enc -= 8; 4412 } else if (byteinst && reg_enc >= 4) { 4413 prefix(REX); 4414 } 4415 return reg_enc; 4416 } 4417 4418 int Assembler::prefixq_and_encode(int reg_enc) { 4419 if (reg_enc < 8) { 4420 prefix(REX_W); 4421 } else { 4422 prefix(REX_WB); 4423 reg_enc -= 8; 4424 } 4425 return reg_enc; 4426 } 4427 4428 int Assembler::prefix_and_encode(int dst_enc, int src_enc, bool byteinst) { 4429 if (dst_enc < 8) { 4430 if (src_enc >= 8) { 4431 prefix(REX_B); 4432 src_enc -= 8; 4433 } else if (byteinst && src_enc >= 4) { 4434 prefix(REX); 4435 } 4436 } else { 4437 if (src_enc < 8) { 4438 prefix(REX_R); 4439 } else { 4440 prefix(REX_RB); 4441 src_enc -= 8; 4442 } 4443 dst_enc -= 8; 4444 } 4445 return dst_enc << 3 | src_enc; 4446 } 4447 4448 int Assembler::prefixq_and_encode(int dst_enc, int src_enc) { 4449 if (dst_enc < 8) { 4450 if (src_enc < 8) { 4451 prefix(REX_W); 4452 } else { 4453 prefix(REX_WB); 4454 src_enc -= 8; 4455 } 4456 } else { 4457 if (src_enc < 8) { 4458 prefix(REX_WR); 4459 } else { 4460 prefix(REX_WRB); 4461 src_enc -= 8; 4462 } 4463 dst_enc -= 8; 4464 } 4465 return dst_enc << 3 | src_enc; 4466 } 4467 4468 void Assembler::prefix(Register reg) { 4469 if (reg->encoding() >= 8) { 4470 prefix(REX_B); 4471 } 4472 } 4473 4474 void Assembler::prefix(Address adr) { 4475 if (adr.base_needs_rex()) { 4476 if (adr.index_needs_rex()) { 4477 prefix(REX_XB); 4478 } else { 4479 prefix(REX_B); 4480 } 4481 } else { 4482 if (adr.index_needs_rex()) { 4483 prefix(REX_X); 4484 } 4485 } 4486 } 4487 4488 void Assembler::prefixq(Address adr) { 4489 if (adr.base_needs_rex()) { 4490 if (adr.index_needs_rex()) { 4491 prefix(REX_WXB); 4492 } else { 4493 prefix(REX_WB); 4494 } 4495 } else { 4496 if (adr.index_needs_rex()) { 4497 prefix(REX_WX); 4498 } else { 4499 prefix(REX_W); 4500 } 4501 } 4502 } 4503 4504 4505 void Assembler::prefix(Address adr, Register reg, bool byteinst) { 4506 if (reg->encoding() < 8) { 4507 if (adr.base_needs_rex()) { 4508 if (adr.index_needs_rex()) { 4509 prefix(REX_XB); 4510 } else { 4511 prefix(REX_B); 4512 } 4513 } else { 4514 if (adr.index_needs_rex()) { 4515 prefix(REX_X); 4516 } else if (byteinst && reg->encoding() >= 4 ) { 4517 prefix(REX); 4518 } 4519 } 4520 } else { 4521 if (adr.base_needs_rex()) { 4522 if (adr.index_needs_rex()) { 4523 prefix(REX_RXB); 4524 } else { 4525 prefix(REX_RB); 4526 } 4527 } else { 4528 if (adr.index_needs_rex()) { 4529 prefix(REX_RX); 4530 } else { 4531 prefix(REX_R); 4532 } 4533 } 4534 } 4535 } 4536 4537 void Assembler::prefixq(Address adr, Register src) { 4538 if (src->encoding() < 8) { 4539 if (adr.base_needs_rex()) { 4540 if (adr.index_needs_rex()) { 4541 prefix(REX_WXB); 4542 } else { 4543 prefix(REX_WB); 4544 } 4545 } else { 4546 if (adr.index_needs_rex()) { 4547 prefix(REX_WX); 4548 } else { 4549 prefix(REX_W); 4550 } 4551 } 4552 } else { 4553 if (adr.base_needs_rex()) { 4554 if (adr.index_needs_rex()) { 4555 prefix(REX_WRXB); 4556 } else { 4557 prefix(REX_WRB); 4558 } 4559 } else { 4560 if (adr.index_needs_rex()) { 4561 prefix(REX_WRX); 4562 } else { 4563 prefix(REX_WR); 4564 } 4565 } 4566 } 4567 } 4568 4569 void Assembler::prefix(Address adr, XMMRegister reg) { 4570 if (reg->encoding() < 8) { 4571 if (adr.base_needs_rex()) { 4572 if (adr.index_needs_rex()) { 
4573 prefix(REX_XB); 4574 } else { 4575 prefix(REX_B); 4576 } 4577 } else { 4578 if (adr.index_needs_rex()) { 4579 prefix(REX_X); 4580 } 4581 } 4582 } else { 4583 if (adr.base_needs_rex()) { 4584 if (adr.index_needs_rex()) { 4585 prefix(REX_RXB); 4586 } else { 4587 prefix(REX_RB); 4588 } 4589 } else { 4590 if (adr.index_needs_rex()) { 4591 prefix(REX_RX); 4592 } else { 4593 prefix(REX_R); 4594 } 4595 } 4596 } 4597 } 4598 4599 void Assembler::prefixq(Address adr, XMMRegister src) { 4600 if (src->encoding() < 8) { 4601 if (adr.base_needs_rex()) { 4602 if (adr.index_needs_rex()) { 4603 prefix(REX_WXB); 4604 } else { 4605 prefix(REX_WB); 4606 } 4607 } else { 4608 if (adr.index_needs_rex()) { 4609 prefix(REX_WX); 4610 } else { 4611 prefix(REX_W); 4612 } 4613 } 4614 } else { 4615 if (adr.base_needs_rex()) { 4616 if (adr.index_needs_rex()) { 4617 prefix(REX_WRXB); 4618 } else { 4619 prefix(REX_WRB); 4620 } 4621 } else { 4622 if (adr.index_needs_rex()) { 4623 prefix(REX_WRX); 4624 } else { 4625 prefix(REX_WR); 4626 } 4627 } 4628 } 4629 } 4630 4631 void Assembler::adcq(Register dst, int32_t imm32) { 4632 (void) prefixq_and_encode(dst->encoding()); 4633 emit_arith(0x81, 0xD0, dst, imm32); 4634 } 4635 4636 void Assembler::adcq(Register dst, Address src) { 4637 InstructionMark im(this); 4638 prefixq(src, dst); 4639 emit_byte(0x13); 4640 emit_operand(dst, src); 4641 } 4642 4643 void Assembler::adcq(Register dst, Register src) { 4644 (void) prefixq_and_encode(dst->encoding(), src->encoding()); 4645 emit_arith(0x13, 0xC0, dst, src); 4646 } 4647 4648 void Assembler::addq(Address dst, int32_t imm32) { 4649 InstructionMark im(this); 4650 prefixq(dst); 4651 emit_arith_operand(0x81, rax, dst, imm32); 4652 } 4653 4654 void Assembler::addq(Address dst, Register src) { 4655 InstructionMark im(this); 4656 prefixq(dst, src); 4657 emit_byte(0x01); 4658 emit_operand(src, dst); 4659 } 4660 4661 void Assembler::addq(Register dst, int32_t imm32) { 4662 (void) prefixq_and_encode(dst->encoding()); 4663 emit_arith(0x81, 0xC0, dst, imm32); 4664 } 4665 4666 void Assembler::addq(Register dst, Address src) { 4667 InstructionMark im(this); 4668 prefixq(src, dst); 4669 emit_byte(0x03); 4670 emit_operand(dst, src); 4671 } 4672 4673 void Assembler::addq(Register dst, Register src) { 4674 (void) prefixq_and_encode(dst->encoding(), src->encoding()); 4675 emit_arith(0x03, 0xC0, dst, src); 4676 } 4677 4678 void Assembler::andq(Address dst, int32_t imm32) { 4679 InstructionMark im(this); 4680 prefixq(dst); 4681 emit_byte(0x81); 4682 emit_operand(rsp, dst, 4); 4683 emit_long(imm32); 4684 } 4685 4686 void Assembler::andq(Register dst, int32_t imm32) { 4687 (void) prefixq_and_encode(dst->encoding()); 4688 emit_arith(0x81, 0xE0, dst, imm32); 4689 } 4690 4691 void Assembler::andq(Register dst, Address src) { 4692 InstructionMark im(this); 4693 prefixq(src, dst); 4694 emit_byte(0x23); 4695 emit_operand(dst, src); 4696 } 4697 4698 void Assembler::andq(Register dst, Register src) { 4699 (void) prefixq_and_encode(dst->encoding(), src->encoding()); 4700 emit_arith(0x23, 0xC0, dst, src); 4701 } 4702 4703 void Assembler::bsfq(Register dst, Register src) { 4704 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 4705 emit_byte(0x0F); 4706 emit_byte(0xBC); 4707 emit_byte(0xC0 | encode); 4708 } 4709 4710 void Assembler::bsrq(Register dst, Register src) { 4711 assert(!VM_Version::supports_lzcnt(), "encoding is treated as LZCNT"); 4712 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 4713 emit_byte(0x0F); 4714
emit_byte(0xBD); 4715 emit_byte(0xC0 | encode); 4716 } 4717 4718 void Assembler::bswapq(Register reg) { 4719 int encode = prefixq_and_encode(reg->encoding()); 4720 emit_byte(0x0F); 4721 emit_byte(0xC8 | encode); 4722 } 4723 4724 void Assembler::cdqq() { 4725 prefix(REX_W); 4726 emit_byte(0x99); 4727 } 4728 4729 void Assembler::clflush(Address adr) { 4730 prefix(adr); 4731 emit_byte(0x0F); 4732 emit_byte(0xAE); 4733 emit_operand(rdi, adr); 4734 } 4735 4736 void Assembler::cmovq(Condition cc, Register dst, Register src) { 4737 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 4738 emit_byte(0x0F); 4739 emit_byte(0x40 | cc); 4740 emit_byte(0xC0 | encode); 4741 } 4742 4743 void Assembler::cmovq(Condition cc, Register dst, Address src) { 4744 InstructionMark im(this); 4745 prefixq(src, dst); 4746 emit_byte(0x0F); 4747 emit_byte(0x40 | cc); 4748 emit_operand(dst, src); 4749 } 4750 4751 void Assembler::cmpq(Address dst, int32_t imm32) { 4752 InstructionMark im(this); 4753 prefixq(dst); 4754 emit_byte(0x81); 4755 emit_operand(rdi, dst, 4); 4756 emit_long(imm32); 4757 } 4758 4759 void Assembler::cmpq(Register dst, int32_t imm32) { 4760 (void) prefixq_and_encode(dst->encoding()); 4761 emit_arith(0x81, 0xF8, dst, imm32); 4762 } 4763 4764 void Assembler::cmpq(Address dst, Register src) { 4765 InstructionMark im(this); 4766 prefixq(dst, src); 4767 emit_byte(0x39); // CMP r/m64, r64 (0x3B would reverse the operand order) 4768 emit_operand(src, dst); 4769 } 4770 4771 void Assembler::cmpq(Register dst, Register src) { 4772 (void) prefixq_and_encode(dst->encoding(), src->encoding()); 4773 emit_arith(0x3B, 0xC0, dst, src); 4774 } 4775 4776 void Assembler::cmpq(Register dst, Address src) { 4777 InstructionMark im(this); 4778 prefixq(src, dst); 4779 emit_byte(0x3B); 4780 emit_operand(dst, src); 4781 } 4782 4783 void Assembler::cmpxchgq(Register reg, Address adr) { 4784 InstructionMark im(this); 4785 prefixq(adr, reg); 4786 emit_byte(0x0F); 4787 emit_byte(0xB1); 4788 emit_operand(reg, adr); 4789 } 4790 4791 void Assembler::cvtsi2sdq(XMMRegister dst, Register src) { 4792 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 4793 int encode = simd_prefix_and_encode_q(dst, dst, src, VEX_SIMD_F2); 4794 emit_byte(0x2A); 4795 emit_byte(0xC0 | encode); 4796 } 4797 4798 void Assembler::cvtsi2sdq(XMMRegister dst, Address src) { 4799 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 4800 InstructionMark im(this); 4801 simd_prefix_q(dst, dst, src, VEX_SIMD_F2); 4802 emit_byte(0x2A); 4803 emit_operand(dst, src); 4804 } 4805 4806 void Assembler::cvtsi2ssq(XMMRegister dst, Register src) { 4807 NOT_LP64(assert(VM_Version::supports_sse(), "")); 4808 int encode = simd_prefix_and_encode_q(dst, dst, src, VEX_SIMD_F3); 4809 emit_byte(0x2A); 4810 emit_byte(0xC0 | encode); 4811 } 4812 4813 void Assembler::cvtsi2ssq(XMMRegister dst, Address src) { 4814 NOT_LP64(assert(VM_Version::supports_sse(), "")); 4815 InstructionMark im(this); 4816 simd_prefix_q(dst, dst, src, VEX_SIMD_F3); 4817 emit_byte(0x2A); 4818 emit_operand(dst, src); 4819 } 4820 4821 void Assembler::cvttsd2siq(Register dst, XMMRegister src) { 4822 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 4823 int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_F2); 4824 emit_byte(0x2C); 4825 emit_byte(0xC0 | encode); 4826 } 4827 4828 void Assembler::cvttss2siq(Register dst, XMMRegister src) { 4829 NOT_LP64(assert(VM_Version::supports_sse(), "")); 4830 int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_F3); 4831 emit_byte(0x2C); 4832 emit_byte(0xC0 | encode); 4833 } 4834 4835 void Assembler::decl(Register
dst) { 4836 // Don't use it directly. Use MacroAssembler::decrementl() instead. 4837 // Use two-byte form (one-byte form is a REX prefix in 64-bit mode) 4838 int encode = prefix_and_encode(dst->encoding()); 4839 emit_byte(0xFF); 4840 emit_byte(0xC8 | encode); 4841 } 4842 4843 void Assembler::decq(Register dst) { 4844 // Don't use it directly. Use MacroAssembler::decrementq() instead. 4845 // Use two-byte form (one-byte form is a REX prefix in 64-bit mode) 4846 int encode = prefixq_and_encode(dst->encoding()); 4847 emit_byte(0xFF); 4848 emit_byte(0xC8 | encode); 4849 } 4850 4851 void Assembler::decq(Address dst) { 4852 // Don't use it directly. Use MacroAssembler::decrementq() instead. 4853 InstructionMark im(this); 4854 prefixq(dst); 4855 emit_byte(0xFF); 4856 emit_operand(rcx, dst); 4857 } 4858 4859 void Assembler::fxrstor(Address src) { 4860 prefixq(src); 4861 emit_byte(0x0F); 4862 emit_byte(0xAE); 4863 emit_operand(as_Register(1), src); 4864 } 4865 4866 void Assembler::fxsave(Address dst) { 4867 prefixq(dst); 4868 emit_byte(0x0F); 4869 emit_byte(0xAE); 4870 emit_operand(as_Register(0), dst); 4871 } 4872 4873 void Assembler::idivq(Register src) { 4874 int encode = prefixq_and_encode(src->encoding()); 4875 emit_byte(0xF7); 4876 emit_byte(0xF8 | encode); 4877 } 4878 4879 void Assembler::imulq(Register dst, Register src) { 4880 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 4881 emit_byte(0x0F); 4882 emit_byte(0xAF); 4883 emit_byte(0xC0 | encode); 4884 } 4885 4886 void Assembler::imulq(Register dst, Register src, int value) { 4887 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 4888 if (is8bit(value)) { 4889 emit_byte(0x6B); 4890 emit_byte(0xC0 | encode); 4891 emit_byte(value & 0xFF); 4892 } else { 4893 emit_byte(0x69); 4894 emit_byte(0xC0 | encode); 4895 emit_long(value); 4896 } 4897 } 4898 4899 void Assembler::incl(Register dst) { 4900 // Don't use it directly. Use MacroAssembler::incrementl() instead. 4901 // Use two-byte form (one-byte form is a REX prefix in 64-bit mode) 4902 int encode = prefix_and_encode(dst->encoding()); 4903 emit_byte(0xFF); 4904 emit_byte(0xC0 | encode); 4905 } 4906 4907 void Assembler::incq(Register dst) { 4908 // Don't use it directly. Use MacroAssembler::incrementq() instead. 4909 // Use two-byte form (one-byte form is a REX prefix in 64-bit mode) 4910 int encode = prefixq_and_encode(dst->encoding()); 4911 emit_byte(0xFF); 4912 emit_byte(0xC0 | encode); 4913 } 4914 4915 void Assembler::incq(Address dst) { 4916 // Don't use it directly. Use MacroAssembler::incrementq() instead.
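  // Worked example: incq(Address(rax, 0)) assembles to 48 FF 00 --
  // REX.W from prefixq(), opcode 0xFF, and ModRM 0x00 (mod=00, reg=/0, rm=rax).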
4917 InstructionMark im(this); 4918 prefixq(dst); 4919 emit_byte(0xFF); 4920 emit_operand(rax, dst); 4921 } 4922 4923 void Assembler::lea(Register dst, Address src) { 4924 leaq(dst, src); 4925 } 4926 4927 void Assembler::leaq(Register dst, Address src) { 4928 InstructionMark im(this); 4929 prefixq(src, dst); 4930 emit_byte(0x8D); 4931 emit_operand(dst, src); 4932 } 4933 4934 void Assembler::mov64(Register dst, int64_t imm64) { 4935 InstructionMark im(this); 4936 int encode = prefixq_and_encode(dst->encoding()); 4937 emit_byte(0xB8 | encode); 4938 emit_long64(imm64); 4939 } 4940 4941 void Assembler::mov_literal64(Register dst, intptr_t imm64, RelocationHolder const& rspec) { 4942 InstructionMark im(this); 4943 int encode = prefixq_and_encode(dst->encoding()); 4944 emit_byte(0xB8 | encode); 4945 emit_data64(imm64, rspec); 4946 } 4947 4948 void Assembler::mov_narrow_oop(Register dst, int32_t imm32, RelocationHolder const& rspec) { 4949 InstructionMark im(this); 4950 int encode = prefix_and_encode(dst->encoding()); 4951 emit_byte(0xB8 | encode); 4952 emit_data((int)imm32, rspec, narrow_oop_operand); 4953 } 4954 4955 void Assembler::mov_narrow_oop(Address dst, int32_t imm32, RelocationHolder const& rspec) { 4956 InstructionMark im(this); 4957 prefix(dst); 4958 emit_byte(0xC7); 4959 emit_operand(rax, dst, 4); 4960 emit_data((int)imm32, rspec, narrow_oop_operand); 4961 } 4962 4963 void Assembler::cmp_narrow_oop(Register src1, int32_t imm32, RelocationHolder const& rspec) { 4964 InstructionMark im(this); 4965 int encode = prefix_and_encode(src1->encoding()); 4966 emit_byte(0x81); 4967 emit_byte(0xF8 | encode); 4968 emit_data((int)imm32, rspec, narrow_oop_operand); 4969 } 4970 4971 void Assembler::cmp_narrow_oop(Address src1, int32_t imm32, RelocationHolder const& rspec) { 4972 InstructionMark im(this); 4973 prefix(src1); 4974 emit_byte(0x81); 4975 emit_operand(rax, src1, 4); 4976 emit_data((int)imm32, rspec, narrow_oop_operand); 4977 } 4978 4979 void Assembler::lzcntq(Register dst, Register src) { 4980 assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR"); 4981 emit_byte(0xF3); 4982 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 4983 emit_byte(0x0F); 4984 emit_byte(0xBD); 4985 emit_byte(0xC0 | encode); 4986 } 4987 4988 void Assembler::movdq(XMMRegister dst, Register src) { 4989 // table D-1 says MMX/SSE2 4990 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 4991 int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_66); 4992 emit_byte(0x6E); 4993 emit_byte(0xC0 | encode); 4994 } 4995 4996 void Assembler::movdq(Register dst, XMMRegister src) { 4997 // table D-1 says MMX/SSE2 4998 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 4999 // swap src/dst to get correct prefix 5000 int encode = simd_prefix_and_encode_q(src, dst, VEX_SIMD_66); 5001 emit_byte(0x7E); 5002 emit_byte(0xC0 | encode); 5003 } 5004 5005 void Assembler::movq(Register dst, Register src) { 5006 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 5007 emit_byte(0x8B); 5008 emit_byte(0xC0 | encode); 5009 } 5010 5011 void Assembler::movq(Register dst, Address src) { 5012 InstructionMark im(this); 5013 prefixq(src, dst); 5014 emit_byte(0x8B); 5015 emit_operand(dst, src); 5016 } 5017 5018 void Assembler::movq(Address dst, Register src) { 5019 InstructionMark im(this); 5020 prefixq(dst, src); 5021 emit_byte(0x89); 5022 emit_operand(src, dst); 5023 } 5024 5025 void Assembler::movsbq(Register dst, Address src) { 5026 InstructionMark im(this); 5027 prefixq(src, dst); 5028 emit_byte(0x0F); 
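  // 0x0F 0xBE /r with the REX.W prefix above is MOVSX r64, r/m8:
  // sign-extend a byte loaded from memory into the full 64-bit register.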
5029 emit_byte(0xBE); 5030 emit_operand(dst, src); 5031 } 5032 5033 void Assembler::movsbq(Register dst, Register src) { 5034 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 5035 emit_byte(0x0F); 5036 emit_byte(0xBE); 5037 emit_byte(0xC0 | encode); 5038 } 5039 5040 void Assembler::movslq(Register dst, int32_t imm32) { 5041 // dbx shows movslq(rcx, 3) as movq $0x0000000049000000,(%rbx) 5042 // and movslq(r8, 3); as movl $0x0000000048000000,(%rbx) 5043 // as a result we shouldn't use it until tested at runtime... 5044 ShouldNotReachHere(); 5045 InstructionMark im(this); 5046 int encode = prefixq_and_encode(dst->encoding()); 5047 emit_byte(0xC7 | encode); 5048 emit_long(imm32); 5049 } 5050 5051 void Assembler::movslq(Address dst, int32_t imm32) { 5052 assert(is_simm32(imm32), "lost bits"); 5053 InstructionMark im(this); 5054 prefixq(dst); 5055 emit_byte(0xC7); 5056 emit_operand(rax, dst, 4); 5057 emit_long(imm32); 5058 } 5059 5060 void Assembler::movslq(Register dst, Address src) { 5061 InstructionMark im(this); 5062 prefixq(src, dst); 5063 emit_byte(0x63); 5064 emit_operand(dst, src); 5065 } 5066 5067 void Assembler::movslq(Register dst, Register src) { 5068 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 5069 emit_byte(0x63); 5070 emit_byte(0xC0 | encode); 5071 } 5072 5073 void Assembler::movswq(Register dst, Address src) { 5074 InstructionMark im(this); 5075 prefixq(src, dst); 5076 emit_byte(0x0F); 5077 emit_byte(0xBF); 5078 emit_operand(dst, src); 5079 } 5080 5081 void Assembler::movswq(Register dst, Register src) { 5082 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 5083 emit_byte(0x0F); 5084 emit_byte(0xBF); 5085 emit_byte(0xC0 | encode); 5086 } 5087 5088 void Assembler::movzbq(Register dst, Address src) { 5089 InstructionMark im(this); 5090 prefixq(src, dst); 5091 emit_byte(0x0F); 5092 emit_byte(0xB6); 5093 emit_operand(dst, src); 5094 } 5095 5096 void Assembler::movzbq(Register dst, Register src) { 5097 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 5098 emit_byte(0x0F); 5099 emit_byte(0xB6); 5100 emit_byte(0xC0 | encode); 5101 } 5102 5103 void Assembler::movzwq(Register dst, Address src) { 5104 InstructionMark im(this); 5105 prefixq(src, dst); 5106 emit_byte(0x0F); 5107 emit_byte(0xB7); 5108 emit_operand(dst, src); 5109 } 5110 5111 void Assembler::movzwq(Register dst, Register src) { 5112 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 5113 emit_byte(0x0F); 5114 emit_byte(0xB7); 5115 emit_byte(0xC0 | encode); 5116 } 5117 5118 void Assembler::negq(Register dst) { 5119 int encode = prefixq_and_encode(dst->encoding()); 5120 emit_byte(0xF7); 5121 emit_byte(0xD8 | encode); 5122 } 5123 5124 void Assembler::notq(Register dst) { 5125 int encode = prefixq_and_encode(dst->encoding()); 5126 emit_byte(0xF7); 5127 emit_byte(0xD0 | encode); 5128 } 5129 5130 void Assembler::orq(Address dst, int32_t imm32) { 5131 InstructionMark im(this); 5132 prefixq(dst); 5133 emit_byte(0x81); 5134 emit_operand(rcx, dst, 4); 5135 emit_long(imm32); 5136 } 5137 5138 void Assembler::orq(Register dst, int32_t imm32) { 5139 (void) prefixq_and_encode(dst->encoding()); 5140 emit_arith(0x81, 0xC8, dst, imm32); 5141 } 5142 5143 void Assembler::orq(Register dst, Address src) { 5144 InstructionMark im(this); 5145 prefixq(src, dst); 5146 emit_byte(0x0B); 5147 emit_operand(dst, src); 5148 } 5149 5150 void Assembler::orq(Register dst, Register src) { 5151 (void) prefixq_and_encode(dst->encoding(), src->encoding()); 5152
emit_arith(0x0B, 0xC0, dst, src); 5153 } 5154 5155 void Assembler::popa() { // 64bit 5156 movq(r15, Address(rsp, 0)); 5157 movq(r14, Address(rsp, wordSize)); 5158 movq(r13, Address(rsp, 2 * wordSize)); 5159 movq(r12, Address(rsp, 3 * wordSize)); 5160 movq(r11, Address(rsp, 4 * wordSize)); 5161 movq(r10, Address(rsp, 5 * wordSize)); 5162 movq(r9, Address(rsp, 6 * wordSize)); 5163 movq(r8, Address(rsp, 7 * wordSize)); 5164 movq(rdi, Address(rsp, 8 * wordSize)); 5165 movq(rsi, Address(rsp, 9 * wordSize)); 5166 movq(rbp, Address(rsp, 10 * wordSize)); 5167 // skip rsp 5168 movq(rbx, Address(rsp, 12 * wordSize)); 5169 movq(rdx, Address(rsp, 13 * wordSize)); 5170 movq(rcx, Address(rsp, 14 * wordSize)); 5171 movq(rax, Address(rsp, 15 * wordSize)); 5172 5173 addq(rsp, 16 * wordSize); 5174 } 5175 5176 void Assembler::popcntq(Register dst, Address src) { 5177 assert(VM_Version::supports_popcnt(), "must support"); 5178 InstructionMark im(this); 5179 emit_byte(0xF3); 5180 prefixq(src, dst); 5181 emit_byte(0x0F); 5182 emit_byte(0xB8); 5183 emit_operand(dst, src); 5184 } 5185 5186 void Assembler::popcntq(Register dst, Register src) { 5187 assert(VM_Version::supports_popcnt(), "must support"); 5188 emit_byte(0xF3); 5189 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 5190 emit_byte(0x0F); 5191 emit_byte(0xB8); 5192 emit_byte(0xC0 | encode); 5193 } 5194 5195 void Assembler::popq(Address dst) { 5196 InstructionMark im(this); 5197 prefixq(dst); 5198 emit_byte(0x8F); 5199 emit_operand(rax, dst); 5200 } 5201 5202 void Assembler::pusha() { // 64bit 5203 // we have to store original rsp. ABI says that 128 bytes 5204 // below rsp are local scratch. 5205 movq(Address(rsp, -5 * wordSize), rsp); 5206 5207 subq(rsp, 16 * wordSize); 5208 5209 movq(Address(rsp, 15 * wordSize), rax); 5210 movq(Address(rsp, 14 * wordSize), rcx); 5211 movq(Address(rsp, 13 * wordSize), rdx); 5212 movq(Address(rsp, 12 * wordSize), rbx); 5213 // skip rsp 5214 movq(Address(rsp, 10 * wordSize), rbp); 5215 movq(Address(rsp, 9 * wordSize), rsi); 5216 movq(Address(rsp, 8 * wordSize), rdi); 5217 movq(Address(rsp, 7 * wordSize), r8); 5218 movq(Address(rsp, 6 * wordSize), r9); 5219 movq(Address(rsp, 5 * wordSize), r10); 5220 movq(Address(rsp, 4 * wordSize), r11); 5221 movq(Address(rsp, 3 * wordSize), r12); 5222 movq(Address(rsp, 2 * wordSize), r13); 5223 movq(Address(rsp, wordSize), r14); 5224 movq(Address(rsp, 0), r15); 5225 } 5226 5227 void Assembler::pushq(Address src) { 5228 InstructionMark im(this); 5229 prefixq(src); 5230 emit_byte(0xFF); 5231 emit_operand(rsi, src); 5232 } 5233 5234 void Assembler::rclq(Register dst, int imm8) { 5235 assert(isShiftCount(imm8 >> 1), "illegal shift count"); 5236 int encode = prefixq_and_encode(dst->encoding()); 5237 if (imm8 == 1) { 5238 emit_byte(0xD1); 5239 emit_byte(0xD0 | encode); 5240 } else { 5241 emit_byte(0xC1); 5242 emit_byte(0xD0 | encode); 5243 emit_byte(imm8); 5244 } 5245 } 5246 void Assembler::sarq(Register dst, int imm8) { 5247 assert(isShiftCount(imm8 >> 1), "illegal shift count"); 5248 int encode = prefixq_and_encode(dst->encoding()); 5249 if (imm8 == 1) { 5250 emit_byte(0xD1); 5251 emit_byte(0xF8 | encode); 5252 } else { 5253 emit_byte(0xC1); 5254 emit_byte(0xF8 | encode); 5255 emit_byte(imm8); 5256 } 5257 } 5258 5259 void Assembler::sarq(Register dst) { 5260 int encode = prefixq_and_encode(dst->encoding()); 5261 emit_byte(0xD3); 5262 emit_byte(0xF8 | encode); 5263 } 5264 5265 void Assembler::sbbq(Address dst, int32_t imm32) { 5266 InstructionMark im(this); 5267 prefixq(dst); 
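  // emit_arith_operand() picks the encoding: 0x83 with a sign-extended imm8
  // when imm32 fits in a byte, otherwise 0x81 with the full 32-bit immediate.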
5268 emit_arith_operand(0x81, rbx, dst, imm32); 5269 } 5270 5271 void Assembler::sbbq(Register dst, int32_t imm32) { 5272 (void) prefixq_and_encode(dst->encoding()); 5273 emit_arith(0x81, 0xD8, dst, imm32); 5274 } 5275 5276 void Assembler::sbbq(Register dst, Address src) { 5277 InstructionMark im(this); 5278 prefixq(src, dst); 5279 emit_byte(0x1B); 5280 emit_operand(dst, src); 5281 } 5282 5283 void Assembler::sbbq(Register dst, Register src) { 5284 (void) prefixq_and_encode(dst->encoding(), src->encoding()); 5285 emit_arith(0x1B, 0xC0, dst, src); 5286 } 5287 5288 void Assembler::shlq(Register dst, int imm8) { 5289 assert(isShiftCount(imm8 >> 1), "illegal shift count"); 5290 int encode = prefixq_and_encode(dst->encoding()); 5291 if (imm8 == 1) { 5292 emit_byte(0xD1); 5293 emit_byte(0xE0 | encode); 5294 } else { 5295 emit_byte(0xC1); 5296 emit_byte(0xE0 | encode); 5297 emit_byte(imm8); 5298 } 5299 } 5300 5301 void Assembler::shlq(Register dst) { 5302 int encode = prefixq_and_encode(dst->encoding()); 5303 emit_byte(0xD3); 5304 emit_byte(0xE0 | encode); 5305 } 5306 5307 void Assembler::shrq(Register dst, int imm8) { 5308 assert(isShiftCount(imm8 >> 1), "illegal shift count"); 5309 int encode = prefixq_and_encode(dst->encoding()); 5310 emit_byte(0xC1); 5311 emit_byte(0xE8 | encode); 5312 emit_byte(imm8); 5313 } 5314 5315 void Assembler::shrq(Register dst) { 5316 int encode = prefixq_and_encode(dst->encoding()); 5317 emit_byte(0xD3); 5318 emit_byte(0xE8 | encode); 5319 } 5320 5321 void Assembler::subq(Address dst, int32_t imm32) { 5322 InstructionMark im(this); 5323 prefixq(dst); 5324 emit_arith_operand(0x81, rbp, dst, imm32); 5325 } 5326 5327 void Assembler::subq(Address dst, Register src) { 5328 InstructionMark im(this); 5329 prefixq(dst, src); 5330 emit_byte(0x29); 5331 emit_operand(src, dst); 5332 } 5333 5334 void Assembler::subq(Register dst, int32_t imm32) { 5335 (void) prefixq_and_encode(dst->encoding()); 5336 emit_arith(0x81, 0xE8, dst, imm32); 5337 } 5338 5339 // Force generation of a 4 byte immediate value even if it fits into 8bit 5340 void Assembler::subq_imm32(Register dst, int32_t imm32) { 5341 (void) prefixq_and_encode(dst->encoding()); 5342 emit_arith_imm32(0x81, 0xE8, dst, imm32); 5343 } 5344 5345 void Assembler::subq(Register dst, Address src) { 5346 InstructionMark im(this); 5347 prefixq(src, dst); 5348 emit_byte(0x2B); 5349 emit_operand(dst, src); 5350 } 5351 5352 void Assembler::subq(Register dst, Register src) { 5353 (void) prefixq_and_encode(dst->encoding(), src->encoding()); 5354 emit_arith(0x2B, 0xC0, dst, src); 5355 } 5356 5357 void Assembler::testq(Register dst, int32_t imm32) { 5358 // not using emit_arith because test 5359 // doesn't support sign-extension of 5360 // 8bit operands 5361 int encode = dst->encoding(); 5362 if (encode == 0) { 5363 prefix(REX_W); 5364 emit_byte(0xA9); 5365 } else { 5366 encode = prefixq_and_encode(encode); 5367 emit_byte(0xF7); 5368 emit_byte(0xC0 | encode); 5369 } 5370 emit_long(imm32); 5371 } 5372 5373 void Assembler::testq(Register dst, Register src) { 5374 (void) prefixq_and_encode(dst->encoding(), src->encoding()); 5375 emit_arith(0x85, 0xC0, dst, src); 5376 } 5377 5378 void Assembler::xaddq(Address dst, Register src) { 5379 InstructionMark im(this); 5380 prefixq(dst, src); 5381 emit_byte(0x0F); 5382 emit_byte(0xC1); 5383 emit_operand(src, dst); 5384 } 5385 5386 void Assembler::xchgq(Register dst, Address src) { 5387 InstructionMark im(this); 5388 prefixq(src, dst); 5389 emit_byte(0x87); 5390 emit_operand(dst, src); 5391 } 5392 5393 
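// Note: the memory form of XCHG above has implicit LOCK semantics regardless
// of any prefix; the register-register form below is an ordinary swap with no
// such locking behavior.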
void Assembler::xchgq(Register dst, Register src) { 5394 int encode = prefixq_and_encode(dst->encoding(), src->encoding()); 5395 emit_byte(0x87); 5396 emit_byte(0xc0 | encode); 5397 } 5398 5399 void Assembler::xorq(Register dst, Register src) { 5400 (void) prefixq_and_encode(dst->encoding(), src->encoding()); 5401 emit_arith(0x33, 0xC0, dst, src); 5402 } 5403 5404 void Assembler::xorq(Register dst, Address src) { 5405 InstructionMark im(this); 5406 prefixq(src, dst); 5407 emit_byte(0x33); 5408 emit_operand(dst, src); 5409 } 5410 5411 #endif // !LP64 5412 5413 static Assembler::Condition reverse[] = { 5414 Assembler::noOverflow /* overflow = 0x0 */ , 5415 Assembler::overflow /* noOverflow = 0x1 */ , 5416 Assembler::aboveEqual /* carrySet = 0x2, below = 0x2 */ , 5417 Assembler::below /* aboveEqual = 0x3, carryClear = 0x3 */ , 5418 Assembler::notZero /* zero = 0x4, equal = 0x4 */ , 5419 Assembler::zero /* notZero = 0x5, notEqual = 0x5 */ , 5420 Assembler::above /* belowEqual = 0x6 */ , 5421 Assembler::belowEqual /* above = 0x7 */ , 5422 Assembler::positive /* negative = 0x8 */ , 5423 Assembler::negative /* positive = 0x9 */ , 5424 Assembler::noParity /* parity = 0xa */ , 5425 Assembler::parity /* noParity = 0xb */ , 5426 Assembler::greaterEqual /* less = 0xc */ , 5427 Assembler::less /* greaterEqual = 0xd */ , 5428 Assembler::greater /* lessEqual = 0xe */ , 5429 Assembler::lessEqual /* greater = 0xf, */ 5430 5431 }; 5432 5433 5434 // Implementation of MacroAssembler 5435 5436 // First all the versions that have distinct versions depending on 32/64 bit 5437 // Unless the difference is trivial (1 line or so). 5438 5439 #ifndef _LP64 5440 5441 // 32bit versions 5442 5443 Address MacroAssembler::as_Address(AddressLiteral adr) { 5444 return Address(adr.target(), adr.rspec()); 5445 } 5446 5447 Address MacroAssembler::as_Address(ArrayAddress adr) { 5448 return Address::make_array(adr); 5449 } 5450 5451 int MacroAssembler::biased_locking_enter(Register lock_reg, 5452 Register obj_reg, 5453 Register swap_reg, 5454 Register tmp_reg, 5455 bool swap_reg_contains_mark, 5456 Label& done, 5457 Label* slow_case, 5458 BiasedLockingCounters* counters) { 5459 assert(UseBiasedLocking, "why call this otherwise?"); 5460 assert(swap_reg == rax, "swap_reg must be rax, for cmpxchg"); 5461 assert_different_registers(lock_reg, obj_reg, swap_reg); 5462 5463 if (PrintBiasedLockingStatistics && counters == NULL) 5464 counters = BiasedLocking::counters(); 5465 5466 bool need_tmp_reg = false; 5467 if (tmp_reg == noreg) { 5468 need_tmp_reg = true; 5469 tmp_reg = lock_reg; 5470 } else { 5471 assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg); 5472 } 5473 assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout"); 5474 Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes()); 5475 Address klass_addr (obj_reg, oopDesc::klass_offset_in_bytes()); 5476 Address saved_mark_addr(lock_reg, 0); 5477 5478 // Biased locking 5479 // See whether the lock is currently biased toward our thread and 5480 // whether the epoch is still valid 5481 // Note that the runtime guarantees sufficient alignment of JavaThread 5482 // pointers to allow age to be placed into low bits 5483 // First check to see whether biasing is even enabled for this object 5484 Label cas_label; 5485 int null_check_offset = -1; 5486 if (!swap_reg_contains_mark) { 5487 null_check_offset = offset(); 5488 movl(swap_reg, mark_addr); 5489 } 5490 if (need_tmp_reg) { 5491 
    push(tmp_reg);
  }
  movl(tmp_reg, swap_reg);
  andl(tmp_reg, markOopDesc::biased_lock_mask_in_place);
  cmpl(tmp_reg, markOopDesc::biased_lock_pattern);
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  jcc(Assembler::notEqual, cas_label);
  // The bias pattern is present in the object's header. Need to check
  // whether the bias owner and the epoch are both still current.
  // Note that because there is no current thread register on x86 we
  // need to store off the mark word we read out of the object to
  // avoid reloading it and needing to recheck invariants below. This
  // store is unfortunate but it makes the overall code shorter and
  // simpler.
  movl(saved_mark_addr, swap_reg);
  if (need_tmp_reg) {
    push(tmp_reg);
  }
  get_thread(tmp_reg);
  xorl(swap_reg, tmp_reg);
  if (swap_reg_contains_mark) {
    null_check_offset = offset();
  }
  movl(tmp_reg, klass_addr);
  xorl(swap_reg, Address(tmp_reg, Klass::prototype_header_offset()));
  andl(swap_reg, ~((int) markOopDesc::age_mask_in_place));
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  if (counters != NULL) {
    cond_inc32(Assembler::zero,
               ExternalAddress((address)counters->biased_lock_entry_count_addr()));
  }
  jcc(Assembler::equal, done);

  Label try_revoke_bias;
  Label try_rebias;

  // At this point we know that the header has the bias pattern and
  // that we are not the bias owner in the current epoch. We need to
  // figure out more details about the state of the header in order to
  // know what operations can be legally performed on the object's
  // header.

  // If the low three bits in the xor result aren't clear, that means
  // the prototype header is no longer biased and we have to revoke
  // the bias on this object.
  testl(swap_reg, markOopDesc::biased_lock_mask_in_place);
  jcc(Assembler::notZero, try_revoke_bias);

  // Biasing is still enabled for this data type. See whether the
  // epoch of the current bias is still valid, meaning that the epoch
  // bits of the mark word are equal to the epoch bits of the
  // prototype header. (Note that the prototype header's epoch bits
  // only change at a safepoint.) If not, attempt to rebias the object
  // toward the current thread. Note that we must be absolutely sure
  // that the current epoch is invalid in order to do this because
  // otherwise the manipulations it performs on the mark word are
  // illegal.
  testl(swap_reg, markOopDesc::epoch_mask_in_place);
  jcc(Assembler::notZero, try_rebias);

  // The epoch of the current bias is still valid but we know nothing
  // about the owner; it might be set or it might be clear. Try to
  // acquire the bias of the object using an atomic operation. If this
  // fails we will go into the runtime to revoke the object's bias.
  // Note that we first construct the presumed unbiased header so we
  // don't accidentally blow away another thread's valid bias.
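  // (For reference -- layout assumed from markOop.hpp -- a biased mark word
  // is [thread | epoch:2 | age:4 | biased_lock:1 | lock:2], so the mask
  // below keeps everything except the thread bits.)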
  movl(swap_reg, saved_mark_addr);
  andl(swap_reg,
       markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);
  if (need_tmp_reg) {
    push(tmp_reg);
  }
  get_thread(tmp_reg);
  orl(tmp_reg, swap_reg);
  if (os::is_MP()) {
    lock();
  }
  cmpxchgptr(tmp_reg, Address(obj_reg, 0));
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  // If the biasing toward our thread failed, this means that
  // another thread succeeded in biasing it toward itself and we
  // need to revoke that bias. The revocation will occur in the
  // interpreter runtime in the slow case.
  if (counters != NULL) {
    cond_inc32(Assembler::zero,
               ExternalAddress((address)counters->anonymously_biased_lock_entry_count_addr()));
  }
  if (slow_case != NULL) {
    jcc(Assembler::notZero, *slow_case);
  }
  jmp(done);

  bind(try_rebias);
  // At this point we know the epoch has expired, meaning that the
  // current "bias owner", if any, is actually invalid. Under these
  // circumstances _only_, we are allowed to use the current header's
  // value as the comparison value when doing the cas to acquire the
  // bias in the current epoch. In other words, we allow transfer of
  // the bias from one thread to another directly in this situation.
  //
  // FIXME: due to a lack of registers we currently blow away the age
  // bits in this situation. Should attempt to preserve them.
  if (need_tmp_reg) {
    push(tmp_reg);
  }
  get_thread(tmp_reg);
  movl(swap_reg, klass_addr);
  orl(tmp_reg, Address(swap_reg, Klass::prototype_header_offset()));
  movl(swap_reg, saved_mark_addr);
  if (os::is_MP()) {
    lock();
  }
  cmpxchgptr(tmp_reg, Address(obj_reg, 0));
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  // If the biasing toward our thread failed, then another thread
  // succeeded in biasing it toward itself and we need to revoke that
  // bias. The revocation will occur in the runtime in the slow case.
  if (counters != NULL) {
    cond_inc32(Assembler::zero,
               ExternalAddress((address)counters->rebiased_lock_entry_count_addr()));
  }
  if (slow_case != NULL) {
    jcc(Assembler::notZero, *slow_case);
  }
  jmp(done);

  bind(try_revoke_bias);
  // The prototype mark in the klass doesn't have the bias bit set any
  // more, indicating that objects of this data type are not supposed
  // to be biased any more. We are going to try to reset the mark of
  // this object to the prototype value and fall through to the
  // CAS-based locking scheme. Note that if our CAS fails, it means
  // that another thread raced us for the privilege of revoking the
  // bias of this particular object, so it's okay to continue in the
  // normal locking code.
  //
  // FIXME: due to a lack of registers we currently blow away the age
  // bits in this situation. Should attempt to preserve them.
  movl(swap_reg, saved_mark_addr);
  if (need_tmp_reg) {
    push(tmp_reg);
  }
  movl(tmp_reg, klass_addr);
  movl(tmp_reg, Address(tmp_reg, Klass::prototype_header_offset()));
  if (os::is_MP()) {
    lock();
  }
  cmpxchgptr(tmp_reg, Address(obj_reg, 0));
  if (need_tmp_reg) {
    pop(tmp_reg);
  }
  // Fall through to the normal CAS-based lock, because no matter what
  // the result of the above CAS, some thread must have succeeded in
  // removing the bias bit from the object's header.
  if (counters != NULL) {
    cond_inc32(Assembler::zero,
               ExternalAddress((address)counters->revoked_lock_entry_count_addr()));
  }

  bind(cas_label);

  return null_check_offset;
}

void MacroAssembler::call_VM_leaf_base(address entry_point,
                                       int number_of_arguments) {
  call(RuntimeAddress(entry_point));
  increment(rsp, number_of_arguments * wordSize);
}

void MacroAssembler::cmpklass(Address src1, Metadata* obj) {
  cmp_literal32(src1, (int32_t)obj, metadata_Relocation::spec_for_immediate());
}

void MacroAssembler::cmpklass(Register src1, Metadata* obj) {
  cmp_literal32(src1, (int32_t)obj, metadata_Relocation::spec_for_immediate());
}

void MacroAssembler::cmpoop(Address src1, jobject obj) {
  cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate());
}

void MacroAssembler::cmpoop(Register src1, jobject obj) {
  cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate());
}

void MacroAssembler::extend_sign(Register hi, Register lo) {
  // According to Intel Doc. AP-526, "Integer Divide", p.18.
  if (VM_Version::is_P6() && hi == rdx && lo == rax) {
    cdql();
  } else {
    movl(hi, lo);
    sarl(hi, 31);
  }
}

void MacroAssembler::jC2(Register tmp, Label& L) {
  // set parity bit if FPU flag C2 is set (via rax)
  save_rax(tmp);
  fwait(); fnstsw_ax();
  sahf();
  restore_rax(tmp);
  // branch
  jcc(Assembler::parity, L);
}

void MacroAssembler::jnC2(Register tmp, Label& L) {
  // set parity bit if FPU flag C2 is set (via rax)
  save_rax(tmp);
  fwait(); fnstsw_ax();
  sahf();
  restore_rax(tmp);
  // branch
  jcc(Assembler::noParity, L);
}

// 32bit can do a case table jump in one instruction but we no longer allow the base
// to be installed in the Address class
void MacroAssembler::jump(ArrayAddress entry) {
  jmp(as_Address(entry));
}

// Note: y_lo will be destroyed
void MacroAssembler::lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo) {
  // Long compare for Java (semantics as described in JVM spec.)
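  // Leaves -1, 0 or 1 in x_hi, i.e. the value the lcmp bytecode pushes:
  //   x < y => -1, x == y => 0, x > y => 1.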
  Label high, low, done;

  cmpl(x_hi, y_hi);
  jcc(Assembler::less, low);
  jcc(Assembler::greater, high);
  // x_hi is the return register
  xorl(x_hi, x_hi);
  cmpl(x_lo, y_lo);
  jcc(Assembler::below, low);
  jcc(Assembler::equal, done);

  bind(high);
  xorl(x_hi, x_hi);
  increment(x_hi);
  jmp(done);

  bind(low);
  xorl(x_hi, x_hi);
  decrementl(x_hi);

  bind(done);
}

void MacroAssembler::lea(Register dst, AddressLiteral src) {
  mov_literal32(dst, (int32_t)src.target(), src.rspec());
}

void MacroAssembler::lea(Address dst, AddressLiteral adr) {
  // leal(dst, as_Address(adr));
  // see note in movl as to why we must use a move
  mov_literal32(dst, (int32_t) adr.target(), adr.rspec());
}

void MacroAssembler::leave() {
  mov(rsp, rbp);
  pop(rbp);
}

void MacroAssembler::lmul(int x_rsp_offset, int y_rsp_offset) {
  // Multiplication of two Java long values stored on the stack
  // as illustrated below. Result is in rdx:rax.
  //
  // rsp ---> [  ??  ] \               \
  //          ....      | y_rsp_offset  |
  //          [ y_lo ] /  (in bytes)    | x_rsp_offset
  //          [ y_hi ]                  | (in bytes)
  //          ....                      |
  //          [ x_lo ]                 /
  //          [ x_hi ]
  //          ....
  //
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
  Address x_hi(rsp, x_rsp_offset + wordSize); Address x_lo(rsp, x_rsp_offset);
  Address y_hi(rsp, y_rsp_offset + wordSize); Address y_lo(rsp, y_rsp_offset);
  Label quick;
  // load x_hi, y_hi and check if quick
  // multiplication is possible
  movl(rbx, x_hi);
  movl(rcx, y_hi);
  movl(rax, rbx);
  orl(rbx, rcx);                                 // rbx = 0 <=> x_hi = 0 and y_hi = 0
  jcc(Assembler::zero, quick);                   // if rbx = 0 do quick multiply
  // do full multiplication
  // 1st step
  mull(y_lo);                                    // x_hi * y_lo
  movl(rbx, rax);                                // save lo(x_hi * y_lo) in rbx
  // 2nd step
  movl(rax, x_lo);
  mull(rcx);                                     // x_lo * y_hi
  addl(rbx, rax);                                // add lo(x_lo * y_hi) to rbx
  // 3rd step
  bind(quick);                                   // note: rbx = 0 if quick multiply!
  movl(rax, x_lo);
  mull(y_lo);                                    // x_lo * y_lo
  addl(rdx, rbx);                                // correct hi(x_lo * y_lo)
}

void MacroAssembler::lneg(Register hi, Register lo) {
  negl(lo);
  adcl(hi, 0);
  negl(hi);
}

void MacroAssembler::lshl(Register hi, Register lo) {
  // Java shift left long support (semantics as described in JVM spec., p.305)
  // (basic idea for shift counts s >= n: x << s == (x << n) << (s - n))
  // shift value is in rcx !
  assert(hi != rcx, "must not use rcx");
  assert(lo != rcx, "must not use rcx");
  const Register s = rcx;                        // shift count
  const int      n = BitsPerWord;
  Label L;
  andl(s, 0x3f);                                 // s := s & 0x3f (s < 0x40)
  cmpl(s, n);                                    // if (s < n)
  jcc(Assembler::less, L);                       // else (s >= n)
  movl(hi, lo);                                  // x := x << n
  xorl(lo, lo);
  // Note: subl(s, n) is not needed since the Intel shift instructions work rcx mod n!
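  // e.g. for s = 40: the block above shifts by one word (hi = lo, lo = 0),
  // and shldl/shll below then shift by s mod 32 = 8 -- 40 bits in total.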
  bind(L);                                       // s (mod n) < n
  shldl(hi, lo);                                 // x := x << s
  shll(lo);
}

void MacroAssembler::lshr(Register hi, Register lo, bool sign_extension) {
  // Java shift right long support (semantics as described in JVM spec., p.306 & p.310)
  // (basic idea for shift counts s >= n: x >> s == (x >> n) >> (s - n))
  assert(hi != rcx, "must not use rcx");
  assert(lo != rcx, "must not use rcx");
  const Register s = rcx;                        // shift count
  const int      n = BitsPerWord;
  Label L;
  andl(s, 0x3f);                                 // s := s & 0x3f (s < 0x40)
  cmpl(s, n);                                    // if (s < n)
  jcc(Assembler::less, L);                       // else (s >= n)
  movl(lo, hi);                                  // x := x >> n
  if (sign_extension) sarl(hi, 31);
  else                xorl(hi, hi);
  // Note: subl(s, n) is not needed since the Intel shift instructions work rcx mod n!
  bind(L);                                       // s (mod n) < n
  shrdl(lo, hi);                                 // x := x >> s
  if (sign_extension) sarl(hi);
  else                shrl(hi);
}

void MacroAssembler::movoop(Register dst, jobject obj) {
  mov_literal32(dst, (int32_t)obj, oop_Relocation::spec_for_immediate());
}

void MacroAssembler::movoop(Address dst, jobject obj) {
  mov_literal32(dst, (int32_t)obj, oop_Relocation::spec_for_immediate());
}

void MacroAssembler::mov_metadata(Register dst, Metadata* obj) {
  mov_literal32(dst, (int32_t)obj, metadata_Relocation::spec_for_immediate());
}

void MacroAssembler::mov_metadata(Address dst, Metadata* obj) {
  mov_literal32(dst, (int32_t)obj, metadata_Relocation::spec_for_immediate());
}

void MacroAssembler::movptr(Register dst, AddressLiteral src) {
  if (src.is_lval()) {
    mov_literal32(dst, (intptr_t)src.target(), src.rspec());
  } else {
    movl(dst, as_Address(src));
  }
}

void MacroAssembler::movptr(ArrayAddress dst, Register src) {
  movl(as_Address(dst), src);
}

void MacroAssembler::movptr(Register dst, ArrayAddress src) {
  movl(dst, as_Address(src));
}

// src should NEVER be a real pointer. Use AddressLiteral for true pointers
void MacroAssembler::movptr(Address dst, intptr_t src) {
  movl(dst, src);
}


void MacroAssembler::pop_callee_saved_registers() {
  pop(rcx);
  pop(rdx);
  pop(rdi);
  pop(rsi);
}

void MacroAssembler::pop_fTOS() {
  fld_d(Address(rsp, 0));
  addl(rsp, 2 * wordSize);
}

void MacroAssembler::push_callee_saved_registers() {
  push(rsi);
  push(rdi);
  push(rdx);
  push(rcx);
}

void MacroAssembler::push_fTOS() {
  subl(rsp, 2 * wordSize);
  fstp_d(Address(rsp, 0));
}


void MacroAssembler::pushoop(jobject obj) {
  push_literal32((int32_t)obj, oop_Relocation::spec_for_immediate());
}

void MacroAssembler::pushklass(Metadata* obj) {
  push_literal32((int32_t)obj, metadata_Relocation::spec_for_immediate());
}

void MacroAssembler::pushptr(AddressLiteral src) {
  if (src.is_lval()) {
    push_literal32((int32_t)src.target(), src.rspec());
  } else {
    pushl(as_Address(src));
  }
}

void MacroAssembler::set_word_if_not_zero(Register dst) {
  xorl(dst, dst);
  set_byte_if_not_zero(dst);
}

static void pass_arg0(MacroAssembler* masm, Register arg) {
  masm->push(arg);
}

static void pass_arg1(MacroAssembler* masm, Register arg) {
  masm->push(arg);
}

static void pass_arg2(MacroAssembler* masm, Register arg) {
  masm->push(arg);
}

static void pass_arg3(MacroAssembler* masm, Register arg) {
  masm->push(arg);
}

#ifndef PRODUCT
extern "C" void findpc(intptr_t x);
#endif

void MacroAssembler::debug32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip, char* msg) {
  // In order to get locks to work, we need to fake an in_VM state
  JavaThread* thread = JavaThread::current();
  JavaThreadState saved_state = thread->thread_state();
  thread->set_thread_state(_thread_in_vm);
  if (ShowMessageBoxOnError) {
    if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
      ttyLocker ttyl;
      BytecodeCounter::print();
    }
    // To see where a verify_oop failed, get $ebx+40/X for this frame.
    // This is the value of eip which points to where verify_oop will return.
    if (os::message_box(msg, "Execution stopped, print registers?")) {
      print_state32(rdi, rsi, rbp, rsp, rbx, rdx, rcx, rax, eip);
      BREAKPOINT;
    }
  } else {
    ttyLocker ttyl;
    ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg);
  }
  // Don't assert holding the ttyLock
  assert(false, err_msg("DEBUG MESSAGE: %s", msg));
  ThreadStateTransition::transition(thread, _thread_in_vm, saved_state);
}

void MacroAssembler::print_state32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip) {
  ttyLocker ttyl;
  FlagSetting fs(Debugging, true);
  tty->print_cr("eip = 0x%08x", eip);
#ifndef PRODUCT
  if ((WizardMode || Verbose) && PrintMiscellaneous) {
    tty->cr();
    findpc(eip);
    tty->cr();
  }
#endif
#define PRINT_REG(rax) \
  { tty->print("%s = ", #rax); os::print_location(tty, rax); }
  PRINT_REG(rax);
  PRINT_REG(rbx);
  PRINT_REG(rcx);
  PRINT_REG(rdx);
  PRINT_REG(rdi);
  PRINT_REG(rsi);
  PRINT_REG(rbp);
  PRINT_REG(rsp);
#undef PRINT_REG
  // Print some words near the top of the stack.
  int* dump_sp = (int*) rsp;
  for (int col1 = 0; col1 < 8; col1++) {
    tty->print("(rsp+0x%03x) 0x%08x: ", (int)((intptr_t)dump_sp - (intptr_t)rsp), (intptr_t)dump_sp);
    os::print_location(tty, *dump_sp++);
  }
  for (int row = 0; row < 16; row++) {
    tty->print("(rsp+0x%03x) 0x%08x: ", (int)((intptr_t)dump_sp - (intptr_t)rsp), (intptr_t)dump_sp);
    for (int col = 0; col < 8; col++) {
      tty->print(" 0x%08x", *dump_sp++);
    }
    tty->cr();
  }
  // Print some instructions around pc:
  Disassembler::decode((address)eip-64, (address)eip);
  tty->print_cr("--------");
  Disassembler::decode((address)eip, (address)eip+32);
}

void MacroAssembler::stop(const char* msg) {
  ExternalAddress message((address)msg);
  // push address of message
  pushptr(message.addr());
  { Label L; call(L, relocInfo::none); bind(L); }     // push eip
  pusha();                                            // push registers
  call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug32)));
  hlt();
}

void MacroAssembler::warn(const char* msg) {
  push_CPU_state();

  ExternalAddress message((address) msg);
  // push address of message
  pushptr(message.addr());

  call(RuntimeAddress(CAST_FROM_FN_PTR(address, warning)));
  addl(rsp, wordSize);       // discard argument
  pop_CPU_state();
}

void MacroAssembler::print_state() {
  { Label L; call(L, relocInfo::none); bind(L); }     // push eip
  pusha();                                            // push registers

  push_CPU_state();
  call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::print_state32)));
  pop_CPU_state();

  popa();
  addl(rsp, wordSize);
}

#else // _LP64

// 64 bit versions

Address MacroAssembler::as_Address(AddressLiteral adr) {
  // amd64 always does this as a pc-rel
  // we can be absolute or disp based on the instruction type
  // jmp/call are displacements others are absolute
  assert(!adr.is_lval(), "must be rval");
  assert(reachable(adr), "must be");
  return Address((int32_t)(intptr_t)(adr.target() - pc()), adr.target(), adr.reloc());
}

Address MacroAssembler::as_Address(ArrayAddress adr) {
  AddressLiteral base = adr.base();
  lea(rscratch1, base);
  Address index = adr.index();
  assert(index._disp == 0, "must not have disp"); // maybe it can?
  Address array(rscratch1, index._index, index._scale, index._disp);
  return array;
}

int MacroAssembler::biased_locking_enter(Register lock_reg,
                                         Register obj_reg,
                                         Register swap_reg,
                                         Register tmp_reg,
                                         bool swap_reg_contains_mark,
                                         Label& done,
                                         Label* slow_case,
                                         BiasedLockingCounters* counters) {
  assert(UseBiasedLocking, "why call this otherwise?");
  assert(swap_reg == rax, "swap_reg must be rax for cmpxchgq");
  assert(tmp_reg != noreg, "tmp_reg must be supplied");
  assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg);
  assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
  Address mark_addr      (obj_reg, oopDesc::mark_offset_in_bytes());
  Address saved_mark_addr(lock_reg, 0);

  if (PrintBiasedLockingStatistics && counters == NULL)
    counters = BiasedLocking::counters();

  // Biased locking
  // See whether the lock is currently biased toward our thread and
  // whether the epoch is still valid
  // Note that the runtime guarantees sufficient alignment of JavaThread
  // pointers to allow age to be placed into low bits
  // First check to see whether biasing is even enabled for this object
  Label cas_label;
  int null_check_offset = -1;
  if (!swap_reg_contains_mark) {
    null_check_offset = offset();
    movq(swap_reg, mark_addr);
  }
  movq(tmp_reg, swap_reg);
  andq(tmp_reg, markOopDesc::biased_lock_mask_in_place);
  cmpq(tmp_reg, markOopDesc::biased_lock_pattern);
  jcc(Assembler::notEqual, cas_label);
  // The bias pattern is present in the object's header. Need to check
  // whether the bias owner and the epoch are both still current.
  load_prototype_header(tmp_reg, obj_reg);
  orq(tmp_reg, r15_thread);
  xorq(tmp_reg, swap_reg);
  andq(tmp_reg, ~((int) markOopDesc::age_mask_in_place));
  if (counters != NULL) {
    cond_inc32(Assembler::zero,
               ExternalAddress((address) counters->biased_lock_entry_count_addr()));
  }
  jcc(Assembler::equal, done);

  Label try_revoke_bias;
  Label try_rebias;

  // At this point we know that the header has the bias pattern and
  // that we are not the bias owner in the current epoch. We need to
  // figure out more details about the state of the header in order to
  // know what operations can be legally performed on the object's
  // header.

  // If the low three bits in the xor result aren't clear, that means
  // the prototype header is no longer biased and we have to revoke
  // the bias on this object.
  testq(tmp_reg, markOopDesc::biased_lock_mask_in_place);
  jcc(Assembler::notZero, try_revoke_bias);

  // Biasing is still enabled for this data type. See whether the
  // epoch of the current bias is still valid, meaning that the epoch
  // bits of the mark word are equal to the epoch bits of the
  // prototype header. (Note that the prototype header's epoch bits
  // only change at a safepoint.) If not, attempt to rebias the object
  // toward the current thread. Note that we must be absolutely sure
  // that the current epoch is invalid in order to do this because
  // otherwise the manipulations it performs on the mark word are
  // illegal.
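  // (tmp_reg still holds mark ^ (prototype | thread) from above, minus the
  // age bits, so the epoch test below sees only the bits that differ.)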
  testq(tmp_reg, markOopDesc::epoch_mask_in_place);
  jcc(Assembler::notZero, try_rebias);

  // The epoch of the current bias is still valid but we know nothing
  // about the owner; it might be set or it might be clear. Try to
  // acquire the bias of the object using an atomic operation. If this
  // fails we will go into the runtime to revoke the object's bias.
  // Note that we first construct the presumed unbiased header so we
  // don't accidentally blow away another thread's valid bias.
  andq(swap_reg,
       markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);
  movq(tmp_reg, swap_reg);
  orq(tmp_reg, r15_thread);
  if (os::is_MP()) {
    lock();
  }
  cmpxchgq(tmp_reg, Address(obj_reg, 0));
  // If the biasing toward our thread failed, this means that
  // another thread succeeded in biasing it toward itself and we
  // need to revoke that bias. The revocation will occur in the
  // interpreter runtime in the slow case.
  if (counters != NULL) {
    cond_inc32(Assembler::zero,
               ExternalAddress((address) counters->anonymously_biased_lock_entry_count_addr()));
  }
  if (slow_case != NULL) {
    jcc(Assembler::notZero, *slow_case);
  }
  jmp(done);

  bind(try_rebias);
  // At this point we know the epoch has expired, meaning that the
  // current "bias owner", if any, is actually invalid. Under these
  // circumstances _only_, we are allowed to use the current header's
  // value as the comparison value when doing the cas to acquire the
  // bias in the current epoch. In other words, we allow transfer of
  // the bias from one thread to another directly in this situation.
  //
  // FIXME: due to a lack of registers we currently blow away the age
  // bits in this situation. Should attempt to preserve them.
  load_prototype_header(tmp_reg, obj_reg);
  orq(tmp_reg, r15_thread);
  if (os::is_MP()) {
    lock();
  }
  cmpxchgq(tmp_reg, Address(obj_reg, 0));
  // If the biasing toward our thread failed, then another thread
  // succeeded in biasing it toward itself and we need to revoke that
  // bias. The revocation will occur in the runtime in the slow case.
  if (counters != NULL) {
    cond_inc32(Assembler::zero,
               ExternalAddress((address) counters->rebiased_lock_entry_count_addr()));
  }
  if (slow_case != NULL) {
    jcc(Assembler::notZero, *slow_case);
  }
  jmp(done);

  bind(try_revoke_bias);
  // The prototype mark in the klass doesn't have the bias bit set any
  // more, indicating that objects of this data type are not supposed
  // to be biased any more. We are going to try to reset the mark of
  // this object to the prototype value and fall through to the
  // CAS-based locking scheme. Note that if our CAS fails, it means
  // that another thread raced us for the privilege of revoking the
  // bias of this particular object, so it's okay to continue in the
  // normal locking code.
  //
  // FIXME: due to a lack of registers we currently blow away the age
  // bits in this situation. Should attempt to preserve them.
  load_prototype_header(tmp_reg, obj_reg);
  if (os::is_MP()) {
    lock();
  }
  cmpxchgq(tmp_reg, Address(obj_reg, 0));
  // Fall through to the normal CAS-based lock, because no matter what
  // the result of the above CAS, some thread must have succeeded in
  // removing the bias bit from the object's header.
  if (counters != NULL) {
    cond_inc32(Assembler::zero,
               ExternalAddress((address) counters->revoked_lock_entry_count_addr()));
  }

  bind(cas_label);

  return null_check_offset;
}

void MacroAssembler::call_VM_leaf_base(address entry_point, int num_args) {
  Label L, E;

#ifdef _WIN64
  // Windows always allocates space for its register args
  assert(num_args <= 4, "only register arguments supported");
  subq(rsp, frame::arg_reg_save_area_bytes);
#endif

  // Align stack if necessary
  testl(rsp, 15);
  jcc(Assembler::zero, L);

  subq(rsp, 8);
  {
    call(RuntimeAddress(entry_point));
  }
  addq(rsp, 8);
  jmp(E);

  bind(L);
  {
    call(RuntimeAddress(entry_point));
  }

  bind(E);

#ifdef _WIN64
  // restore stack pointer
  addq(rsp, frame::arg_reg_save_area_bytes);
#endif
}

void MacroAssembler::cmp64(Register src1, AddressLiteral src2) {
  assert(!src2.is_lval(), "should use cmpptr");

  if (reachable(src2)) {
    cmpq(src1, as_Address(src2));
  } else {
    lea(rscratch1, src2);
    Assembler::cmpq(src1, Address(rscratch1, 0));
  }
}

int MacroAssembler::corrected_idivq(Register reg) {
  // Full implementation of Java ldiv and lrem; checks for special
  // case as described in JVM spec., p.243 & p.271. The function
  // returns the (pc) offset of the idivq instruction - may be needed
  // for implicit exceptions.
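  // The explicit check is required because idivq raises a hardware #DE
  // fault for min_long / -1 (quotient overflow), whereas Java defines the
  // result to be min_long with remainder 0.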
  //
  //         normal case                           special case
  //
  // input : rax: dividend                         min_long
  //         reg: divisor   (may not be eax/edx)   -1
  //
  // output: rax: quotient  (= rax idiv reg)       min_long
  //         rdx: remainder (= rax irem reg)       0
  assert(reg != rax && reg != rdx, "reg cannot be rax or rdx register");
  static const int64_t min_long = 0x8000000000000000;
  Label normal_case, special_case;

  // check for special case
  cmp64(rax, ExternalAddress((address) &min_long));
  jcc(Assembler::notEqual, normal_case);
  xorl(rdx, rdx); // prepare rdx for possible special case (where
                  // remainder = 0)
  cmpq(reg, -1);
  jcc(Assembler::equal, special_case);

  // handle normal case
  bind(normal_case);
  cdqq();
  int idivq_offset = offset();
  idivq(reg);

  // normal and special case exit
  bind(special_case);

  return idivq_offset;
}

void MacroAssembler::decrementq(Register reg, int value) {
  if (value == min_jint) { subq(reg, value); return; }
  if (value <  0) { incrementq(reg, -value); return; }
  if (value == 0) {                        ; return; }
  if (value == 1 && UseIncDec) { decq(reg) ; return; }
  /* else */      { subq(reg, value)       ; return; }
}

void MacroAssembler::decrementq(Address dst, int value) {
  if (value == min_jint) { subq(dst, value); return; }
  if (value <  0) { incrementq(dst, -value); return; }
  if (value == 0) {                        ; return; }
  if (value == 1 && UseIncDec) { decq(dst) ; return; }
  /* else */      { subq(dst, value)       ; return; }
}

void MacroAssembler::incrementq(Register reg, int value) {
  if (value == min_jint) { addq(reg, value); return; }
  if (value <  0) { decrementq(reg, -value); return; }
  if (value == 0) {                        ; return; }
  if (value == 1 && UseIncDec) { incq(reg) ; return; }
  /* else */      { addq(reg, value)       ; return; }
}

void MacroAssembler::incrementq(Address dst, int value) {
  if (value == min_jint) { addq(dst, value); return; }
  if (value <  0) { decrementq(dst, -value); return; }
  if (value == 0) {                        ; return; }
  if (value == 1 && UseIncDec) { incq(dst) ; return; }
  /* else */      { addq(dst, value)       ; return; }
}

// 32bit can do a case table jump in one instruction but we no longer allow the base
// to be installed in the Address class
void MacroAssembler::jump(ArrayAddress entry) {
  lea(rscratch1, entry.base());
  Address dispatch = entry.index();
  assert(dispatch._base == noreg, "must be");
  dispatch._base = rscratch1;
  jmp(dispatch);
}

void MacroAssembler::lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo) {
  ShouldNotReachHere(); // 64bit doesn't use two regs
  cmpq(x_lo, y_lo);
}

void MacroAssembler::lea(Register dst, AddressLiteral src) {
  mov_literal64(dst, (intptr_t)src.target(), src.rspec());
}

void MacroAssembler::lea(Address dst, AddressLiteral adr) {
  mov_literal64(rscratch1, (intptr_t)adr.target(), adr.rspec());
  movptr(dst, rscratch1);
}

void MacroAssembler::leave() {
  // %%% is this really better? Why not on 32bit too?
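  // Single-byte LEAVE is equivalent to the two-instruction sequence
  // 'mov(rsp, rbp); pop(rbp)' that the 32-bit version emits.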
  emit_byte(0xC9); // LEAVE
}

void MacroAssembler::lneg(Register hi, Register lo) {
  ShouldNotReachHere(); // 64bit doesn't use two regs
  negq(lo);
}

void MacroAssembler::movoop(Register dst, jobject obj) {
  mov_literal64(dst, (intptr_t)obj, oop_Relocation::spec_for_immediate());
}

void MacroAssembler::movoop(Address dst, jobject obj) {
  mov_literal64(rscratch1, (intptr_t)obj, oop_Relocation::spec_for_immediate());
  movq(dst, rscratch1);
}

void MacroAssembler::mov_metadata(Register dst, Metadata* obj) {
  mov_literal64(dst, (intptr_t)obj, metadata_Relocation::spec_for_immediate());
}

void MacroAssembler::mov_metadata(Address dst, Metadata* obj) {
  mov_literal64(rscratch1, (intptr_t)obj, metadata_Relocation::spec_for_immediate());
  movq(dst, rscratch1);
}

void MacroAssembler::movptr(Register dst, AddressLiteral src) {
  if (src.is_lval()) {
    mov_literal64(dst, (intptr_t)src.target(), src.rspec());
  } else {
    if (reachable(src)) {
      movq(dst, as_Address(src));
    } else {
      lea(rscratch1, src);
      movq(dst, Address(rscratch1, 0));
    }
  }
}

void MacroAssembler::movptr(ArrayAddress dst, Register src) {
  movq(as_Address(dst), src);
}

void MacroAssembler::movptr(Register dst, ArrayAddress src) {
  movq(dst, as_Address(src));
}

// src should NEVER be a real pointer. Use AddressLiteral for true pointers
void MacroAssembler::movptr(Address dst, intptr_t src) {
  mov64(rscratch1, src);
  movq(dst, rscratch1);
}

// These are mostly for initializing NULL
void MacroAssembler::movptr(Address dst, int32_t src) {
  movslq(dst, src);
}

void MacroAssembler::movptr(Register dst, int32_t src) {
  mov64(dst, (intptr_t)src);
}

void MacroAssembler::pushoop(jobject obj) {
  movoop(rscratch1, obj);
  push(rscratch1);
}

void MacroAssembler::pushklass(Metadata* obj) {
  mov_metadata(rscratch1, obj);
  push(rscratch1);
}

void MacroAssembler::pushptr(AddressLiteral src) {
  lea(rscratch1, src);
  if (src.is_lval()) {
    push(rscratch1);
  } else {
    pushq(Address(rscratch1, 0));
  }
}

void MacroAssembler::reset_last_Java_frame(bool clear_fp,
                                           bool clear_pc) {
  // we must set sp to zero to clear frame
  movptr(Address(r15_thread, JavaThread::last_Java_sp_offset()), NULL_WORD);
  // must clear fp, so that compiled frames are not confused; it is
  // possible that we need it only for debugging
  if (clear_fp) {
    movptr(Address(r15_thread, JavaThread::last_Java_fp_offset()), NULL_WORD);
  }

  if (clear_pc) {
    movptr(Address(r15_thread, JavaThread::last_Java_pc_offset()), NULL_WORD);
  }
}

void MacroAssembler::set_last_Java_frame(Register last_java_sp,
                                         Register last_java_fp,
                                         address last_java_pc) {
  // determine last_java_sp register
  if (!last_java_sp->is_valid()) {
    last_java_sp = rsp;
  }

  // last_java_fp is optional
  if (last_java_fp->is_valid()) {
    movptr(Address(r15_thread, JavaThread::last_Java_fp_offset()),
           last_java_fp);
  }

  // last_java_pc is optional
  if (last_java_pc != NULL) {
    Address java_pc(r15_thread,
                    JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset());
    lea(rscratch1, InternalAddress(last_java_pc));
    movptr(java_pc, rscratch1);
  }

  movptr(Address(r15_thread, JavaThread::last_Java_sp_offset()), last_java_sp);
}

static void pass_arg0(MacroAssembler* masm, Register arg) {
  if (c_rarg0 != arg) {
    masm->mov(c_rarg0, arg);
  }
}

static void pass_arg1(MacroAssembler* masm, Register arg) {
  if (c_rarg1 != arg) {
    masm->mov(c_rarg1, arg);
  }
}

static void pass_arg2(MacroAssembler* masm, Register arg) {
  if (c_rarg2 != arg) {
    masm->mov(c_rarg2, arg);
  }
}

static void pass_arg3(MacroAssembler* masm, Register arg) {
  if (c_rarg3 != arg) {
    masm->mov(c_rarg3, arg);
  }
}

void MacroAssembler::stop(const char* msg) {
  address rip = pc();
  pusha();            // get regs on stack
  lea(c_rarg0, ExternalAddress((address) msg));
  lea(c_rarg1, InternalAddress(rip));
  movq(c_rarg2, rsp); // pass pointer to regs array
  andq(rsp, -16);     // align stack as required by ABI
  call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug64)));
  hlt();
}

void MacroAssembler::warn(const char* msg) {
  push(rbp);
  movq(rbp, rsp);
  andq(rsp, -16);     // align stack as required by push_CPU_state and call
  push_CPU_state();   // keeps alignment at 16 bytes
  lea(c_rarg0, ExternalAddress((address) msg));
  call_VM_leaf(CAST_FROM_FN_PTR(address, warning), c_rarg0);
  pop_CPU_state();
  mov(rsp, rbp);
  pop(rbp);
}

void MacroAssembler::print_state() {
  address rip = pc();
  pusha();            // get regs on stack
  push(rbp);
  movq(rbp, rsp);
  andq(rsp, -16);     // align stack as required by push_CPU_state and call
  push_CPU_state();   // keeps alignment at 16 bytes

  lea(c_rarg0, InternalAddress(rip));
  lea(c_rarg1, Address(rbp, wordSize)); // pass pointer to regs array
  call_VM_leaf(CAST_FROM_FN_PTR(address, MacroAssembler::print_state64), c_rarg0, c_rarg1);

  pop_CPU_state();
  mov(rsp, rbp);
  pop(rbp);
  popa();
}

#ifndef PRODUCT
extern "C" void findpc(intptr_t x);
#endif

void MacroAssembler::debug64(char* msg, int64_t pc, int64_t regs[]) {
  // In order to get locks to work, we need to fake an in_VM state
  if (ShowMessageBoxOnError) {
    JavaThread* thread = JavaThread::current();
    JavaThreadState saved_state = thread->thread_state();
    thread->set_thread_state(_thread_in_vm);
#ifndef PRODUCT
    if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
      ttyLocker ttyl;
      BytecodeCounter::print();
    }
#endif
    // To see where a verify_oop failed, get $ebx+40/X for this frame.
    // XXX correct this offset for amd64
    // This is the value of eip which points to where verify_oop will return.
    if (os::message_box(msg, "Execution stopped, print registers?")) {
      print_state64(pc, regs);
      BREAKPOINT;
      assert(false, "start up GDB");
    }
    ThreadStateTransition::transition(thread, _thread_in_vm, saved_state);
  } else {
    ttyLocker ttyl;
    ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n",
                    msg);
    assert(false, err_msg("DEBUG MESSAGE: %s", msg));
  }
}

void MacroAssembler::print_state64(int64_t pc, int64_t regs[]) {
  ttyLocker ttyl;
  FlagSetting fs(Debugging, true);
  tty->print_cr("rip = 0x%016lx", pc);
#ifndef PRODUCT
  tty->cr();
  findpc(pc);
  tty->cr();
#endif
#define PRINT_REG(rax, value) \
  { tty->print("%s = ", #rax); os::print_location(tty, value); }
  PRINT_REG(rax, regs[15]);
  PRINT_REG(rbx, regs[12]);
  PRINT_REG(rcx, regs[14]);
  PRINT_REG(rdx, regs[13]);
  PRINT_REG(rdi, regs[8]);
  PRINT_REG(rsi, regs[9]);
  PRINT_REG(rbp, regs[10]);
  PRINT_REG(rsp, regs[11]);
  PRINT_REG(r8 , regs[7]);
  PRINT_REG(r9 , regs[6]);
  PRINT_REG(r10, regs[5]);
  PRINT_REG(r11, regs[4]);
  PRINT_REG(r12, regs[3]);
  PRINT_REG(r13, regs[2]);
  PRINT_REG(r14, regs[1]);
  PRINT_REG(r15, regs[0]);
#undef PRINT_REG
  // Print some words near the top of the stack.
  int64_t* rsp = (int64_t*) regs[11];
  int64_t* dump_sp = rsp;
  for (int col1 = 0; col1 < 8; col1++) {
    tty->print("(rsp+0x%03x) 0x%016lx: ", (int)((intptr_t)dump_sp - (intptr_t)rsp), (int64_t)dump_sp);
    os::print_location(tty, *dump_sp++);
  }
  for (int row = 0; row < 25; row++) {
    tty->print("(rsp+0x%03x) 0x%016lx: ", (int)((intptr_t)dump_sp - (intptr_t)rsp), (int64_t)dump_sp);
    for (int col = 0; col < 4; col++) {
      tty->print(" 0x%016lx", *dump_sp++);
    }
    tty->cr();
  }
  // Print some instructions around pc:
  Disassembler::decode((address)pc-64, (address)pc);
  tty->print_cr("--------");
  Disassembler::decode((address)pc, (address)pc+32);
}

#endif // _LP64

// Now versions that are common to 32/64 bit

void MacroAssembler::addptr(Register dst, int32_t imm32) {
  LP64_ONLY(addq(dst, imm32)) NOT_LP64(addl(dst, imm32));
}

void MacroAssembler::addptr(Register dst, Register src) {
  LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src));
}

void MacroAssembler::addptr(Address dst, Register src) {
  LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src));
}

void MacroAssembler::addsd(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    Assembler::addsd(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::addsd(dst, Address(rscratch1, 0));
  }
}

void MacroAssembler::addss(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    addss(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    addss(dst, Address(rscratch1, 0));
  }
}

void MacroAssembler::align(int modulus) {
  if (offset() % modulus != 0) {
    nop(modulus - (offset() % modulus));
  }
}

void MacroAssembler::andpd(XMMRegister dst, AddressLiteral src) {
  // Used in sign-masking with aligned address.
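  // (Typically the mask literal clears or flips the sign bit to implement
  // abs/negate on doubles; legacy SSE andpd requires a 16-byte-aligned
  // memory operand, hence the assert below, while VEX-encoded AVX does not.)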
  assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");
  if (reachable(src)) {
    Assembler::andpd(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::andpd(dst, Address(rscratch1, 0));
  }
}

void MacroAssembler::andps(XMMRegister dst, AddressLiteral src) {
  // Used in sign-masking with aligned address.
  assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");
  if (reachable(src)) {
    Assembler::andps(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::andps(dst, Address(rscratch1, 0));
  }
}

void MacroAssembler::andptr(Register dst, int32_t imm32) {
  LP64_ONLY(andq(dst, imm32)) NOT_LP64(andl(dst, imm32));
}

void MacroAssembler::atomic_incl(AddressLiteral counter_addr) {
  pushf();
  if (os::is_MP())
    lock();
  incrementl(counter_addr);
  popf();
}

// Writes to stack successive pages until offset reached to check for
// stack overflow + shadow pages. This clobbers tmp.
void MacroAssembler::bang_stack_size(Register size, Register tmp) {
  movptr(tmp, rsp);
  // Bang stack for total size given plus shadow page size.
  // Bang one page at a time because large size can bang beyond yellow and
  // red zones.
  Label loop;
  bind(loop);
  movl(Address(tmp, (-os::vm_page_size())), size );
  subptr(tmp, os::vm_page_size());
  subl(size, os::vm_page_size());
  jcc(Assembler::greater, loop);

  // Bang down shadow pages too.
  // The -1 because we already subtracted 1 page.
  for (int i = 0; i < StackShadowPages-1; i++) {
    // this could be any sized move but this can be a debugging crumb
    // so the bigger the better.
    movptr(Address(tmp, (-i*os::vm_page_size())), size );
  }
}

void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) {
  assert(UseBiasedLocking, "why call this otherwise?");

  // Check for biased locking unlock case, which is a no-op
  // Note: we do not have to check the thread ID for two reasons.
  // First, the interpreter checks for IllegalMonitorStateException at
  // a higher level. Second, if the bias was revoked while we held the
  // lock, the object could not be rebiased toward another thread, so
  // the bias bit would be clear.
  movptr(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
  andptr(temp_reg, markOopDesc::biased_lock_mask_in_place);
  cmpptr(temp_reg, markOopDesc::biased_lock_pattern);
  jcc(Assembler::equal, done);
}

void MacroAssembler::c2bool(Register x) {
  // implements x == 0 ? 0 : 1
  // note: must only look at least-significant byte of x
  //       since C-style booleans are stored in one byte
  //       only! (was bug)
  andl(x, 0xFF);
  setb(Assembler::notZero, x);
}

// Wouldn't need if AddressLiteral version had new name
void MacroAssembler::call(Label& L, relocInfo::relocType rtype) {
  Assembler::call(L, rtype);
}

void MacroAssembler::call(Register entry) {
  Assembler::call(entry);
}

void MacroAssembler::call(AddressLiteral entry) {
  if (reachable(entry)) {
    Assembler::call_literal(entry.target(), entry.rspec());
  } else {
    lea(rscratch1, entry);
    Assembler::call(rscratch1);
  }
}

void MacroAssembler::ic_call(address entry) {
  RelocationHolder rh = virtual_call_Relocation::spec(pc());
  movptr(rax, (intptr_t)Universe::non_oop_word());
  call(AddressLiteral(entry, rh));
}

// Implementation of call_VM versions

void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             bool check_exceptions) {
  Label C, E;
  call(C, relocInfo::none);
  jmp(E);

  bind(C);
  call_VM_helper(oop_result, entry_point, 0, check_exceptions);
  ret(0);

  bind(E);
}

void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             Register arg_1,
                             bool check_exceptions) {
  Label C, E;
  call(C, relocInfo::none);
  jmp(E);

  bind(C);
  pass_arg1(this, arg_1);
  call_VM_helper(oop_result, entry_point, 1, check_exceptions);
  ret(0);

  bind(E);
}

void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             bool check_exceptions) {
  Label C, E;
  call(C, relocInfo::none);
  jmp(E);

  bind(C);

  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));

  pass_arg2(this, arg_2);
  pass_arg1(this, arg_1);
  call_VM_helper(oop_result, entry_point, 2, check_exceptions);
  ret(0);

  bind(E);
}

void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             Register arg_3,
                             bool check_exceptions) {
  Label C, E;
  call(C, relocInfo::none);
  jmp(E);

  bind(C);

  LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"));
  LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"));
  pass_arg3(this, arg_3);

  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);

  pass_arg1(this, arg_1);
  call_VM_helper(oop_result, entry_point, 3, check_exceptions);
  ret(0);

  bind(E);
}

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             int number_of_arguments,
                             bool check_exceptions) {
  Register thread = LP64_ONLY(r15_thread) NOT_LP64(noreg);
  call_VM_base(oop_result, thread, last_java_sp, entry_point, number_of_arguments, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             bool check_exceptions) {
  pass_arg1(this, arg_1);
  call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             bool check_exceptions) {

  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);
  pass_arg1(this, arg_1);
  call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             Register arg_3,
                             bool check_exceptions) {
  LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"));
  LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"));
  pass_arg3(this, arg_3);
  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);
  pass_arg1(this, arg_1);
  call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions);
}

void MacroAssembler::super_call_VM(Register oop_result,
                                   Register last_java_sp,
                                   address entry_point,
                                   int number_of_arguments,
                                   bool check_exceptions) {
  Register thread = LP64_ONLY(r15_thread) NOT_LP64(noreg);
  MacroAssembler::call_VM_base(oop_result, thread, last_java_sp, entry_point, number_of_arguments, check_exceptions);
}

void MacroAssembler::super_call_VM(Register oop_result,
                                   Register last_java_sp,
                                   address entry_point,
                                   Register arg_1,
                                   bool check_exceptions) {
  pass_arg1(this, arg_1);
  super_call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions);
}

void MacroAssembler::super_call_VM(Register oop_result,
                                   Register last_java_sp,
                                   address entry_point,
                                   Register arg_1,
                                   Register arg_2,
                                   bool check_exceptions) {

  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);
  pass_arg1(this, arg_1);
  super_call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions);
}

void MacroAssembler::super_call_VM(Register oop_result,
                                   Register last_java_sp,
                                   address entry_point,
                                   Register arg_1,
                                   Register arg_2,
                                   Register arg_3,
                                   bool check_exceptions) {
  LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"));
  LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"));
  pass_arg3(this, arg_3);
  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);
  pass_arg1(this, arg_1);
  super_call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions);
}

void MacroAssembler::call_VM_base(Register oop_result,
                                  Register java_thread,
                                  Register last_java_sp,
                                  address entry_point,
                                  int number_of_arguments,
                                  bool check_exceptions) {
  // determine java_thread register
  if (!java_thread->is_valid()) {
#ifdef _LP64
    java_thread = r15_thread;
#else
    java_thread = rdi;
    get_thread(java_thread);
#endif // LP64
  }
  // determine last_java_sp register
  if (!last_java_sp->is_valid()) {
    last_java_sp = rsp;
  }
  // debugging support
  assert(number_of_arguments >= 0   , "cannot have negative number of arguments");
  LP64_ONLY(assert(java_thread == r15_thread, "unexpected register"));
#ifdef ASSERT
  // TraceBytecodes does not use r12 but saves it over the call, so don't verify
  // r12 is the heapbase.
  LP64_ONLY(if ((UseCompressedOops || UseCompressedKlassPointers) && !TraceBytecodes) verify_heapbase("call_VM_base: heap base corrupted?");)
#endif // ASSERT

  assert(java_thread != oop_result  , "cannot use the same register for java_thread & oop_result");
  assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp");

  // push java thread (becomes first argument of C function)

  NOT_LP64(push(java_thread); number_of_arguments++);
  LP64_ONLY(mov(c_rarg0, r15_thread));

  // set last Java frame before call
  assert(last_java_sp != rbp, "can't use ebp/rbp");

  // Only interpreter should have to set fp
  set_last_Java_frame(java_thread, last_java_sp, rbp, NULL);

  // do the call, remove parameters
  MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments);

  // restore the thread (cannot use the pushed argument since arguments
  // may be overwritten by C code generated by an optimizing compiler);
  // however can use the register value directly if it is callee saved.
  if (LP64_ONLY(true ||) java_thread == rdi || java_thread == rsi) {
    // rdi & rsi (also r15) are callee saved -> nothing to do
#ifdef ASSERT
    guarantee(java_thread != rax, "change this code");
    push(rax);
    { Label L;
      get_thread(rax);
      cmpptr(java_thread, rax);
      jcc(Assembler::equal, L);
      STOP("MacroAssembler::call_VM_base: rdi not callee saved?");
      bind(L);
    }
    pop(rax);
#endif
  } else {
    get_thread(java_thread);
  }
  // reset last Java frame
  // Only interpreter should have to clear fp
  reset_last_Java_frame(java_thread, true, false);

#ifndef CC_INTERP
  // C++ interp handles this in the interpreter
  check_and_handle_popframe(java_thread);
  check_and_handle_earlyret(java_thread);
#endif /* CC_INTERP */

  if (check_exceptions) {
    // check for pending exceptions (java_thread is set upon return)
    cmpptr(Address(java_thread, Thread::pending_exception_offset()), (int32_t) NULL_WORD);
#ifndef _LP64
    jump_cc(Assembler::notEqual,
            RuntimeAddress(StubRoutines::forward_exception_entry()));
#else
    // This used to conditionally jump to forward_exception; however, after
    // relocation the conditional branch might not reach, so instead we
    // branch around an unconditional jump that can always reach its target.
    Label ok;
    jcc(Assembler::equal, ok);
    jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
    bind(ok);
#endif // LP64
  }

  // get oop result if there is one and reset the value in the thread
  if (oop_result->is_valid()) {
    get_vm_result(oop_result, java_thread);
  }
}

void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) {

  // Calculate the value for last_Java_sp. This is somewhat subtle:
  // call_VM does an intermediate call which places a return address on
  // the stack just under the stack pointer as the user finished with it.
  // This allows us to retrieve last_Java_pc from last_Java_sp[-1].
  // On 32bit we then have to push additional args on the stack to accomplish
  // the actual requested call. On 64bit call_VM only can use register args
  // so the only extra space is the return address that call_VM created.
  // This hopefully explains the calculations here.
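  //
  // For example, on 32-bit, after the intermediate call with one register
  // argument already pushed, the layout is (assumed sketch):
  //   rsp + 0          -> arg_1            (pushed by pass_arg1)
  //   rsp + wordSize   -> return address   (= last_Java_pc)
  //   rsp + 2*wordSize -> caller's stack   (= last_Java_sp)
  // hence last_Java_sp = rsp + (1 + number_of_arguments) * wordSize below.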

#ifdef _LP64
  // We've pushed one address, correct last_Java_sp
  lea(rax, Address(rsp, wordSize));
#else
  lea(rax, Address(rsp, (1 + number_of_arguments) * wordSize));
#endif // LP64

  call_VM_base(oop_result, noreg, rax, entry_point, number_of_arguments, check_exceptions);

}

void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) {
  call_VM_leaf_base(entry_point, number_of_arguments);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) {
  pass_arg0(this, arg_0);
  call_VM_leaf(entry_point, 1);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {

  LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
  pass_arg1(this, arg_1);
  pass_arg0(this, arg_0);
  call_VM_leaf(entry_point, 2);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) {
  LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg"));
  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);
  LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
  pass_arg1(this, arg_1);
  pass_arg0(this, arg_0);
  call_VM_leaf(entry_point, 3);
}

void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0) {
  pass_arg0(this, arg_0);
  MacroAssembler::call_VM_leaf_base(entry_point, 1);
}

void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {

  LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
  pass_arg1(this, arg_1);
  pass_arg0(this, arg_0);
  MacroAssembler::call_VM_leaf_base(entry_point, 2);
}

void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) {
  LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg"));
  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);
  LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
  pass_arg1(this, arg_1);
  pass_arg0(this, arg_0);
  MacroAssembler::call_VM_leaf_base(entry_point, 3);
}

void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3) {
  LP64_ONLY(assert(arg_0 != c_rarg3, "smashed arg"));
  LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"));
  LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"));
  pass_arg3(this, arg_3);
  LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg"));
  LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
  pass_arg2(this, arg_2);
  LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
  pass_arg1(this, arg_1);
  pass_arg0(this, arg_0);
  MacroAssembler::call_VM_leaf_base(entry_point, 4);
}

void MacroAssembler::get_vm_result(Register oop_result, Register java_thread) {
  movptr(oop_result, Address(java_thread, JavaThread::vm_result_offset()));
  movptr(Address(java_thread, JavaThread::vm_result_offset()), NULL_WORD);
  verify_oop(oop_result, "broken oop in call_VM_base");
}

void MacroAssembler::get_vm_result_2(Register metadata_result, Register java_thread) {
  movptr(metadata_result, Address(java_thread, JavaThread::vm_result_2_offset()));
  movptr(Address(java_thread, JavaThread::vm_result_2_offset()), NULL_WORD);
}

void MacroAssembler::check_and_handle_earlyret(Register java_thread) {
}
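
// As with check_and_handle_earlyret above, this is intentionally a no-op
// hook here; the interpreter generates the actual popframe/earlyret checks.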

void MacroAssembler::check_and_handle_popframe(Register java_thread) {
}

void MacroAssembler::cmp32(AddressLiteral src1, int32_t imm) {
  if (reachable(src1)) {
    cmpl(as_Address(src1), imm);
  } else {
    lea(rscratch1, src1);
    cmpl(Address(rscratch1, 0), imm);
  }
}

void MacroAssembler::cmp32(Register src1, AddressLiteral src2) {
  assert(!src2.is_lval(), "use cmpptr");
  if (reachable(src2)) {
    cmpl(src1, as_Address(src2));
  } else {
    lea(rscratch1, src2);
    cmpl(src1, Address(rscratch1, 0));
  }
}

void MacroAssembler::cmp32(Register src1, int32_t imm) {
  Assembler::cmpl(src1, imm);
}

void MacroAssembler::cmp32(Register src1, Address src2) {
  Assembler::cmpl(src1, src2);
}

void MacroAssembler::cmpsd2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less) {
  ucomisd(opr1, opr2);

  Label L;
  if (unordered_is_less) {
    movl(dst, -1);
    jcc(Assembler::parity, L);
    jcc(Assembler::below , L);
    movl(dst, 0);
    jcc(Assembler::equal , L);
    increment(dst);
  } else { // unordered is greater
    movl(dst, 1);
    jcc(Assembler::parity, L);
    jcc(Assembler::above , L);
    movl(dst, 0);
    jcc(Assembler::equal , L);
    decrementl(dst);
  }
  bind(L);
}

void MacroAssembler::cmpss2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less) {
  ucomiss(opr1, opr2);

  Label L;
  if (unordered_is_less) {
    movl(dst, -1);
    jcc(Assembler::parity, L);
    jcc(Assembler::below , L);
    movl(dst, 0);
    jcc(Assembler::equal , L);
    increment(dst);
  } else { // unordered is greater
    movl(dst, 1);
    jcc(Assembler::parity, L);
    jcc(Assembler::above , L);
    movl(dst, 0);
    jcc(Assembler::equal , L);
    decrementl(dst);
  }
  bind(L);
}


void MacroAssembler::cmp8(AddressLiteral src1, int imm) {
  if (reachable(src1)) {
    cmpb(as_Address(src1), imm);
  } else {
    lea(rscratch1, src1);
    cmpb(Address(rscratch1, 0), imm);
  }
}

void MacroAssembler::cmpptr(Register src1, AddressLiteral src2) {
#ifdef _LP64
  if (src2.is_lval()) {
    movptr(rscratch1, src2);
    Assembler::cmpq(src1, rscratch1);
  } else if (reachable(src2)) {
    cmpq(src1, as_Address(src2));
  } else {
    lea(rscratch1, src2);
    Assembler::cmpq(src1, Address(rscratch1, 0));
  }
#else
  if (src2.is_lval()) {
    cmp_literal32(src1, (int32_t) src2.target(), src2.rspec());
  } else {
    cmpl(src1, as_Address(src2));
  }
#endif // _LP64
}

void MacroAssembler::cmpptr(Address src1, AddressLiteral src2) {
  assert(src2.is_lval(), "not a mem-mem compare");
#ifdef _LP64
  // moves src2's literal address
  movptr(rscratch1, src2);
  Assembler::cmpq(src1, rscratch1);
#else
  cmp_literal32(src1, (int32_t) src2.target(), src2.rspec());
#endif // _LP64
}

void MacroAssembler::locked_cmpxchgptr(Register reg, AddressLiteral adr) {
  if (reachable(adr)) {
    if (os::is_MP())
      lock();
    cmpxchgptr(reg, as_Address(adr));
  } else {
    lea(rscratch1, adr);
    if (os::is_MP())
      lock();
    cmpxchgptr(reg, Address(rscratch1, 0));
  }
}

void MacroAssembler::cmpxchgptr(Register reg, Address adr) {
  LP64_ONLY(cmpxchgq(reg, adr)) NOT_LP64(cmpxchgl(reg, adr));
}

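// Editorial note (added): cmpsd2int/cmpss2int above implement the Java
// dcmpl/dcmpg and fcmpl/fcmpg semantics. ucomisd/ucomiss set PF for an
// unordered (NaN) comparison, which the parity branch folds into -1 or +1
// depending on unordered_is_less. A minimal sketch for dcmpg:
#if 0  // illustrative only, not compiled
static void example_dcmpg(MacroAssembler* masm) {
  // rax := (xmm0 < xmm1) ? -1 : (xmm0 == xmm1) ? 0 : 1; NaN operands give 1
  masm->cmpsd2int(xmm0, xmm1, rax, /*unordered_is_less=*/ false);
}
#endif
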
void MacroAssembler::comisd(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    Assembler::comisd(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::comisd(dst, Address(rscratch1, 0));
  }
}

void MacroAssembler::comiss(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    Assembler::comiss(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::comiss(dst, Address(rscratch1, 0));
  }
}


void MacroAssembler::cond_inc32(Condition cond, AddressLiteral counter_addr) {
  Condition negated_cond = negate_condition(cond);
  Label L;
  jcc(negated_cond, L);
  atomic_incl(counter_addr);
  bind(L);
}

int MacroAssembler::corrected_idivl(Register reg) {
  // Full implementation of Java idiv and irem; checks for
  // special case as described in JVM spec., p.243 & p.271.
  // The function returns the (pc) offset of the idivl
  // instruction - may be needed for implicit exceptions.
  //
  //         normal case                         special case
  //
  // input : rax: dividend                       min_int
  //         reg: divisor (may not be rax/rdx)   -1
  //
  // output: rax: quotient  (= rax idiv reg)     min_int
  //         rdx: remainder (= rax irem reg)     0
  assert(reg != rax && reg != rdx, "reg cannot be rax or rdx register");
  const int min_int = 0x80000000;
  Label normal_case, special_case;

  // check for special case
  cmpl(rax, min_int);
  jcc(Assembler::notEqual, normal_case);
  xorl(rdx, rdx); // prepare rdx for possible special case (where remainder = 0)
  cmpl(reg, -1);
  jcc(Assembler::equal, special_case);

  // handle normal case
  bind(normal_case);
  cdql();
  int idivl_offset = offset();
  idivl(reg);

  // normal and special case exit
  bind(special_case);

  return idivl_offset;
}



void MacroAssembler::decrementl(Register reg, int value) {
  if (value == min_jint)        { subl(reg, value)       ; return; }
  if (value <  0)               { incrementl(reg, -value); return; }
  if (value ==  0)              {                        ; return; }
  if (value ==  1 && UseIncDec) { decl(reg)              ; return; }
  /* else */                    { subl(reg, value)       ; return; }
}

void MacroAssembler::decrementl(Address dst, int value) {
  if (value == min_jint)        { subl(dst, value)       ; return; }
  if (value <  0)               { incrementl(dst, -value); return; }
  if (value ==  0)              {                        ; return; }
  if (value ==  1 && UseIncDec) { decl(dst)              ; return; }
  /* else */                    { subl(dst, value)       ; return; }
}

void MacroAssembler::division_with_shift(Register reg, int shift_value) {
  assert(shift_value > 0, "illegal shift value");
  Label _is_positive;
  testl(reg, reg);
  jcc(Assembler::positive, _is_positive);
  int offset = (1 << shift_value) - 1;

  if (offset == 1) {
    incrementl(reg);
  } else {
    addl(reg, offset);
  }

  bind(_is_positive);
  sarl(reg, shift_value);
}
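
// Editorial worked example (added): division_with_shift above computes
// reg / (1 << shift_value) with Java-style rounding toward zero. A bare
// arithmetic shift rounds toward negative infinity, so negative inputs are
// biased by (1 << shift_value) - 1 first. For shift_value == 1:
//
//   reg == -7:  -7 + 1 == -6,  -6 sar 1 == -3   (Java: -7/2 == -3)
//   reg ==  7:  no bias,        7 sar 1 ==  3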
void MacroAssembler::divsd(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    Assembler::divsd(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::divsd(dst, Address(rscratch1, 0));
  }
}

void MacroAssembler::divss(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    Assembler::divss(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::divss(dst, Address(rscratch1, 0));
  }
}

// !defined(COMPILER2) is because of stupid core builds
#if !defined(_LP64) || defined(COMPILER1) || !defined(COMPILER2)
void MacroAssembler::empty_FPU_stack() {
  if (VM_Version::supports_mmx()) {
    emms();
  } else {
    for (int i = 8; i-- > 0; ) ffree(i);
  }
}
#endif // !LP64 || C1 || !C2


// Defines obj, preserves var_size_in_bytes
void MacroAssembler::eden_allocate(Register obj,
                                   Register var_size_in_bytes,
                                   int con_size_in_bytes,
                                   Register t1,
                                   Label& slow_case) {
  assert(obj == rax, "obj must be in rax for cmpxchg");
  assert_different_registers(obj, var_size_in_bytes, t1);
  if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) {
    jmp(slow_case);
  } else {
    Register end = t1;
    Label retry;
    bind(retry);
    ExternalAddress heap_top((address) Universe::heap()->top_addr());
    movptr(obj, heap_top);
    if (var_size_in_bytes == noreg) {
      lea(end, Address(obj, con_size_in_bytes));
    } else {
      lea(end, Address(obj, var_size_in_bytes, Address::times_1));
    }
    // if end < obj then we wrapped around => object too long => slow case
    cmpptr(end, obj);
    jcc(Assembler::below, slow_case);
    cmpptr(end, ExternalAddress((address) Universe::heap()->end_addr()));
    jcc(Assembler::above, slow_case);
    // Compare obj with the top addr, and if still equal, store the new top addr in
    // end at the address of the top addr pointer. Sets ZF if was equal, and clears
    // it otherwise. Use lock prefix for atomicity on MPs.
    locked_cmpxchgptr(end, heap_top);
    jcc(Assembler::notEqual, retry);
  }
}

void MacroAssembler::enter() {
  push(rbp);
  mov(rbp, rsp);
}

// A 5 byte nop that is safe for patching (see patch_verified_entry)
void MacroAssembler::fat_nop() {
  if (UseAddressNop) {
    addr_nop_5();
  } else {
    emit_byte(0x26); // es:
    emit_byte(0x2e); // cs:
    emit_byte(0x64); // fs:
    emit_byte(0x65); // gs:
    emit_byte(0x90);
  }
}

void MacroAssembler::fcmp(Register tmp) {
  fcmp(tmp, 1, true, true);
}

void MacroAssembler::fcmp(Register tmp, int index, bool pop_left, bool pop_right) {
  assert(!pop_right || pop_left, "usage error");
  if (VM_Version::supports_cmov()) {
    assert(tmp == noreg, "unneeded temp");
    if (pop_left) {
      fucomip(index);
    } else {
      fucomi(index);
    }
    if (pop_right) {
      fpop();
    }
  } else {
    assert(tmp != noreg, "need temp");
    if (pop_left) {
      if (pop_right) {
        fcompp();
      } else {
        fcomp(index);
      }
    } else {
      fcom(index);
    }
    // convert FPU condition into eflags condition via rax
    save_rax(tmp);
    fwait(); fnstsw_ax();
    sahf();
    restore_rax(tmp);
  }
  // condition codes set as follows:
  //
  // CF (corresponds to C0) if x < y
  // PF (corresponds to C2) if unordered
  // ZF (corresponds to C3) if x = y
}
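
// Editorial sketch (added): after fcmp the x87 comparison result is in
// eflags, per the condition-code mapping noted above, so ordinary jcc
// dispatch works. This assumes VM_Version::supports_cmov(), where no temp
// register is needed.
#if 0  // illustrative only, not compiled
static void example_fcmp_dispatch(MacroAssembler* masm,
                                  Label& less, Label& equal, Label& unordered) {
  masm->fcmp(noreg);                        // compare ST0 with ST1, pop both
  masm->jcc(Assembler::parity, unordered);  // PF: a NaN operand
  masm->jcc(Assembler::below,  less);       // CF: ST0 < ST1
  masm->jcc(Assembler::equal,  equal);      // ZF: ST0 == ST1
  // fall through: ST0 > ST1
}
#endif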

void MacroAssembler::fcmp2int(Register dst, bool unordered_is_less) {
  fcmp2int(dst, unordered_is_less, 1, true, true);
}

void MacroAssembler::fcmp2int(Register dst, bool unordered_is_less, int index, bool pop_left, bool pop_right) {
  fcmp(VM_Version::supports_cmov() ? noreg : dst, index, pop_left, pop_right);
  Label L;
  if (unordered_is_less) {
    movl(dst, -1);
    jcc(Assembler::parity, L);
    jcc(Assembler::below , L);
    movl(dst, 0);
    jcc(Assembler::equal , L);
    increment(dst);
  } else { // unordered is greater
    movl(dst, 1);
    jcc(Assembler::parity, L);
    jcc(Assembler::above , L);
    movl(dst, 0);
    jcc(Assembler::equal , L);
    decrementl(dst);
  }
  bind(L);
}

void MacroAssembler::fld_d(AddressLiteral src) {
  fld_d(as_Address(src));
}

void MacroAssembler::fld_s(AddressLiteral src) {
  fld_s(as_Address(src));
}

void MacroAssembler::fld_x(AddressLiteral src) {
  Assembler::fld_x(as_Address(src));
}

void MacroAssembler::fldcw(AddressLiteral src) {
  Assembler::fldcw(as_Address(src));
}

void MacroAssembler::pow_exp_core_encoding() {
  // kills rax, rcx, rdx
  subptr(rsp,sizeof(jdouble));
  // computes 2^X. Stack: X ...
  // f2xm1 computes 2^X-1 but only operates on -1<=X<=1. Get int(X) and
  // keep it on the thread's stack to compute 2^int(X) later,
  // then compute 2^(X-int(X)) as ((2^(X-int(X))-1)+1).
  // final result is obtained with: 2^X = 2^int(X) * 2^(X-int(X))
  fld_s(0);                // Stack: X X ...
  frndint();               // Stack: int(X) X ...
  fsuba(1);                // Stack: int(X) X-int(X) ...
  fistp_s(Address(rsp,0)); // move int(X) as integer to thread's stack. Stack: X-int(X) ...
  f2xm1();                 // Stack: 2^(X-int(X))-1 ...
  fld1();                  // Stack: 1 2^(X-int(X))-1 ...
  faddp(1);                // Stack: 2^(X-int(X))
  // computes 2^(int(X)): add exponent bias (1023) to int(X), then
  // shift int(X)+1023 to exponent position. The exponent is limited to
  // 11 bits: if int(X)+1023 does not fit in 11 bits, set the result to
  // NaN. 0x000 and 0x7FF are reserved exponent values, so detect them
  // and set the result to NaN as well.
  movl(rax,Address(rsp,0));
  movl(rcx, -2048); // 11 bit mask and valid NaN binary encoding
  addl(rax, 1023);
  movl(rdx,rax);
  shll(rax,20);
  // Check that 0 < int(X)+1023 < 2047. Otherwise set rax to NaN.
  addl(rdx,1);
  // Check that 1 < int(X)+1023+1 < 2048
  // in 3 steps:
  // 1- (int(X)+1023+1)&-2048 == 0 => 0 <= int(X)+1023+1 < 2048
  // 2- (int(X)+1023+1)&-2048 != 0
  // 3- (int(X)+1023+1)&-2048 != 1
  // Do 2- first because addl just updated the flags.
  cmov32(Assembler::equal,rax,rcx);
  cmpl(rdx,1);
  cmov32(Assembler::equal,rax,rcx);
  testl(rdx,rcx);
  cmov32(Assembler::notEqual,rax,rcx);
  movl(Address(rsp,4),rax);
  movl(Address(rsp,0),0);
  fmul_d(Address(rsp,0)); // Stack: 2^X ...
  addptr(rsp,sizeof(jdouble));
}

void MacroAssembler::increase_precision() {
  subptr(rsp, BytesPerWord);
  fnstcw(Address(rsp, 0));
  movl(rax, Address(rsp, 0));
  orl(rax, 0x300);
  push(rax);
  fldcw(Address(rsp, 0));
  pop(rax);
}

void MacroAssembler::restore_precision() {
  fldcw(Address(rsp, 0));
  addptr(rsp, BytesPerWord);
}
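
// Editorial worked example (added): the exponent assembly in
// pow_exp_core_encoding above, for int(X) == 10:
//
//   biased exponent: 10 + 1023 == 1033 == 0x409
//   high 32 bits:    0x409 << 20 == 0x40900000, low 32 bits == 0
//   bit pattern 0x4090000000000000 is the double 1024.0 == 2^10
//
// The -2048 (0xFFFFF800) mask tests reject biased exponents outside 1..2046,
// i.e. the reserved encodings 0x000 and 0x7FF, substituting NaN instead.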

void MacroAssembler::fast_pow() {
  // computes X^Y = 2^(Y * log2(X))
  // if fast computation is not possible, result is NaN. Requires
  // fallback from user of this macro.
  // increase precision for intermediate steps of the computation
  increase_precision();
  fyl2x();                 // Stack: (Y*log2(X)) ...
  pow_exp_core_encoding(); // Stack: X^Y ...
  restore_precision();
}

void MacroAssembler::fast_exp() {
  // computes exp(X) = 2^(X * log2(e))
  // if fast computation is not possible, result is NaN. Requires
  // fallback from user of this macro.
  // increase precision for intermediate steps of the computation
  increase_precision();
  fldl2e();                // Stack: log2(e) X ...
  fmulp(1);                // Stack: (X*log2(e)) ...
  pow_exp_core_encoding(); // Stack: exp(X) ...
  restore_precision();
}

void MacroAssembler::pow_or_exp(bool is_exp, int num_fpu_regs_in_use) {
  // kills rax, rcx, rdx
  // pow and exp need 2 extra registers on the fpu stack.
  Label slow_case, done;
  Register tmp = noreg;
  if (!VM_Version::supports_cmov()) {
    // fcmp needs a temporary, so preserve rdx
    tmp = rdx;
  }
  Register tmp2 = rax;
  Register tmp3 = rcx;

  if (is_exp) {
    // Stack: X
    fld_s(0);                   // duplicate argument for runtime call. Stack: X X
    fast_exp();                 // Stack: exp(X) X
    fcmp(tmp, 0, false, false); // Stack: exp(X) X
    // exp(X) not equal to itself: exp(X) is NaN, go to slow case.
    jcc(Assembler::parity, slow_case);
    // get rid of duplicate argument. Stack: exp(X)
    if (num_fpu_regs_in_use > 0) {
      fxch();
      fpop();
    } else {
      ffree(1);
    }
    jmp(done);
  } else {
    // Stack: X Y
    Label x_negative, y_odd;

    fldz();                     // Stack: 0 X Y
    fcmp(tmp, 1, true, false);  // Stack: X Y
    jcc(Assembler::above, x_negative);

    // X >= 0

    fld_s(1);                   // duplicate arguments for runtime call. Stack: Y X Y
    fld_s(1);                   // Stack: X Y X Y
    fast_pow();                 // Stack: X^Y X Y
    fcmp(tmp, 0, false, false); // Stack: X^Y X Y
    // X^Y not equal to itself: X^Y is NaN, go to slow case.
    jcc(Assembler::parity, slow_case);
    // get rid of duplicate arguments. Stack: X^Y
    if (num_fpu_regs_in_use > 0) {
      fxch(); fpop();
      fxch(); fpop();
    } else {
      ffree(2);
      ffree(1);
    }
    jmp(done);

    // X <= 0
    bind(x_negative);

    fld_s(1);                   // Stack: Y X Y
    frndint();                  // Stack: int(Y) X Y
    fcmp(tmp, 2, false, false); // Stack: int(Y) X Y
    jcc(Assembler::notEqual, slow_case);

    subptr(rsp, 8);

    // For X^Y, when X < 0, Y has to be an integer and the final
    // result depends on whether it's odd or even. We just checked
    // that int(Y) == Y. We move int(Y) to gp registers as a 64 bit
    // integer to test its parity. If int(Y) is huge and doesn't fit
    // in the 64 bit integer range, the integer indefinite value will
    // end up in the gp registers. Huge numbers are all even, the
    // integer indefinite number is even so it's fine.

#ifdef ASSERT
    // Let's check we don't end up with an integer indefinite number
    // when not expected. First test for huge numbers: check whether
    // int(Y)+1 == int(Y) which is true for very large numbers and
    // those are all even. A 64 bit integer is guaranteed to not
    // overflow for numbers where y+1 != y (when precision is set to
    // double precision).
    Label y_not_huge;

    fld1();                     // Stack: 1 int(Y) X Y
    fadd(1);                    // Stack: 1+int(Y) int(Y) X Y

#ifdef _LP64
    // trip to memory to force the precision down from double extended
    // precision
    fstp_d(Address(rsp, 0));
    fld_d(Address(rsp, 0));
#endif

    fcmp(tmp, 1, true, false);  // Stack: int(Y) X Y
#endif

    // move int(Y) as 64 bit integer to thread's stack
    fistp_d(Address(rsp,0));    // Stack: X Y

#ifdef ASSERT
    jcc(Assembler::notEqual, y_not_huge);

    // Y is huge so we know it's even. It may not fit in a 64 bit
    // integer and we don't want the debug code below to see the
    // integer indefinite value so overwrite int(Y) on the thread's
    // stack with 0.
    movl(Address(rsp, 0), 0);
    movl(Address(rsp, 4), 0);

    bind(y_not_huge);
#endif

    fld_s(1);                   // duplicate arguments for runtime call. Stack: Y X Y
    fld_s(1);                   // Stack: X Y X Y
    fabs();                     // Stack: abs(X) Y X Y
    fast_pow();                 // Stack: abs(X)^Y X Y
    fcmp(tmp, 0, false, false); // Stack: abs(X)^Y X Y
    // abs(X)^Y not equal to itself: abs(X)^Y is NaN, go to slow case.

    pop(tmp2);
    NOT_LP64(pop(tmp3));
    jcc(Assembler::parity, slow_case);

#ifdef ASSERT
    // Check that int(Y) is not the integer indefinite value (int
    // overflow). Shouldn't happen because for values that would
    // overflow, 1+int(Y)==int(Y) which was tested earlier.
#ifndef _LP64
    {
      Label integer;
      testl(tmp2, tmp2);
      jcc(Assembler::notZero, integer);
      cmpl(tmp3, 0x80000000);
      jcc(Assembler::notZero, integer);
      STOP("integer indefinite value shouldn't be seen here");
      bind(integer);
    }
#else
    {
      Label integer;
      mov(tmp3, tmp2); // preserve tmp2 for parity check below
      shlq(tmp3, 1);
      jcc(Assembler::carryClear, integer);
      jcc(Assembler::notZero, integer);
      STOP("integer indefinite value shouldn't be seen here");
      bind(integer);
    }
#endif
#endif

    // get rid of duplicate arguments. Stack: X^Y
    if (num_fpu_regs_in_use > 0) {
      fxch(); fpop();
      fxch(); fpop();
    } else {
      ffree(2);
      ffree(1);
    }

    testl(tmp2, 1);
    jcc(Assembler::zero, done); // X <= 0, Y even: X^Y = abs(X)^Y
    // X <= 0, Y odd: X^Y = -abs(X)^Y

    fchs();                     // Stack: -abs(X)^Y Y
    jmp(done);
  }

  // slow case: runtime call
  bind(slow_case);

  fpop();                      // pop incorrect result or int(Y)

  fp_runtime_fallback(is_exp ? CAST_FROM_FN_PTR(address, SharedRuntime::dexp) : CAST_FROM_FN_PTR(address, SharedRuntime::dpow),
                      is_exp ? 1 : 2, num_fpu_regs_in_use);

  // Come here with result in F-TOS
  bind(done);
}

void MacroAssembler::fpop() {
  ffree();
  fincstp();
}
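
// Editorial worked example (added): the x_negative path in pow_or_exp above,
// for X == -2.0, Y == 3.0. frndint yields int(Y) == 3.0 == Y, so the fast
// path applies; abs(X)^Y == 8.0 is computed, int(Y) is stored as the 64-bit
// integer 3, and testl(tmp2, 1) finds the low bit set (Y odd), so fchs()
// produces the final result -8.0. For Y == 4.0 the low bit is clear and the
// sign is kept (+16.0).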

void MacroAssembler::fremr(Register tmp) {
  save_rax(tmp);
  { Label L;
    bind(L);
    fprem();
    fwait(); fnstsw_ax();
#ifdef _LP64
    testl(rax, 0x400);
    jcc(Assembler::notEqual, L);
#else
    sahf();
    jcc(Assembler::parity, L);
#endif // _LP64
  }
  restore_rax(tmp);
  // Result is in ST0.
  // Note: fxch & fpop to get rid of ST1
  // (otherwise FPU stack could overflow eventually)
  fxch(1);
  fpop();
}


void MacroAssembler::incrementl(AddressLiteral dst) {
  if (reachable(dst)) {
    incrementl(as_Address(dst));
  } else {
    lea(rscratch1, dst);
    incrementl(Address(rscratch1, 0));
  }
}

void MacroAssembler::incrementl(ArrayAddress dst) {
  incrementl(as_Address(dst));
}

void MacroAssembler::incrementl(Register reg, int value) {
  if (value == min_jint)        { addl(reg, value)       ; return; }
  if (value <  0)               { decrementl(reg, -value); return; }
  if (value ==  0)              {                        ; return; }
  if (value ==  1 && UseIncDec) { incl(reg)              ; return; }
  /* else */                    { addl(reg, value)       ; return; }
}

void MacroAssembler::incrementl(Address dst, int value) {
  if (value == min_jint)        { addl(dst, value)       ; return; }
  if (value <  0)               { decrementl(dst, -value); return; }
  if (value ==  0)              {                        ; return; }
  if (value ==  1 && UseIncDec) { incl(dst)              ; return; }
  /* else */                    { addl(dst, value)       ; return; }
}

void MacroAssembler::jump(AddressLiteral dst) {
  if (reachable(dst)) {
    jmp_literal(dst.target(), dst.rspec());
  } else {
    lea(rscratch1, dst);
    jmp(rscratch1);
  }
}

void MacroAssembler::jump_cc(Condition cc, AddressLiteral dst) {
  if (reachable(dst)) {
    InstructionMark im(this);
    relocate(dst.reloc());
    const int short_size = 2;
    const int long_size = 6;
    int offs = (intptr_t)dst.target() - ((intptr_t)_code_pos);
    if (dst.reloc() == relocInfo::none && is8bit(offs - short_size)) {
      // 0111 tttn #8-bit disp
      emit_byte(0x70 | cc);
      emit_byte((offs - short_size) & 0xFF);
    } else {
      // 0000 1111 1000 tttn #32-bit disp
      emit_byte(0x0F);
      emit_byte(0x80 | cc);
      emit_long(offs - long_size);
    }
  } else {
#ifdef ASSERT
    warning("reversing conditional branch");
#endif /* ASSERT */
    Label skip;
    jccb(reverse[cc], skip);
    lea(rscratch1, dst);
    Assembler::jmp(rscratch1);
    bind(skip);
  }
}

void MacroAssembler::ldmxcsr(AddressLiteral src) {
  if (reachable(src)) {
    Assembler::ldmxcsr(as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::ldmxcsr(Address(rscratch1, 0));
  }
}

int MacroAssembler::load_signed_byte(Register dst, Address src) {
  int off;
  if (LP64_ONLY(true ||) VM_Version::is_P6()) {
    off = offset();
    movsbl(dst, src); // movsxb
  } else {
    off = load_unsigned_byte(dst, src);
    shll(dst, 24);
    sarl(dst, 24);
  }
  return off;
}
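
// Editorial worked example (added): the pre-P6 fallback in load_signed_byte
// synthesizes sign extension from a zero-extending load plus shifts. For the
// byte 0x80 (-128):
//
//   load_unsigned_byte: dst == 0x00000080
//   shll(dst, 24):      dst == 0x80000000
//   sarl(dst, 24):      dst == 0xFFFFFF80 == -128  (sign bit replicated)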

// Note: load_signed_short used to be called load_signed_word.
// Although the 'w' in x86 opcodes refers to the term "word" in the assembler
// manual, which means 16 bits, that usage is found nowhere in HotSpot code.
// The term "word" in HotSpot means a 32- or 64-bit machine word.
int MacroAssembler::load_signed_short(Register dst, Address src) {
  int off;
  if (LP64_ONLY(true ||) VM_Version::is_P6()) {
    // This is dubious to me since it seems safe to do a signed 16 => 64 bit
    // version but this is what 64bit has always done. This seems to imply
    // that users are only using 32bits worth.
    off = offset();
    movswl(dst, src); // movsxw
  } else {
    off = load_unsigned_short(dst, src);
    shll(dst, 16);
    sarl(dst, 16);
  }
  return off;
}

int MacroAssembler::load_unsigned_byte(Register dst, Address src) {
  // According to Intel Doc. AP-526, "Zero-Extension of Short" (p. 16)
  // and "3.9 Partial Register Penalties" (p. 22).
  int off;
  if (LP64_ONLY(true || ) VM_Version::is_P6() || src.uses(dst)) {
    off = offset();
    movzbl(dst, src); // movzxb
  } else {
    xorl(dst, dst);
    off = offset();
    movb(dst, src);
  }
  return off;
}

// Note: load_unsigned_short used to be called load_unsigned_word.
int MacroAssembler::load_unsigned_short(Register dst, Address src) {
  // According to Intel Doc. AP-526, "Zero-Extension of Short" (p. 16)
  // and "3.9 Partial Register Penalties" (p. 22).
  int off;
  if (LP64_ONLY(true ||) VM_Version::is_P6() || src.uses(dst)) {
    off = offset();
    movzwl(dst, src); // movzxw
  } else {
    xorl(dst, dst);
    off = offset();
    movw(dst, src);
  }
  return off;
}

void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2) {
  switch (size_in_bytes) {
#ifndef _LP64
  case  8:
    assert(dst2 != noreg, "second dest register required");
    movl(dst,  src);
    movl(dst2, src.plus_disp(BytesPerInt));
    break;
#else
  case  8:  movq(dst, src); break;
#endif
  case  4:  movl(dst, src); break;
  case  2:  is_signed ? load_signed_short(dst, src) : load_unsigned_short(dst, src); break;
  case  1:  is_signed ? load_signed_byte( dst, src) : load_unsigned_byte( dst, src); break;
  default:  ShouldNotReachHere();
  }
}

void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2) {
  switch (size_in_bytes) {
#ifndef _LP64
  case  8:
    assert(src2 != noreg, "second source register required");
    movl(dst,                        src);
    movl(dst.plus_disp(BytesPerInt), src2);
    break;
#else
  case  8:  movq(dst, src); break;
#endif
  case  4:  movl(dst, src); break;
  case  2:  movw(dst, src); break;
  case  1:  movb(dst, src); break;
  default:  ShouldNotReachHere();
  }
}
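
// Editorial sketch (added): load_sized_value/store_sized_value let callers
// pick the right move from a runtime size. The field offset and registers
// below are illustrative only; on this path a signed 2-byte load emits
// movswl via load_signed_short.
#if 0  // illustrative only, not compiled
static void example_load_short_field(MacroAssembler* masm, int field_offset) {
  masm->load_sized_value(rax, Address(rbx, field_offset),
                         sizeof(jshort), /*is_signed=*/ true, noreg);
}
#endif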

void MacroAssembler::mov32(AddressLiteral dst, Register src) {
  if (reachable(dst)) {
    movl(as_Address(dst), src);
  } else {
    lea(rscratch1, dst);
    movl(Address(rscratch1, 0), src);
  }
}

void MacroAssembler::mov32(Register dst, AddressLiteral src) {
  if (reachable(src)) {
    movl(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    movl(dst, Address(rscratch1, 0));
  }
}

// C++ bool manipulation

void MacroAssembler::movbool(Register dst, Address src) {
  if(sizeof(bool) == 1)
    movb(dst, src);
  else if(sizeof(bool) == 2)
    movw(dst, src);
  else if(sizeof(bool) == 4)
    movl(dst, src);
  else
    // unsupported
    ShouldNotReachHere();
}

void MacroAssembler::movbool(Address dst, bool boolconst) {
  if(sizeof(bool) == 1)
    movb(dst, (int) boolconst);
  else if(sizeof(bool) == 2)
    movw(dst, (int) boolconst);
  else if(sizeof(bool) == 4)
    movl(dst, (int) boolconst);
  else
    // unsupported
    ShouldNotReachHere();
}

void MacroAssembler::movbool(Address dst, Register src) {
  if(sizeof(bool) == 1)
    movb(dst, src);
  else if(sizeof(bool) == 2)
    movw(dst, src);
  else if(sizeof(bool) == 4)
    movl(dst, src);
  else
    // unsupported
    ShouldNotReachHere();
}

void MacroAssembler::movbyte(ArrayAddress dst, int src) {
  movb(as_Address(dst), src);
}

void MacroAssembler::movdl(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    movdl(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    movdl(dst, Address(rscratch1, 0));
  }
}

void MacroAssembler::movq(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    movq(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    movq(dst, Address(rscratch1, 0));
  }
}

void MacroAssembler::movdbl(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    if (UseXmmLoadAndClearUpper) {
      movsd (dst, as_Address(src));
    } else {
      movlpd(dst, as_Address(src));
    }
  } else {
    lea(rscratch1, src);
    if (UseXmmLoadAndClearUpper) {
      movsd (dst, Address(rscratch1, 0));
    } else {
      movlpd(dst, Address(rscratch1, 0));
    }
  }
}

void MacroAssembler::movflt(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    movss(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    movss(dst, Address(rscratch1, 0));
  }
}

void MacroAssembler::movptr(Register dst, Register src) {
  LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
}

void MacroAssembler::movptr(Register dst, Address src) {
  LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
}

// src should NEVER be a real pointer. Use AddressLiteral for true pointers
void MacroAssembler::movptr(Register dst, intptr_t src) {
  LP64_ONLY(mov64(dst, src)) NOT_LP64(movl(dst, src));
}

void MacroAssembler::movptr(Address dst, Register src) {
  LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
}

void MacroAssembler::movdqu(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    Assembler::movdqu(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::movdqu(dst, Address(rscratch1, 0));
  }
}

void MacroAssembler::movsd(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    Assembler::movsd(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::movsd(dst, Address(rscratch1, 0));
  }
}

void MacroAssembler::movss(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    Assembler::movss(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::movss(dst, Address(rscratch1, 0));
  }
}

void MacroAssembler::mulsd(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    Assembler::mulsd(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::mulsd(dst, Address(rscratch1, 0));
  }
}

void MacroAssembler::mulss(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    Assembler::mulss(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::mulss(dst, Address(rscratch1, 0));
  }
}

void MacroAssembler::null_check(Register reg, int offset) {
  if (needs_explicit_null_check(offset)) {
    // provoke OS NULL exception if reg = NULL by
    // accessing M[reg] w/o changing any (non-CC) registers
    // NOTE: cmpl is plenty here to provoke a segv
    cmpptr(rax, Address(reg, 0));
    // Note: should probably use testl(rax, Address(reg, 0));
    //       may be shorter code (however, this version of
    //       testl needs to be implemented first)
  } else {
    // nothing to do, (later) access of M[reg + offset]
    // will provoke OS NULL exception if reg = NULL
  }
}

void MacroAssembler::os_breakpoint() {
  // instead of directly emitting a breakpoint, call os::breakpoint for better debuggability
  // (e.g., MSVC can't call ps() otherwise)
  call(RuntimeAddress(CAST_FROM_FN_PTR(address, os::breakpoint)));
}

void MacroAssembler::pop_CPU_state() {
  pop_FPU_state();
  pop_IU_state();
}

void MacroAssembler::pop_FPU_state() {
  NOT_LP64(frstor(Address(rsp, 0));)
  LP64_ONLY(fxrstor(Address(rsp, 0));)
  addptr(rsp, FPUStateSizeInWords * wordSize);
}

void MacroAssembler::pop_IU_state() {
  popa();
  LP64_ONLY(addq(rsp, 8));
  popf();
}

// Save Integer and Float state
// Warning: Stack must be 16 byte aligned (64bit)
void MacroAssembler::push_CPU_state() {
  push_IU_state();
  push_FPU_state();
}

void MacroAssembler::push_FPU_state() {
  subptr(rsp, FPUStateSizeInWords * wordSize);
#ifndef _LP64
  fnsave(Address(rsp, 0));
  fwait();
#else
  fxsave(Address(rsp, 0));
#endif // LP64
}

void MacroAssembler::push_IU_state() {
  // Push flags first because pusha kills them
  pushf();
  // Make sure rsp stays 16-byte aligned
  LP64_ONLY(subq(rsp, 8));
  pusha();
}

void MacroAssembler::reset_last_Java_frame(Register java_thread, bool clear_fp, bool clear_pc) {
  // determine java_thread register
  if (!java_thread->is_valid()) {
    java_thread = rdi;
    get_thread(java_thread);
  }
  // we must set sp to zero to clear frame
  movptr(Address(java_thread, JavaThread::last_Java_sp_offset()), NULL_WORD);
  if (clear_fp) {
    movptr(Address(java_thread, JavaThread::last_Java_fp_offset()), NULL_WORD);
  }

  if (clear_pc)
    movptr(Address(java_thread, JavaThread::last_Java_pc_offset()), NULL_WORD);

}

void MacroAssembler::restore_rax(Register tmp) {
  if (tmp == noreg) pop(rax);
  else if (tmp != rax) mov(rax, tmp);
}

void MacroAssembler::round_to(Register reg, int modulus) {
  addptr(reg, modulus - 1);
  andptr(reg, -modulus);
}

void MacroAssembler::save_rax(Register tmp) {
  if (tmp == noreg) push(rax);
  else if (tmp != rax) mov(tmp, rax);
}

// Write serialization page so VM thread can do a pseudo remote membar.
// We use the current thread pointer to calculate a thread specific
// offset to write to within the page. This minimizes bus traffic
// due to cache line collision.
void MacroAssembler::serialize_memory(Register thread, Register tmp) {
  movl(tmp, thread);
  shrl(tmp, os::get_serialize_page_shift_count());
  andl(tmp, (os::vm_page_size() - sizeof(int)));

  Address index(noreg, tmp, Address::times_1);
  ExternalAddress page(os::get_memory_serialize_page());

  // Size of store must match masking code above
  movl(as_Address(ArrayAddress(page, index)), tmp);
}

// Calls to C land
//
// When entering C land, the rbp & rsp of the last Java frame have to be recorded
// in the (thread-local) JavaThread object. When leaving C land, the last Java fp
// has to be reset to 0. This is required to allow proper stack traversal.
void MacroAssembler::set_last_Java_frame(Register java_thread,
                                         Register last_java_sp,
                                         Register last_java_fp,
                                         address  last_java_pc) {
  // determine java_thread register
  if (!java_thread->is_valid()) {
    java_thread = rdi;
    get_thread(java_thread);
  }
  // determine last_java_sp register
  if (!last_java_sp->is_valid()) {
    last_java_sp = rsp;
  }

  // last_java_fp is optional

  if (last_java_fp->is_valid()) {
    movptr(Address(java_thread, JavaThread::last_Java_fp_offset()), last_java_fp);
  }

  // last_java_pc is optional

  if (last_java_pc != NULL) {
    lea(Address(java_thread,
                JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()),
        InternalAddress(last_java_pc));

  }
  movptr(Address(java_thread, JavaThread::last_Java_sp_offset()), last_java_sp);
}

void MacroAssembler::shlptr(Register dst, int imm8) {
  LP64_ONLY(shlq(dst, imm8)) NOT_LP64(shll(dst, imm8));
}

void MacroAssembler::shrptr(Register dst, int imm8) {
  LP64_ONLY(shrq(dst, imm8)) NOT_LP64(shrl(dst, imm8));
}

void MacroAssembler::sign_extend_byte(Register reg) {
  if (LP64_ONLY(true ||) (VM_Version::is_P6() && reg->has_byte_register())) {
    movsbl(reg, reg); // movsxb
  } else {
    shll(reg, 24);
    sarl(reg, 24);
  }
}

void MacroAssembler::sign_extend_short(Register reg) {
  if (LP64_ONLY(true ||) VM_Version::is_P6()) {
    movswl(reg, reg); // movsxw
  } else {
    shll(reg, 16);
    sarl(reg, 16);
  }
}

void MacroAssembler::testl(Register dst, AddressLiteral src) {
  assert(reachable(src), "Address should be reachable");
  testl(dst, as_Address(src));
}

void MacroAssembler::sqrtsd(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    Assembler::sqrtsd(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::sqrtsd(dst, Address(rscratch1, 0));
  }
}

void MacroAssembler::sqrtss(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    Assembler::sqrtss(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::sqrtss(dst, Address(rscratch1, 0));
  }
}

void MacroAssembler::subsd(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    Assembler::subsd(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::subsd(dst, Address(rscratch1, 0));
  }
}

void MacroAssembler::subss(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    Assembler::subss(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::subss(dst, Address(rscratch1, 0));
  }
}

void MacroAssembler::ucomisd(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    Assembler::ucomisd(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::ucomisd(dst, Address(rscratch1, 0));
  }
}

void MacroAssembler::ucomiss(XMMRegister dst, AddressLiteral src) {
  if (reachable(src)) {
    Assembler::ucomiss(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::ucomiss(dst, Address(rscratch1, 0));
  }
}
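
// Editorial sketch (added): the xorpd/xorps/pshufb wrappers below are mostly
// used with 16-byte-aligned constant masks, e.g. flipping the sign of a
// double. The mask symbol here is hypothetical; the alignment assert below
// exists because the legacy SSE encodings fault on unaligned memory operands,
// while AVX relaxes that requirement.
#if 0  // illustrative only, not compiled
static void example_negate_double(MacroAssembler* masm, address double_sign_flip_mask) {
  // mask memory holds 0x8000000000000000 per 64-bit lane, 16-byte aligned
  masm->xorpd(xmm0, ExternalAddress(double_sign_flip_mask));
}
#endif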

void MacroAssembler::xorpd(XMMRegister dst, AddressLiteral src) {
  // Used in sign-bit flipping with aligned address.
  assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");
  if (reachable(src)) {
    Assembler::xorpd(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::xorpd(dst, Address(rscratch1, 0));
  }
}

void MacroAssembler::xorps(XMMRegister dst, AddressLiteral src) {
  // Used in sign-bit flipping with aligned address.
  assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");
  if (reachable(src)) {
    Assembler::xorps(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::xorps(dst, Address(rscratch1, 0));
  }
}

void MacroAssembler::pshufb(XMMRegister dst, AddressLiteral src) {
  // Like the xor helpers above, the SSE encoding requires a 16-byte aligned memory operand.
  assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");
  if (reachable(src)) {
    Assembler::pshufb(dst, as_Address(src));
  } else {
    lea(rscratch1, src);
    Assembler::pshufb(dst, Address(rscratch1, 0));
  }
}

// AVX 3-operands instructions

void MacroAssembler::vaddsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
  if (reachable(src)) {
    vaddsd(dst, nds, as_Address(src));
  } else {
    lea(rscratch1, src);
    vaddsd(dst, nds, Address(rscratch1, 0));
  }
}

void MacroAssembler::vaddss(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
  if (reachable(src)) {
    vaddss(dst, nds, as_Address(src));
  } else {
    lea(rscratch1, src);
    vaddss(dst, nds, Address(rscratch1, 0));
  }
}

void MacroAssembler::vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256) {
  if (reachable(src)) {
    vandpd(dst, nds, as_Address(src), vector256);
  } else {
    lea(rscratch1, src);
    vandpd(dst, nds, Address(rscratch1, 0), vector256);
  }
}

void MacroAssembler::vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256) {
  if (reachable(src)) {
    vandps(dst, nds, as_Address(src), vector256);
  } else {
    lea(rscratch1, src);
    vandps(dst, nds, Address(rscratch1, 0), vector256);
  }
}

void MacroAssembler::vdivsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
  if (reachable(src)) {
    vdivsd(dst, nds, as_Address(src));
  } else {
    lea(rscratch1, src);
    vdivsd(dst, nds, Address(rscratch1, 0));
  }
}

void MacroAssembler::vdivss(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
  if (reachable(src)) {
    vdivss(dst, nds, as_Address(src));
  } else {
    lea(rscratch1, src);
    vdivss(dst, nds, Address(rscratch1, 0));
  }
}

void MacroAssembler::vmulsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
  if (reachable(src)) {
    vmulsd(dst, nds, as_Address(src));
  } else {
    lea(rscratch1, src);
    vmulsd(dst, nds, Address(rscratch1, 0));
  }
}

void MacroAssembler::vmulss(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
  if (reachable(src)) {
    vmulss(dst, nds, as_Address(src));
  } else {
    lea(rscratch1, src);
    vmulss(dst, nds, Address(rscratch1, 0));
  }
}

void MacroAssembler::vsubsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
  if (reachable(src)) {
    vsubsd(dst, nds, as_Address(src));
  } else {
    lea(rscratch1, src);
    vsubsd(dst, nds, Address(rscratch1, 0));
  }
}

void MacroAssembler::vsubss(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
  if (reachable(src)) {
    vsubss(dst, nds, as_Address(src));
  } else {
    lea(rscratch1, src);
    vsubss(dst, nds, Address(rscratch1, 0));
  }
}

void MacroAssembler::vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256) {
  if (reachable(src)) {
    vxorpd(dst, nds, as_Address(src), vector256);
  } else {
    lea(rscratch1, src);
    vxorpd(dst, nds, Address(rscratch1, 0), vector256);
  }
}

void MacroAssembler::vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256) {
  if (reachable(src)) {
    vxorps(dst, nds, as_Address(src), vector256);
  } else {
    lea(rscratch1, src);
    vxorps(dst, nds, Address(rscratch1, 0), vector256);
  }
}
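
// Editorial note (added): the v* helpers above emit the AVX three-operand
// forms, which are non-destructive: the destination may differ from both
// sources, unlike the two-operand SSE forms. The constant address below is a
// placeholder; the vector256 flag on the packed ops selects the 256-bit YMM
// encoding.
#if 0  // illustrative only, not compiled
static void example_three_operand(MacroAssembler* masm, address constant) {
  masm->vsubsd(xmm0, xmm1, ExternalAddress(constant));  // xmm0 = xmm1 - [constant]
}
#endif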


//////////////////////////////////////////////////////////////////////////////////
#ifndef SERIALGC

void MacroAssembler::g1_write_barrier_pre(Register obj,
                                          Register pre_val,
                                          Register thread,
                                          Register tmp,
                                          bool tosca_live,
                                          bool expand_call) {

  // If expand_call is true then we expand the call_VM_leaf macro
  // directly to skip generating the check by
  // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp.

#ifdef _LP64
  assert(thread == r15_thread, "must be");
#endif // _LP64

  Label done;
  Label runtime;

  assert(pre_val != noreg, "check this code");

  if (obj != noreg) {
    assert_different_registers(obj, pre_val, tmp);
    assert(pre_val != rax, "check this code");
  }

  Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
                                       PtrQueue::byte_offset_of_active()));
  Address index(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
                                 PtrQueue::byte_offset_of_index()));
  Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
                                  PtrQueue::byte_offset_of_buf()));


  // Is marking active?
  if (in_bytes(PtrQueue::byte_width_of_active()) == 4) {
    cmpl(in_progress, 0);
  } else {
    assert(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption");
    cmpb(in_progress, 0);
  }
  jcc(Assembler::equal, done);

  // Do we need to load the previous value?
  if (obj != noreg) {
    load_heap_oop(pre_val, Address(obj, 0));
  }

  // Is the previous value null?
  cmpptr(pre_val, (int32_t) NULL_WORD);
  jcc(Assembler::equal, done);

  // Can we store original value in the thread's buffer?
  // Is index == 0?
  // (The index field is typed as size_t.)

  movptr(tmp, index);                   // tmp := *index_adr
  cmpptr(tmp, 0);                       // tmp == 0?
  jcc(Assembler::equal, runtime);       // If yes, goto runtime

  subptr(tmp, wordSize);                // tmp := tmp - wordSize
  movptr(index, tmp);                   // *index_adr := tmp
  addptr(tmp, buffer);                  // tmp := tmp + *buffer_adr

  // Record the previous value
  movptr(Address(tmp, 0), pre_val);
  jmp(done);

  bind(runtime);
  // save the live input values
  if(tosca_live) push(rax);

  if (obj != noreg && obj != rax)
    push(obj);

  if (pre_val != rax)
    push(pre_val);

  // Calling the runtime using the regular call_VM_leaf mechanism generates
  // code (generated by InterpreterMacroAssembler::call_VM_leaf_base)
  // that checks that the *(ebp+frame::interpreter_frame_last_sp) == NULL.
  //
  // If we are generating the pre-barrier without a frame (e.g. in the
  // intrinsified Reference.get() routine) then ebp might be pointing to
  // the caller frame and so this check will most likely fail at runtime.
  //
  // Expanding the call directly bypasses the generation of the check.
  // So when we do not have a full interpreter frame on the stack
  // expand_call should be passed true.

  NOT_LP64( push(thread); )

  if (expand_call) {
    LP64_ONLY( assert(pre_val != c_rarg1, "smashed arg"); )
    pass_arg1(this, thread);
    pass_arg0(this, pre_val);
    MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), 2);
  } else {
    call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), pre_val, thread);
  }

  NOT_LP64( pop(thread); )

  // restore the live input values
  if (pre_val != rax)
    pop(pre_val);

  if (obj != noreg && obj != rax)
    pop(obj);

  if(tosca_live) pop(rax);

  bind(done);
}

void MacroAssembler::g1_write_barrier_post(Register store_addr,
                                           Register new_val,
                                           Register thread,
                                           Register tmp,
                                           Register tmp2) {
#ifdef _LP64
  assert(thread == r15_thread, "must be");
#endif // _LP64

  Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
                                       PtrQueue::byte_offset_of_index()));
  Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
                                  PtrQueue::byte_offset_of_buf()));

  BarrierSet* bs = Universe::heap()->barrier_set();
  CardTableModRefBS* ct = (CardTableModRefBS*)bs;
  Label done;
  Label runtime;

  // Does store cross heap regions?

  movptr(tmp, store_addr);
  xorptr(tmp, new_val);
  shrptr(tmp, HeapRegion::LogOfHRGrainBytes);
  jcc(Assembler::equal, done);

  // crosses regions, storing NULL?

  cmpptr(new_val, (int32_t) NULL_WORD);
  jcc(Assembler::equal, done);

  // storing region crossing non-NULL, is card already dirty?

  ExternalAddress cardtable((address) ct->byte_map_base);
  assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
#ifdef _LP64
  const Register card_addr = tmp;

  movq(card_addr, store_addr);
  shrq(card_addr, CardTableModRefBS::card_shift);

  lea(tmp2, cardtable);

  // get the address of the card
  addq(card_addr, tmp2);
#else
  const Register card_index = tmp;

  movl(card_index, store_addr);
  shrl(card_index, CardTableModRefBS::card_shift);

  Address index(noreg, card_index, Address::times_1);
  const Register card_addr = tmp;
  lea(card_addr, as_Address(ArrayAddress(cardtable, index)));
#endif
  cmpb(Address(card_addr, 0), 0);
  jcc(Assembler::equal, done);

  // storing a region crossing, non-NULL oop, card is clean.
  // dirty card and log.

  movb(Address(card_addr, 0), 0);

  cmpl(queue_index, 0);
  jcc(Assembler::equal, runtime);
  subl(queue_index, wordSize);
  movptr(tmp2, buffer);
#ifdef _LP64
  movslq(rscratch1, queue_index);
  addq(tmp2, rscratch1);
  movq(Address(tmp2, 0), card_addr);
#else
  addl(tmp2, queue_index);
  movl(Address(tmp2, 0), card_index);
#endif
  jmp(done);

  bind(runtime);
  // save the live input values
  push(store_addr);
  push(new_val);
#ifdef _LP64
  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, r15_thread);
#else
  push(thread);
  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, thread);
  pop(thread);
#endif
  pop(new_val);
  pop(store_addr);

  bind(done);
}

#endif // SERIALGC
//////////////////////////////////////////////////////////////////////////////////


void MacroAssembler::store_check(Register obj) {
  // Does a store check for the oop in register obj. The content of
  // register obj is destroyed afterwards.
  store_check_part_1(obj);
  store_check_part_2(obj);
}

void MacroAssembler::store_check(Register obj, Address dst) {
  store_check(obj);
}


// split the store check operation so that other instructions can be scheduled in between
void MacroAssembler::store_check_part_1(Register obj) {
  BarrierSet* bs = Universe::heap()->barrier_set();
  assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
  shrptr(obj, CardTableModRefBS::card_shift);
}
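
// Editorial worked example (added): with the usual card_shift of 9 (512-byte
// cards), a store to address A dirties the byte at byte_map_base[A >> 9].
// For example A == 0x1000 and A == 0x11FF both shift to card index 0x8,
// while A == 0x1200 shifts to 0x9. store_check_part_2 below turns the
// already-shifted value into that one-byte store.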

void MacroAssembler::store_check_part_2(Register obj) {
  BarrierSet* bs = Universe::heap()->barrier_set();
  assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
  CardTableModRefBS* ct = (CardTableModRefBS*)bs;
  assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");

  // The calculation for byte_map_base is as follows:
  // byte_map_base = _byte_map - (uintptr_t(low_bound) >> card_shift);
  // So this essentially converts an address to a displacement and
  // it will never need to be relocated. On 64bit however the value may be too
  // large for a 32bit displacement.

  intptr_t disp = (intptr_t) ct->byte_map_base;
  if (is_simm32(disp)) {
    Address cardtable(noreg, obj, Address::times_1, disp);
    movb(cardtable, 0);
  } else {
    // By doing it as an ExternalAddress, disp could be converted to a rip-relative
    // displacement and done in a single instruction given favorable mapping and
    // a smarter version of as_Address. Worst case it is two instructions which
    // is no worse off than loading disp into a register and doing as a simple
    // Address() as above.
    // We can't do as ExternalAddress as the only style since if disp == 0 we'll
    // assert since NULL isn't acceptable in a reloc (see 6644928). In any case
    // in some cases we'll get a single instruction version.

    ExternalAddress cardtable((address)disp);
    Address index(noreg, obj, Address::times_1);
    movb(as_Address(ArrayAddress(cardtable, index)), 0);
  }
}

void MacroAssembler::subptr(Register dst, int32_t imm32) {
  LP64_ONLY(subq(dst, imm32)) NOT_LP64(subl(dst, imm32));
}

// Force generation of a 4 byte immediate value even if it fits into 8bit
void MacroAssembler::subptr_imm32(Register dst, int32_t imm32) {
  LP64_ONLY(subq_imm32(dst, imm32)) NOT_LP64(subl_imm32(dst, imm32));
}

void MacroAssembler::subptr(Register dst, Register src) {
  LP64_ONLY(subq(dst, src)) NOT_LP64(subl(dst, src));
}

// C++ bool manipulation
void MacroAssembler::testbool(Register dst) {
  if(sizeof(bool) == 1)
    testb(dst, 0xff);
  else if(sizeof(bool) == 2) {
    // testw implementation needed for two byte bools
    ShouldNotReachHere();
  } else if(sizeof(bool) == 4)
    testl(dst, dst);
  else
    // unsupported
    ShouldNotReachHere();
}

void MacroAssembler::testptr(Register dst, Register src) {
  LP64_ONLY(testq(dst, src)) NOT_LP64(testl(dst, src));
}

// Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes.
void MacroAssembler::tlab_allocate(Register obj,
                                   Register var_size_in_bytes,
                                   int con_size_in_bytes,
                                   Register t1,
                                   Register t2,
                                   Label& slow_case) {
  assert_different_registers(obj, t1, t2);
  assert_different_registers(obj, var_size_in_bytes, t1);
  Register end = t2;
  Register thread = NOT_LP64(t1) LP64_ONLY(r15_thread);

  verify_tlab();

  NOT_LP64(get_thread(thread));

  movptr(obj, Address(thread, JavaThread::tlab_top_offset()));
  if (var_size_in_bytes == noreg) {
    lea(end, Address(obj, con_size_in_bytes));
  } else {
    lea(end, Address(obj, var_size_in_bytes, Address::times_1));
  }
  cmpptr(end, Address(thread, JavaThread::tlab_end_offset()));
  jcc(Assembler::above, slow_case);

  // update the tlab top pointer
  movptr(Address(thread, JavaThread::tlab_top_offset()), end);

  // recover var_size_in_bytes if necessary
  if (var_size_in_bytes == end) {
    subptr(var_size_in_bytes, obj);
  }
  verify_tlab();
}
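
// Editorial sketch (added): tlab_allocate above is the classic bump-pointer
// fast path. Per thread, in pseudo-C:
//
//   obj = tlab.top;
//   end = obj + size;
//   if (end > tlab.end) goto slow_case;   // refill or allocate in shared eden
//   tlab.top = end;                       // no atomics: the TLAB is thread-local
//
// Contrast with eden_allocate earlier in this file, which bumps a shared top
// with a locked cmpxchg retry loop because all threads race on it.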

// Preserves rbx and rdx.
Register MacroAssembler::tlab_refill(Label& retry,
                                     Label& try_eden,
                                     Label& slow_case) {
  Register top = rax;
  Register t1  = rcx;
  Register t2  = rsi;
  Register thread_reg = NOT_LP64(rdi) LP64_ONLY(r15_thread);
  assert_different_registers(top, thread_reg, t1, t2, /* preserve: */ rbx, rdx);
  Label do_refill, discard_tlab;

  if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) {
    // No allocation in the shared eden.
    jmp(slow_case);
  }

  NOT_LP64(get_thread(thread_reg));

  movptr(top, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())));
  movptr(t1,  Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())));

  // calculate amount of free space
  subptr(t1, top);
  shrptr(t1, LogHeapWordSize);

  // Retain tlab and allocate object in shared space if
  // the amount free in the tlab is too large to discard.
  cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset())));
  jcc(Assembler::lessEqual, discard_tlab);

  // Retain
  // %%% yuck as movptr...
  movptr(t2, (int32_t) ThreadLocalAllocBuffer::refill_waste_limit_increment());
  addptr(Address(thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset())), t2);
  if (TLABStats) {
    // increment number of slow_allocations
    addl(Address(thread_reg, in_bytes(JavaThread::tlab_slow_allocations_offset())), 1);
  }
  jmp(try_eden);

  bind(discard_tlab);
  if (TLABStats) {
    // increment number of refills
    addl(Address(thread_reg, in_bytes(JavaThread::tlab_number_of_refills_offset())), 1);
    // accumulate wastage -- t1 is amount free in tlab
    addl(Address(thread_reg, in_bytes(JavaThread::tlab_fast_refill_waste_offset())), t1);
  }

  // if tlab is currently allocated (top or end != null) then
  // fill [top, end + alignment_reserve) with array object
  testptr(top, top);
  jcc(Assembler::zero, do_refill);

  // set up the mark word
  movptr(Address(top, oopDesc::mark_offset_in_bytes()), (intptr_t)markOopDesc::prototype()->copy_set_hash(0x2));
  // set the length to the remaining space
  subptr(t1, typeArrayOopDesc::header_size(T_INT));
  addptr(t1, (int32_t)ThreadLocalAllocBuffer::alignment_reserve());
  shlptr(t1, log2_intptr(HeapWordSize/sizeof(jint)));
  movl(Address(top, arrayOopDesc::length_offset_in_bytes()), t1);
  // set klass to intArrayKlass
  // dubious reloc why not an oop reloc?
  movptr(t1, ExternalAddress((address)Universe::intArrayKlassObj_addr()));
  // store klass last. concurrent GCs assume the length is valid if the
  // klass field is not null.
  store_klass(top, t1);

  movptr(t1, top);
  subptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_start_offset())));
  incr_allocated_bytes(thread_reg, t1, 0);

  // refill the tlab with an eden allocation
  bind(do_refill);
  movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_size_offset())));
  shlptr(t1, LogHeapWordSize);
  // allocate new tlab, address returned in top
  eden_allocate(top, t1, 0, t2, slow_case);

  // Check that t1 was preserved in eden_allocate.
#ifdef ASSERT
  if (UseTLAB) {
    Label ok;
    Register tsize = rsi;
    assert_different_registers(tsize, thread_reg, t1);
    push(tsize);
    movptr(tsize, Address(thread_reg, in_bytes(JavaThread::tlab_size_offset())));
    shlptr(tsize, LogHeapWordSize);
    cmpptr(t1, tsize);
    jcc(Assembler::equal, ok);
    STOP("assert(t1 != tlab size)");
    should_not_reach_here();

    bind(ok);
    pop(tsize);
  }
#endif
  movptr(Address(thread_reg, in_bytes(JavaThread::tlab_start_offset())), top);
  movptr(Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())), top);
  addptr(top, t1);
  subptr(top, (int32_t)ThreadLocalAllocBuffer::alignment_reserve_in_bytes());
  movptr(Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())), top);
  verify_tlab();
  jmp(retry);

  return thread_reg; // for use by caller
}

void MacroAssembler::incr_allocated_bytes(Register thread,
                                          Register var_size_in_bytes,
                                          int con_size_in_bytes,
                                          Register t1) {
  if (!thread->is_valid()) {
#ifdef _LP64
    thread = r15_thread;
#else
    assert(t1->is_valid(), "need temp reg");
    thread = t1;
    get_thread(thread);
#endif
  }

#ifdef _LP64
  if (var_size_in_bytes->is_valid()) {
    addq(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), var_size_in_bytes);
  } else {
    addq(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), con_size_in_bytes);
  }
#else
  if (var_size_in_bytes->is_valid()) {
    addl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), var_size_in_bytes);
  } else {
    addl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), con_size_in_bytes);
  }
  adcl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())+4), 0);
#endif
}

void MacroAssembler::fp_runtime_fallback(address runtime_entry, int nb_args, int num_fpu_regs_in_use) {
  pusha();

  // if we are coming from c1, xmm registers may be live
  int off = 0;
  if (UseSSE == 1)  {
    subptr(rsp, sizeof(jdouble)*8);
    movflt(Address(rsp,off++*sizeof(jdouble)),xmm0);
    movflt(Address(rsp,off++*sizeof(jdouble)),xmm1);
    movflt(Address(rsp,off++*sizeof(jdouble)),xmm2);
    movflt(Address(rsp,off++*sizeof(jdouble)),xmm3);
    movflt(Address(rsp,off++*sizeof(jdouble)),xmm4);
    movflt(Address(rsp,off++*sizeof(jdouble)),xmm5);
    movflt(Address(rsp,off++*sizeof(jdouble)),xmm6);
    movflt(Address(rsp,off++*sizeof(jdouble)),xmm7);
  } else if (UseSSE >= 2)  {
#ifdef COMPILER2
    if (MaxVectorSize > 16) {
      assert(UseAVX > 0, "256bit vectors are supported only with AVX");
      // Save upper half of YMM registers
      subptr(rsp, 16 * LP64_ONLY(16) NOT_LP64(8));
      vextractf128h(Address(rsp,  0),xmm0);
      vextractf128h(Address(rsp, 16),xmm1);
      vextractf128h(Address(rsp, 32),xmm2);
      vextractf128h(Address(rsp, 48),xmm3);
      vextractf128h(Address(rsp, 64),xmm4);
      vextractf128h(Address(rsp, 80),xmm5);
      vextractf128h(Address(rsp, 96),xmm6);
      vextractf128h(Address(rsp,112),xmm7);
#ifdef _LP64
      vextractf128h(Address(rsp,128),xmm8);
      vextractf128h(Address(rsp,144),xmm9);
      vextractf128h(Address(rsp,160),xmm10);
      vextractf128h(Address(rsp,176),xmm11);
      vextractf128h(Address(rsp,192),xmm12);
      vextractf128h(Address(rsp,208),xmm13);
      vextractf128h(Address(rsp,224),xmm14);
      vextractf128h(Address(rsp,240),xmm15);
#endif
    }
#endif
    // Save whole 128-bit (16 bytes) XMM registers
    subptr(rsp, 16 * LP64_ONLY(16) NOT_LP64(8));
    movdqu(Address(rsp,off++*16), xmm0);
    movdqu(Address(rsp,off++*16), xmm1);
    movdqu(Address(rsp,off++*16), xmm2);
    movdqu(Address(rsp,off++*16), xmm3);
    movdqu(Address(rsp,off++*16), xmm4);
    movdqu(Address(rsp,off++*16), xmm5);
    movdqu(Address(rsp,off++*16), xmm6);
    movdqu(Address(rsp,off++*16), xmm7);
#ifdef _LP64
    movdqu(Address(rsp,off++*16), xmm8);
    movdqu(Address(rsp,off++*16), xmm9);
    movdqu(Address(rsp,off++*16), xmm10);
    movdqu(Address(rsp,off++*16), xmm11);
    movdqu(Address(rsp,off++*16), xmm12);
    movdqu(Address(rsp,off++*16), xmm13);
    movdqu(Address(rsp,off++*16), xmm14);
    movdqu(Address(rsp,off++*16), xmm15);
#endif
  }

  // Preserve registers across runtime call
  int incoming_argument_and_return_value_offset = -1;
  if (num_fpu_regs_in_use > 1) {
    // Must preserve all other FPU regs (could alternatively convert
    // SharedRuntime::dsin, dcos etc. into assembly routines known not to
    // trash FPU state, but we cannot trust the C compiler).
    NEEDS_CLEANUP;
    // NOTE that in this case we also push the incoming argument(s) to
    // the stack and restore them later; we also use this stack slot to
    // hold the return value from dsin, dcos etc.
    for (int i = 0; i < num_fpu_regs_in_use; i++) {
      subptr(rsp, sizeof(jdouble));
      fstp_d(Address(rsp, 0));
    }
    incoming_argument_and_return_value_offset = sizeof(jdouble)*(num_fpu_regs_in_use-1);
    for (int i = nb_args-1; i >= 0; i--) {
      fld_d(Address(rsp, incoming_argument_and_return_value_offset-i*sizeof(jdouble)));
    }
  }

  subptr(rsp, nb_args*sizeof(jdouble));
  for (int i = 0; i < nb_args; i++) {
    fstp_d(Address(rsp, i*sizeof(jdouble)));
  }

#ifdef _LP64
  if (nb_args > 0) {
    movdbl(xmm0, Address(rsp, 0));
  }
  if (nb_args > 1) {
    movdbl(xmm1, Address(rsp, sizeof(jdouble)));
  }
  assert(nb_args <= 2, "unsupported number of args");
#endif // _LP64

  // NOTE: we must not use call_VM_leaf here because that requires a
  // complete interpreter frame in debug mode -- same bug as 4387334.
  // MacroAssembler::call_VM_leaf_base is perfectly safe and will
  // observe the proper 64-bit ABI.

  NEEDS_CLEANUP;
  // Need to add stack banging before this runtime call if it needs to
  // be taken; however, there is no generic stack banging routine at
  // the MacroAssembler level.

  MacroAssembler::call_VM_leaf_base(runtime_entry, 0);

#ifdef _LP64
  movsd(Address(rsp, 0), xmm0);
  fld_d(Address(rsp, 0));
#endif // _LP64
  addptr(rsp, sizeof(jdouble) * nb_args);
  if (num_fpu_regs_in_use > 1) {
    // Must save return value to stack and then restore entire FPU
    // stack except incoming arguments
    fstp_d(Address(rsp, incoming_argument_and_return_value_offset));
    for (int i = 0; i < num_fpu_regs_in_use - nb_args; i++) {
      fld_d(Address(rsp, 0));
      addptr(rsp, sizeof(jdouble));
    }
    fld_d(Address(rsp, (nb_args-1)*sizeof(jdouble)));
    addptr(rsp, sizeof(jdouble) * nb_args);
  }

  off = 0;
  if (UseSSE == 1)  {
    movflt(xmm0, Address(rsp,off++*sizeof(jdouble)));
    movflt(xmm1, Address(rsp,off++*sizeof(jdouble)));
    movflt(xmm2, Address(rsp,off++*sizeof(jdouble)));
    movflt(xmm3, Address(rsp,off++*sizeof(jdouble)));
    movflt(xmm4, Address(rsp,off++*sizeof(jdouble)));
    movflt(xmm5, Address(rsp,off++*sizeof(jdouble)));
    movflt(xmm6, Address(rsp,off++*sizeof(jdouble)));
    movflt(xmm7, Address(rsp,off++*sizeof(jdouble)));
    addptr(rsp, sizeof(jdouble)*8);
  } else if (UseSSE >= 2)  {
    // Restore whole 128-bit (16 bytes) XMM registers
    movdqu(xmm0, Address(rsp,off++*16));
    movdqu(xmm1, Address(rsp,off++*16));
    movdqu(xmm2, Address(rsp,off++*16));
    movdqu(xmm3, Address(rsp,off++*16));
    movdqu(xmm4, Address(rsp,off++*16));
    movdqu(xmm5, Address(rsp,off++*16));
    movdqu(xmm6, Address(rsp,off++*16));
    movdqu(xmm7, Address(rsp,off++*16));
#ifdef _LP64
    movdqu(xmm8,  Address(rsp,off++*16));
    movdqu(xmm9,  Address(rsp,off++*16));
    movdqu(xmm10, Address(rsp,off++*16));
    movdqu(xmm11, Address(rsp,off++*16));
    movdqu(xmm12, Address(rsp,off++*16));
    movdqu(xmm13, Address(rsp,off++*16));
    movdqu(xmm14, Address(rsp,off++*16));
    movdqu(xmm15, Address(rsp,off++*16));
#endif
    addptr(rsp, 16 * LP64_ONLY(16) NOT_LP64(8));
#ifdef COMPILER2
    if (MaxVectorSize > 16) {
      // Restore upper half of YMM registers.
      vinsertf128h(xmm0, Address(rsp,  0));
      vinsertf128h(xmm1, Address(rsp, 16));
      vinsertf128h(xmm2, Address(rsp, 32));
      vinsertf128h(xmm3, Address(rsp, 48));
      vinsertf128h(xmm4, Address(rsp, 64));
      vinsertf128h(xmm5, Address(rsp, 80));
      vinsertf128h(xmm6, Address(rsp, 96));
      vinsertf128h(xmm7, Address(rsp,112));
#ifdef _LP64
      vinsertf128h(xmm8,  Address(rsp,128));
      vinsertf128h(xmm9,  Address(rsp,144));
      vinsertf128h(xmm10, Address(rsp,160));
      vinsertf128h(xmm11, Address(rsp,176));
      vinsertf128h(xmm12, Address(rsp,192));
      vinsertf128h(xmm13, Address(rsp,208));
      vinsertf128h(xmm14, Address(rsp,224));
      vinsertf128h(xmm15, Address(rsp,240));
#endif
      addptr(rsp, 16 * LP64_ONLY(16) NOT_LP64(8));
    }
#endif
  }
  popa();
}

static const double pi_4 = 0.7853981633974483;

void MacroAssembler::trigfunc(char trig, int num_fpu_regs_in_use) {
  // A hand-coded argument reduction for values in fabs(pi/4, pi/2)
  // was attempted in this code; unfortunately it appears that the
  // switch to 80-bit precision and back causes this to be
  // unprofitable compared with simply performing a runtime call if
  // the argument is out of the (-pi/4, pi/4) range.
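  // In C terms the dispatch below is roughly (sketch only; fsin/fcos/ftan
  // stand for the in-line x87 instructions, dsin/dcos/dtan for the
  // SharedRuntime entries reached through fp_runtime_fallback):
  //   double trigfunc(double x) {
  //     return (fabs(x) <= pi/4) ? fsin(x)                  // fast path
  //                              : SharedRuntime::dsin(x);  // slow path
  //   }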
9224 9225 Register tmp = noreg; 9226 if (!VM_Version::supports_cmov()) { 9227 // fcmp needs a temporary so preserve rbx, 9228 tmp = rbx; 9229 push(tmp); 9230 } 9231 9232 Label slow_case, done; 9233 9234 ExternalAddress pi4_adr = (address)&pi_4; 9235 if (reachable(pi4_adr)) { 9236 // x ?<= pi/4 9237 fld_d(pi4_adr); 9238 fld_s(1); // Stack: X PI/4 X 9239 fabs(); // Stack: |X| PI/4 X 9240 fcmp(tmp); 9241 jcc(Assembler::above, slow_case); 9242 9243 // fastest case: -pi/4 <= x <= pi/4 9244 switch(trig) { 9245 case 's': 9246 fsin(); 9247 break; 9248 case 'c': 9249 fcos(); 9250 break; 9251 case 't': 9252 ftan(); 9253 break; 9254 default: 9255 assert(false, "bad intrinsic"); 9256 break; 9257 } 9258 jmp(done); 9259 } 9260 9261 // slow case: runtime call 9262 bind(slow_case); 9263 9264 switch(trig) { 9265 case 's': 9266 { 9267 fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dsin), 1, num_fpu_regs_in_use); 9268 } 9269 break; 9270 case 'c': 9271 { 9272 fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dcos), 1, num_fpu_regs_in_use); 9273 } 9274 break; 9275 case 't': 9276 { 9277 fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dtan), 1, num_fpu_regs_in_use); 9278 } 9279 break; 9280 default: 9281 assert(false, "bad intrinsic"); 9282 break; 9283 } 9284 9285 // Come here with result in F-TOS 9286 bind(done); 9287 9288 if (tmp != noreg) { 9289 pop(tmp); 9290 } 9291 } 9292 9293 9294 // Look up the method for a megamorphic invokeinterface call. 9295 // The target method is determined by <intf_klass, itable_index>. 9296 // The receiver klass is in recv_klass. 9297 // On success, the result will be in method_result, and execution falls through. 9298 // On failure, execution transfers to the given label. 9299 void MacroAssembler::lookup_interface_method(Register recv_klass, 9300 Register intf_klass, 9301 RegisterOrConstant itable_index, 9302 Register method_result, 9303 Register scan_temp, 9304 Label& L_no_such_interface) { 9305 assert_different_registers(recv_klass, intf_klass, method_result, scan_temp); 9306 assert(itable_index.is_constant() || itable_index.as_register() == method_result, 9307 "caller must use same register for non-constant itable index as for method"); 9308 9309 // Compute start of first itableOffsetEntry (which is at the end of the vtable) 9310 int vtable_base = InstanceKlass::vtable_start_offset() * wordSize; 9311 int itentry_off = itableMethodEntry::method_offset_in_bytes(); 9312 int scan_step = itableOffsetEntry::size() * wordSize; 9313 int vte_size = vtableEntry::size() * wordSize; 9314 Address::ScaleFactor times_vte_scale = Address::times_ptr; 9315 assert(vte_size == wordSize, "else adjust times_vte_scale"); 9316 9317 movl(scan_temp, Address(recv_klass, InstanceKlass::vtable_length_offset() * wordSize)); 9318 9319 // %%% Could store the aligned, prescaled offset in the klassoop. 9320 lea(scan_temp, Address(recv_klass, scan_temp, times_vte_scale, vtable_base)); 9321 if (HeapWordsPerLong > 1) { 9322 // Round up to align_object_offset boundary 9323 // see code for InstanceKlass::start_of_itable! 9324 round_to(scan_temp, BytesPerLong); 9325 } 9326 9327 // Adjust recv_klass by scaled itable_index, so we can free itable_index. 
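  // Sketch of the adjustment performed by the lea below:
  //   recv_klass += itable_index * wordSize + itentry_off
  // which is valid only because an itableMethodEntry is exactly one word,
  // as the assert that follows checks.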
9328 assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below"); 9329 lea(recv_klass, Address(recv_klass, itable_index, Address::times_ptr, itentry_off)); 9330 9331 // for (scan = klass->itable(); scan->interface() != NULL; scan += scan_step) { 9332 // if (scan->interface() == intf) { 9333 // result = (klass + scan->offset() + itable_index); 9334 // } 9335 // } 9336 Label search, found_method; 9337 9338 for (int peel = 1; peel >= 0; peel--) { 9339 movptr(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes())); 9340 cmpptr(intf_klass, method_result); 9341 9342 if (peel) { 9343 jccb(Assembler::equal, found_method); 9344 } else { 9345 jccb(Assembler::notEqual, search); 9346 // (invert the test to fall through to found_method...) 9347 } 9348 9349 if (!peel) break; 9350 9351 bind(search); 9352 9353 // Check that the previous entry is non-null. A null entry means that 9354 // the receiver class doesn't implement the interface, and wasn't the 9355 // same as when the caller was compiled. 9356 testptr(method_result, method_result); 9357 jcc(Assembler::zero, L_no_such_interface); 9358 addptr(scan_temp, scan_step); 9359 } 9360 9361 bind(found_method); 9362 9363 // Got a hit. 9364 movl(scan_temp, Address(scan_temp, itableOffsetEntry::offset_offset_in_bytes())); 9365 movptr(method_result, Address(recv_klass, scan_temp, Address::times_1)); 9366 } 9367 9368 9369 // virtual method calling 9370 void MacroAssembler::lookup_virtual_method(Register recv_klass, 9371 RegisterOrConstant vtable_index, 9372 Register method_result) { 9373 const int base = InstanceKlass::vtable_start_offset() * wordSize; 9374 assert(vtableEntry::size() * wordSize == wordSize, "else adjust the scaling in the code below"); 9375 Address vtable_entry_addr(recv_klass, 9376 vtable_index, Address::times_ptr, 9377 base + vtableEntry::method_offset_in_bytes()); 9378 movptr(method_result, vtable_entry_addr); 9379 } 9380 9381 9382 void MacroAssembler::check_klass_subtype(Register sub_klass, 9383 Register super_klass, 9384 Register temp_reg, 9385 Label& L_success) { 9386 Label L_failure; 9387 check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg, &L_success, &L_failure, NULL); 9388 check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, noreg, &L_success, NULL); 9389 bind(L_failure); 9390 } 9391 9392 9393 void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass, 9394 Register super_klass, 9395 Register temp_reg, 9396 Label* L_success, 9397 Label* L_failure, 9398 Label* L_slow_path, 9399 RegisterOrConstant super_check_offset) { 9400 assert_different_registers(sub_klass, super_klass, temp_reg); 9401 bool must_load_sco = (super_check_offset.constant_or_zero() == -1); 9402 if (super_check_offset.is_register()) { 9403 assert_different_registers(sub_klass, super_klass, 9404 super_check_offset.as_register()); 9405 } else if (must_load_sco) { 9406 assert(temp_reg != noreg, "supply either a temp or a register offset"); 9407 } 9408 9409 Label L_fallthrough; 9410 int label_nulls = 0; 9411 if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } 9412 if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } 9413 if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; } 9414 assert(label_nulls <= 1, "at most one NULL in the batch"); 9415 9416 int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); 9417 int sco_offset = in_bytes(Klass::super_check_offset_offset()); 9418 Address super_check_offset_addr(super_klass, 
sco_offset); 9419 9420 // Hacked jcc, which "knows" that L_fallthrough, at least, is in 9421 // range of a jccb. If this routine grows larger, reconsider at 9422 // least some of these. 9423 #define local_jcc(assembler_cond, label) \ 9424 if (&(label) == &L_fallthrough) jccb(assembler_cond, label); \ 9425 else jcc( assembler_cond, label) /*omit semi*/ 9426 9427 // Hacked jmp, which may only be used just before L_fallthrough. 9428 #define final_jmp(label) \ 9429 if (&(label) == &L_fallthrough) { /*do nothing*/ } \ 9430 else jmp(label) /*omit semi*/ 9431 9432 // If the pointers are equal, we are done (e.g., String[] elements). 9433 // This self-check enables sharing of secondary supertype arrays among 9434 // non-primary types such as array-of-interface. Otherwise, each such 9435 // type would need its own customized SSA. 9436 // We move this check to the front of the fast path because many 9437 // type checks are in fact trivially successful in this manner, 9438 // so we get a nicely predicted branch right at the start of the check. 9439 cmpptr(sub_klass, super_klass); 9440 local_jcc(Assembler::equal, *L_success); 9441 9442 // Check the supertype display: 9443 if (must_load_sco) { 9444 // Positive movl does right thing on LP64. 9445 movl(temp_reg, super_check_offset_addr); 9446 super_check_offset = RegisterOrConstant(temp_reg); 9447 } 9448 Address super_check_addr(sub_klass, super_check_offset, Address::times_1, 0); 9449 cmpptr(super_klass, super_check_addr); // load displayed supertype 9450 9451 // This check has worked decisively for primary supers. 9452 // Secondary supers are sought in the super_cache ('super_cache_addr'). 9453 // (Secondary supers are interfaces and very deeply nested subtypes.) 9454 // This works in the same check above because of a tricky aliasing 9455 // between the super_cache and the primary super display elements. 9456 // (The 'super_check_addr' can address either, as the case requires.) 9457 // Note that the cache is updated below if it does not help us find 9458 // what we need immediately. 9459 // So if it was a primary super, we can just fail immediately. 9460 // Otherwise, it's the slow path for us (no success at this point). 9461 9462 if (super_check_offset.is_register()) { 9463 local_jcc(Assembler::equal, *L_success); 9464 cmpl(super_check_offset.as_register(), sc_offset); 9465 if (L_failure == &L_fallthrough) { 9466 local_jcc(Assembler::equal, *L_slow_path); 9467 } else { 9468 local_jcc(Assembler::notEqual, *L_failure); 9469 final_jmp(*L_slow_path); 9470 } 9471 } else if (super_check_offset.as_constant() == sc_offset) { 9472 // Need a slow path; fast failure is impossible. 9473 if (L_slow_path == &L_fallthrough) { 9474 local_jcc(Assembler::equal, *L_success); 9475 } else { 9476 local_jcc(Assembler::notEqual, *L_slow_path); 9477 final_jmp(*L_success); 9478 } 9479 } else { 9480 // No slow path; it's a fast decision. 
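    // Sketch: a constant super_check_offset other than sc_offset means the
    // compare above has already decided the whole check:
    //   *(sub_klass + super_check_offset) == super_klass ? success : failure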
9481 if (L_failure == &L_fallthrough) { 9482 local_jcc(Assembler::equal, *L_success); 9483 } else { 9484 local_jcc(Assembler::notEqual, *L_failure); 9485 final_jmp(*L_success); 9486 } 9487 } 9488 9489 bind(L_fallthrough); 9490 9491 #undef local_jcc 9492 #undef final_jmp 9493 } 9494 9495 9496 void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass, 9497 Register super_klass, 9498 Register temp_reg, 9499 Register temp2_reg, 9500 Label* L_success, 9501 Label* L_failure, 9502 bool set_cond_codes) { 9503 assert_different_registers(sub_klass, super_klass, temp_reg); 9504 if (temp2_reg != noreg) 9505 assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg); 9506 #define IS_A_TEMP(reg) ((reg) == temp_reg || (reg) == temp2_reg) 9507 9508 Label L_fallthrough; 9509 int label_nulls = 0; 9510 if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } 9511 if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } 9512 assert(label_nulls <= 1, "at most one NULL in the batch"); 9513 9514 // a couple of useful fields in sub_klass: 9515 int ss_offset = in_bytes(Klass::secondary_supers_offset()); 9516 int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); 9517 Address secondary_supers_addr(sub_klass, ss_offset); 9518 Address super_cache_addr( sub_klass, sc_offset); 9519 9520 // Do a linear scan of the secondary super-klass chain. 9521 // This code is rarely used, so simplicity is a virtue here. 9522 // The repne_scan instruction uses fixed registers, which we must spill. 9523 // Don't worry too much about pre-existing connections with the input regs. 9524 9525 assert(sub_klass != rax, "killed reg"); // killed by mov(rax, super) 9526 assert(sub_klass != rcx, "killed reg"); // killed by lea(rcx, &pst_counter) 9527 9528 // Get super_klass value into rax (even if it was in rdi or rcx). 9529 bool pushed_rax = false, pushed_rcx = false, pushed_rdi = false; 9530 if (super_klass != rax || UseCompressedOops) { 9531 if (!IS_A_TEMP(rax)) { push(rax); pushed_rax = true; } 9532 mov(rax, super_klass); 9533 } 9534 if (!IS_A_TEMP(rcx)) { push(rcx); pushed_rcx = true; } 9535 if (!IS_A_TEMP(rdi)) { push(rdi); pushed_rdi = true; } 9536 9537 #ifndef PRODUCT 9538 int* pst_counter = &SharedRuntime::_partial_subtype_ctr; 9539 ExternalAddress pst_counter_addr((address) pst_counter); 9540 NOT_LP64( incrementl(pst_counter_addr) ); 9541 LP64_ONLY( lea(rcx, pst_counter_addr) ); 9542 LP64_ONLY( incrementl(Address(rcx, 0)) ); 9543 #endif //PRODUCT 9544 9545 // We will consult the secondary-super array. 9546 movptr(rdi, secondary_supers_addr); 9547 // Load the array length. (Positive movl does right thing on LP64.) 9548 movl(rcx, Address(rdi, Array<Klass*>::length_offset_in_bytes())); 9549 // Skip to start of data. 9550 addptr(rdi, Array<Klass*>::base_offset_in_bytes()); 9551 9552 // Scan RCX words at [RDI] for an occurrence of RAX. 9553 // Set NZ/Z based on last compare. 9554 // Z flag value will not be set by 'repne' if RCX == 0 since 'repne' does 9555 // not change flags (only scas instruction which is repeated sets flags). 9556 // Set Z = 0 (not equal) before 'repne' to indicate that class was not found. 9557 9558 testptr(rax,rax); // Set Z = 0 9559 repne_scan(); 9560 9561 // Unspill the temp. registers: 9562 if (pushed_rdi) pop(rdi); 9563 if (pushed_rcx) pop(rcx); 9564 if (pushed_rax) pop(rax); 9565 9566 if (set_cond_codes) { 9567 // Special hack for the AD files: rdi is guaranteed non-zero. 
9568 assert(!pushed_rdi, "rdi must be left non-NULL"); 9569 // Also, the condition codes are properly set Z/NZ on succeed/failure. 9570 } 9571 9572 if (L_failure == &L_fallthrough) 9573 jccb(Assembler::notEqual, *L_failure); 9574 else jcc(Assembler::notEqual, *L_failure); 9575 9576 // Success. Cache the super we found and proceed in triumph. 9577 movptr(super_cache_addr, super_klass); 9578 9579 if (L_success != &L_fallthrough) { 9580 jmp(*L_success); 9581 } 9582 9583 #undef IS_A_TEMP 9584 9585 bind(L_fallthrough); 9586 } 9587 9588 9589 void MacroAssembler::cmov32(Condition cc, Register dst, Address src) { 9590 if (VM_Version::supports_cmov()) { 9591 cmovl(cc, dst, src); 9592 } else { 9593 Label L; 9594 jccb(negate_condition(cc), L); 9595 movl(dst, src); 9596 bind(L); 9597 } 9598 } 9599 9600 void MacroAssembler::cmov32(Condition cc, Register dst, Register src) { 9601 if (VM_Version::supports_cmov()) { 9602 cmovl(cc, dst, src); 9603 } else { 9604 Label L; 9605 jccb(negate_condition(cc), L); 9606 movl(dst, src); 9607 bind(L); 9608 } 9609 } 9610 9611 void MacroAssembler::verify_oop(Register reg, const char* s) { 9612 if (!VerifyOops) return; 9613 9614 // Pass register number to verify_oop_subroutine 9615 char* b = new char[strlen(s) + 50]; 9616 sprintf(b, "verify_oop: %s: %s", reg->name(), s); 9617 BLOCK_COMMENT("verify_oop {"); 9618 #ifdef _LP64 9619 push(rscratch1); // save r10, trashed by movptr() 9620 #endif 9621 push(rax); // save rax, 9622 push(reg); // pass register argument 9623 ExternalAddress buffer((address) b); 9624 // avoid using pushptr, as it modifies scratch registers 9625 // and our contract is not to modify anything 9626 movptr(rax, buffer.addr()); 9627 push(rax); 9628 // call indirectly to solve generation ordering problem 9629 movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address())); 9630 call(rax); 9631 // Caller pops the arguments (oop, message) and restores rax, r10 9632 BLOCK_COMMENT("} verify_oop"); 9633 } 9634 9635 9636 RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr, 9637 Register tmp, 9638 int offset) { 9639 intptr_t value = *delayed_value_addr; 9640 if (value != 0) 9641 return RegisterOrConstant(value + offset); 9642 9643 // load indirectly to solve generation ordering problem 9644 movptr(tmp, ExternalAddress((address) delayed_value_addr)); 9645 9646 #ifdef ASSERT 9647 { Label L; 9648 testptr(tmp, tmp); 9649 if (WizardMode) { 9650 jcc(Assembler::notZero, L); 9651 char* buf = new char[40]; 9652 sprintf(buf, "DelayedValue="INTPTR_FORMAT, delayed_value_addr[1]); 9653 STOP(buf); 9654 } else { 9655 jccb(Assembler::notZero, L); 9656 hlt(); 9657 } 9658 bind(L); 9659 } 9660 #endif 9661 9662 if (offset != 0) 9663 addptr(tmp, offset); 9664 9665 return RegisterOrConstant(tmp); 9666 } 9667 9668 9669 Address MacroAssembler::argument_address(RegisterOrConstant arg_slot, 9670 int extra_slot_offset) { 9671 // cf. TemplateTable::prepare_invoke(), if (load_receiver). 
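  // Sketch of the address computed here for a constant arg slot k:
  //   rsp + expr_offset_in_bytes(extra_slot_offset) + k*stackElementSize + wordSize
  // where the trailing wordSize skips the return PC sitting on the stack.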
9672 int stackElementSize = Interpreter::stackElementSize; 9673 int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0); 9674 #ifdef ASSERT 9675 int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1); 9676 assert(offset1 - offset == stackElementSize, "correct arithmetic"); 9677 #endif 9678 Register scale_reg = noreg; 9679 Address::ScaleFactor scale_factor = Address::no_scale; 9680 if (arg_slot.is_constant()) { 9681 offset += arg_slot.as_constant() * stackElementSize; 9682 } else { 9683 scale_reg = arg_slot.as_register(); 9684 scale_factor = Address::times(stackElementSize); 9685 } 9686 offset += wordSize; // return PC is on stack 9687 return Address(rsp, scale_reg, scale_factor, offset); 9688 } 9689 9690 9691 void MacroAssembler::verify_oop_addr(Address addr, const char* s) { 9692 if (!VerifyOops) return; 9693 9694 // Address adjust(addr.base(), addr.index(), addr.scale(), addr.disp() + BytesPerWord); 9695 // Pass register number to verify_oop_subroutine 9696 char* b = new char[strlen(s) + 50]; 9697 sprintf(b, "verify_oop_addr: %s", s); 9698 9699 #ifdef _LP64 9700 push(rscratch1); // save r10, trashed by movptr() 9701 #endif 9702 push(rax); // save rax, 9703 // addr may contain rsp so we will have to adjust it based on the push 9704 // we just did (and on 64 bit we do two pushes) 9705 // NOTE: 64bit seemed to have had a bug in that it did movq(addr, rax); which 9706 // stores rax into addr which is backwards of what was intended. 9707 if (addr.uses(rsp)) { 9708 lea(rax, addr); 9709 pushptr(Address(rax, LP64_ONLY(2 *) BytesPerWord)); 9710 } else { 9711 pushptr(addr); 9712 } 9713 9714 ExternalAddress buffer((address) b); 9715 // pass msg argument 9716 // avoid using pushptr, as it modifies scratch registers 9717 // and our contract is not to modify anything 9718 movptr(rax, buffer.addr()); 9719 push(rax); 9720 9721 // call indirectly to solve generation ordering problem 9722 movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address())); 9723 call(rax); 9724 // Caller pops the arguments (addr, message) and restores rax, r10. 
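  // Stack picture at the call above, built by the pushes in this routine:
  //   rsp -> [msg buffer address]
  //          [oop loaded from addr]
  //          [saved rax]
  //          [saved r10]            (LP64 only)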
9725 } 9726 9727 void MacroAssembler::verify_tlab() { 9728 #ifdef ASSERT 9729 if (UseTLAB && VerifyOops) { 9730 Label next, ok; 9731 Register t1 = rsi; 9732 Register thread_reg = NOT_LP64(rbx) LP64_ONLY(r15_thread); 9733 9734 push(t1); 9735 NOT_LP64(push(thread_reg)); 9736 NOT_LP64(get_thread(thread_reg)); 9737 9738 movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset()))); 9739 cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_start_offset()))); 9740 jcc(Assembler::aboveEqual, next); 9741 STOP("assert(top >= start)"); 9742 should_not_reach_here(); 9743 9744 bind(next); 9745 movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_end_offset()))); 9746 cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset()))); 9747 jcc(Assembler::aboveEqual, ok); 9748 STOP("assert(top <= end)"); 9749 should_not_reach_here(); 9750 9751 bind(ok); 9752 NOT_LP64(pop(thread_reg)); 9753 pop(t1); 9754 } 9755 #endif 9756 } 9757 9758 class ControlWord { 9759 public: 9760 int32_t _value; 9761 9762 int rounding_control() const { return (_value >> 10) & 3 ; } 9763 int precision_control() const { return (_value >> 8) & 3 ; } 9764 bool precision() const { return ((_value >> 5) & 1) != 0; } 9765 bool underflow() const { return ((_value >> 4) & 1) != 0; } 9766 bool overflow() const { return ((_value >> 3) & 1) != 0; } 9767 bool zero_divide() const { return ((_value >> 2) & 1) != 0; } 9768 bool denormalized() const { return ((_value >> 1) & 1) != 0; } 9769 bool invalid() const { return ((_value >> 0) & 1) != 0; } 9770 9771 void print() const { 9772 // rounding control 9773 const char* rc; 9774 switch (rounding_control()) { 9775 case 0: rc = "round near"; break; 9776 case 1: rc = "round down"; break; 9777 case 2: rc = "round up "; break; 9778 case 3: rc = "chop "; break; 9779 }; 9780 // precision control 9781 const char* pc; 9782 switch (precision_control()) { 9783 case 0: pc = "24 bits "; break; 9784 case 1: pc = "reserved"; break; 9785 case 2: pc = "53 bits "; break; 9786 case 3: pc = "64 bits "; break; 9787 }; 9788 // flags 9789 char f[9]; 9790 f[0] = ' '; 9791 f[1] = ' '; 9792 f[2] = (precision ()) ? 'P' : 'p'; 9793 f[3] = (underflow ()) ? 'U' : 'u'; 9794 f[4] = (overflow ()) ? 'O' : 'o'; 9795 f[5] = (zero_divide ()) ? 'Z' : 'z'; 9796 f[6] = (denormalized()) ? 'D' : 'd'; 9797 f[7] = (invalid ()) ? 'I' : 'i'; 9798 f[8] = '\x0'; 9799 // output 9800 printf("%04x masks = %s, %s, %s", _value & 0xFFFF, f, rc, pc); 9801 } 9802 9803 }; 9804 9805 class StatusWord { 9806 public: 9807 int32_t _value; 9808 9809 bool busy() const { return ((_value >> 15) & 1) != 0; } 9810 bool C3() const { return ((_value >> 14) & 1) != 0; } 9811 bool C2() const { return ((_value >> 10) & 1) != 0; } 9812 bool C1() const { return ((_value >> 9) & 1) != 0; } 9813 bool C0() const { return ((_value >> 8) & 1) != 0; } 9814 int top() const { return (_value >> 11) & 7 ; } 9815 bool error_status() const { return ((_value >> 7) & 1) != 0; } 9816 bool stack_fault() const { return ((_value >> 6) & 1) != 0; } 9817 bool precision() const { return ((_value >> 5) & 1) != 0; } 9818 bool underflow() const { return ((_value >> 4) & 1) != 0; } 9819 bool overflow() const { return ((_value >> 3) & 1) != 0; } 9820 bool zero_divide() const { return ((_value >> 2) & 1) != 0; } 9821 bool denormalized() const { return ((_value >> 1) & 1) != 0; } 9822 bool invalid() const { return ((_value >> 0) & 1) != 0; } 9823 9824 void print() const { 9825 // condition codes 9826 char c[5]; 9827 c[0] = (C3()) ? '3' : '-'; 9828 c[1] = (C2()) ? 
'2' : '-'; 9829 c[2] = (C1()) ? '1' : '-'; 9830 c[3] = (C0()) ? '0' : '-'; 9831 c[4] = '\x0'; 9832 // flags 9833 char f[9]; 9834 f[0] = (error_status()) ? 'E' : '-'; 9835 f[1] = (stack_fault ()) ? 'S' : '-'; 9836 f[2] = (precision ()) ? 'P' : '-'; 9837 f[3] = (underflow ()) ? 'U' : '-'; 9838 f[4] = (overflow ()) ? 'O' : '-'; 9839 f[5] = (zero_divide ()) ? 'Z' : '-'; 9840 f[6] = (denormalized()) ? 'D' : '-'; 9841 f[7] = (invalid ()) ? 'I' : '-'; 9842 f[8] = '\x0'; 9843 // output 9844 printf("%04x flags = %s, cc = %s, top = %d", _value & 0xFFFF, f, c, top()); 9845 } 9846 9847 }; 9848 9849 class TagWord { 9850 public: 9851 int32_t _value; 9852 9853 int tag_at(int i) const { return (_value >> (i*2)) & 3; } 9854 9855 void print() const { 9856 printf("%04x", _value & 0xFFFF); 9857 } 9858 9859 }; 9860 9861 class FPU_Register { 9862 public: 9863 int32_t _m0; 9864 int32_t _m1; 9865 int16_t _ex; 9866 9867 bool is_indefinite() const { 9868 return _ex == -1 && _m1 == (int32_t)0xC0000000 && _m0 == 0; 9869 } 9870 9871 void print() const { 9872 char sign = (_ex < 0) ? '-' : '+'; 9873 const char* kind = (_ex == 0x7FFF || _ex == (int16_t)-1) ? "NaN" : " "; 9874 printf("%c%04hx.%08x%08x %s", sign, _ex, _m1, _m0, kind); 9875 }; 9876 9877 }; 9878 9879 class FPU_State { 9880 public: 9881 enum { 9882 register_size = 10, 9883 number_of_registers = 8, 9884 register_mask = 7 9885 }; 9886 9887 ControlWord _control_word; 9888 StatusWord _status_word; 9889 TagWord _tag_word; 9890 int32_t _error_offset; 9891 int32_t _error_selector; 9892 int32_t _data_offset; 9893 int32_t _data_selector; 9894 int8_t _register[register_size * number_of_registers]; 9895 9896 int tag_for_st(int i) const { return _tag_word.tag_at((_status_word.top() + i) & register_mask); } 9897 FPU_Register* st(int i) const { return (FPU_Register*)&_register[register_size * i]; } 9898 9899 const char* tag_as_string(int tag) const { 9900 switch (tag) { 9901 case 0: return "valid"; 9902 case 1: return "zero"; 9903 case 2: return "special"; 9904 case 3: return "empty"; 9905 } 9906 ShouldNotReachHere(); 9907 return NULL; 9908 } 9909 9910 void print() const { 9911 // print computation registers 9912 { int t = _status_word.top(); 9913 for (int i = 0; i < number_of_registers; i++) { 9914 int j = (i - t) & register_mask; 9915 printf("%c r%d = ST%d = ", (j == 0 ? '*' : ' '), i, j); 9916 st(j)->print(); 9917 printf(" %s\n", tag_as_string(_tag_word.tag_at(i))); 9918 } 9919 } 9920 printf("\n"); 9921 // print control registers 9922 printf("ctrl = "); _control_word.print(); printf("\n"); 9923 printf("stat = "); _status_word .print(); printf("\n"); 9924 printf("tags = "); _tag_word .print(); printf("\n"); 9925 } 9926 9927 }; 9928 9929 class Flag_Register { 9930 public: 9931 int32_t _value; 9932 9933 bool overflow() const { return ((_value >> 11) & 1) != 0; } 9934 bool direction() const { return ((_value >> 10) & 1) != 0; } 9935 bool sign() const { return ((_value >> 7) & 1) != 0; } 9936 bool zero() const { return ((_value >> 6) & 1) != 0; } 9937 bool auxiliary_carry() const { return ((_value >> 4) & 1) != 0; } 9938 bool parity() const { return ((_value >> 2) & 1) != 0; } 9939 bool carry() const { return ((_value >> 0) & 1) != 0; } 9940 9941 void print() const { 9942 // flags 9943 char f[8]; 9944 f[0] = (overflow ()) ? 'O' : '-'; 9945 f[1] = (direction ()) ? 'D' : '-'; 9946 f[2] = (sign ()) ? 'S' : '-'; 9947 f[3] = (zero ()) ? 'Z' : '-'; 9948 f[4] = (auxiliary_carry()) ? 'A' : '-'; 9949 f[5] = (parity ()) ? 'P' : '-'; 9950 f[6] = (carry ()) ? 
'C' : '-'; 9951 f[7] = '\x0'; 9952 // output 9953 printf("%08x flags = %s", _value, f); 9954 } 9955 9956 }; 9957 9958 class IU_Register { 9959 public: 9960 int32_t _value; 9961 9962 void print() const { 9963 printf("%08x %11d", _value, _value); 9964 } 9965 9966 }; 9967 9968 class IU_State { 9969 public: 9970 Flag_Register _eflags; 9971 IU_Register _rdi; 9972 IU_Register _rsi; 9973 IU_Register _rbp; 9974 IU_Register _rsp; 9975 IU_Register _rbx; 9976 IU_Register _rdx; 9977 IU_Register _rcx; 9978 IU_Register _rax; 9979 9980 void print() const { 9981 // computation registers 9982 printf("rax, = "); _rax.print(); printf("\n"); 9983 printf("rbx, = "); _rbx.print(); printf("\n"); 9984 printf("rcx = "); _rcx.print(); printf("\n"); 9985 printf("rdx = "); _rdx.print(); printf("\n"); 9986 printf("rdi = "); _rdi.print(); printf("\n"); 9987 printf("rsi = "); _rsi.print(); printf("\n"); 9988 printf("rbp, = "); _rbp.print(); printf("\n"); 9989 printf("rsp = "); _rsp.print(); printf("\n"); 9990 printf("\n"); 9991 // control registers 9992 printf("flgs = "); _eflags.print(); printf("\n"); 9993 } 9994 }; 9995 9996 9997 class CPU_State { 9998 public: 9999 FPU_State _fpu_state; 10000 IU_State _iu_state; 10001 10002 void print() const { 10003 printf("--------------------------------------------------\n"); 10004 _iu_state .print(); 10005 printf("\n"); 10006 _fpu_state.print(); 10007 printf("--------------------------------------------------\n"); 10008 } 10009 10010 }; 10011 10012 10013 static void _print_CPU_state(CPU_State* state) { 10014 state->print(); 10015 }; 10016 10017 10018 void MacroAssembler::print_CPU_state() { 10019 push_CPU_state(); 10020 push(rsp); // pass CPU state 10021 call(RuntimeAddress(CAST_FROM_FN_PTR(address, _print_CPU_state))); 10022 addptr(rsp, wordSize); // discard argument 10023 pop_CPU_state(); 10024 } 10025 10026 10027 static bool _verify_FPU(int stack_depth, char* s, CPU_State* state) { 10028 static int counter = 0; 10029 FPU_State* fs = &state->_fpu_state; 10030 counter++; 10031 // For leaf calls, only verify that the top few elements remain empty. 10032 // We only need 1 empty at the top for C2 code. 
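  // x87 tag values (cf. tag_as_string above): 0 = valid, 1 = zero,
  // 2 = special, 3 = empty. A slot st(i) is free iff its tag is 3, e.g.:
  //   bool st7_empty = (fs->tag_for_st(7) == 3);
  // which is the predicate all of the checks below are built on.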
10033 if( stack_depth < 0 ) { 10034 if( fs->tag_for_st(7) != 3 ) { 10035 printf("FPR7 not empty\n"); 10036 state->print(); 10037 assert(false, "error"); 10038 return false; 10039 } 10040 return true; // All other stack states do not matter 10041 } 10042 10043 assert((fs->_control_word._value & 0xffff) == StubRoutines::_fpu_cntrl_wrd_std, 10044 "bad FPU control word"); 10045 10046 // compute stack depth 10047 int i = 0; 10048 while (i < FPU_State::number_of_registers && fs->tag_for_st(i) < 3) i++; 10049 int d = i; 10050 while (i < FPU_State::number_of_registers && fs->tag_for_st(i) == 3) i++; 10051 // verify findings 10052 if (i != FPU_State::number_of_registers) { 10053 // stack not contiguous 10054 printf("%s: stack not contiguous at ST%d\n", s, i); 10055 state->print(); 10056 assert(false, "error"); 10057 return false; 10058 } 10059 // check if computed stack depth corresponds to expected stack depth 10060 if (stack_depth < 0) { 10061 // expected stack depth is -stack_depth or less 10062 if (d > -stack_depth) { 10063 // too many elements on the stack 10064 printf("%s: <= %d stack elements expected but found %d\n", s, -stack_depth, d); 10065 state->print(); 10066 assert(false, "error"); 10067 return false; 10068 } 10069 } else { 10070 // expected stack depth is stack_depth 10071 if (d != stack_depth) { 10072 // wrong stack depth 10073 printf("%s: %d stack elements expected but found %d\n", s, stack_depth, d); 10074 state->print(); 10075 assert(false, "error"); 10076 return false; 10077 } 10078 } 10079 // everything is cool 10080 return true; 10081 } 10082 10083 10084 void MacroAssembler::verify_FPU(int stack_depth, const char* s) { 10085 if (!VerifyFPU) return; 10086 push_CPU_state(); 10087 push(rsp); // pass CPU state 10088 ExternalAddress msg((address) s); 10089 // pass message string s 10090 pushptr(msg.addr()); 10091 push(stack_depth); // pass stack depth 10092 call(RuntimeAddress(CAST_FROM_FN_PTR(address, _verify_FPU))); 10093 addptr(rsp, 3 * wordSize); // discard arguments 10094 // check for error 10095 { Label L; 10096 testl(rax, rax); 10097 jcc(Assembler::notZero, L); 10098 int3(); // break if error condition 10099 bind(L); 10100 } 10101 pop_CPU_state(); 10102 } 10103 10104 void MacroAssembler::load_klass(Register dst, Register src) { 10105 #ifdef _LP64 10106 if (UseCompressedKlassPointers) { 10107 movl(dst, Address(src, oopDesc::klass_offset_in_bytes())); 10108 decode_klass_not_null(dst); 10109 } else 10110 #endif 10111 movptr(dst, Address(src, oopDesc::klass_offset_in_bytes())); 10112 } 10113 10114 void MacroAssembler::load_prototype_header(Register dst, Register src) { 10115 #ifdef _LP64 10116 if (UseCompressedKlassPointers) { 10117 assert (Universe::heap() != NULL, "java heap should be initialized"); 10118 movl(dst, Address(src, oopDesc::klass_offset_in_bytes())); 10119 if (Universe::narrow_klass_shift() != 0) { 10120 assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); 10121 assert(LogKlassAlignmentInBytes == Address::times_8, "klass not aligned on 64bits?"); 10122 movq(dst, Address(r12_heapbase, dst, Address::times_8, Klass::prototype_header_offset())); 10123 } else { 10124 movq(dst, Address(dst, Klass::prototype_header_offset())); 10125 } 10126 } else 10127 #endif 10128 { 10129 movptr(dst, Address(src, oopDesc::klass_offset_in_bytes())); 10130 movptr(dst, Address(dst, Klass::prototype_header_offset())); 10131 } 10132 } 10133 10134 void MacroAssembler::store_klass(Register dst, Register src) { 10135 #ifdef _LP64 10136 if 
 (UseCompressedKlassPointers) {
    encode_klass_not_null(src);
    movl(Address(dst, oopDesc::klass_offset_in_bytes()), src);
  } else
#endif
    movptr(Address(dst, oopDesc::klass_offset_in_bytes()), src);
}

void MacroAssembler::load_heap_oop(Register dst, Address src) {
#ifdef _LP64
  // FIXME: Must change all places where we try to load the klass.
  if (UseCompressedOops) {
    movl(dst, src);
    decode_heap_oop(dst);
  } else
#endif
    movptr(dst, src);
}

// Doesn't do verification, generates fixed size code
void MacroAssembler::load_heap_oop_not_null(Register dst, Address src) {
#ifdef _LP64
  if (UseCompressedOops) {
    movl(dst, src);
    decode_heap_oop_not_null(dst);
  } else
#endif
    movptr(dst, src);
}

void MacroAssembler::store_heap_oop(Address dst, Register src) {
#ifdef _LP64
  if (UseCompressedOops) {
    assert(!dst.uses(src), "not enough registers");
    encode_heap_oop(src);
    movl(dst, src);
  } else
#endif
    movptr(dst, src);
}

void MacroAssembler::cmp_heap_oop(Register src1, Address src2, Register tmp) {
  assert_different_registers(src1, tmp);
#ifdef _LP64
  if (UseCompressedOops) {
    bool did_push = false;
    if (tmp == noreg) {
      tmp = rax;
      push(tmp);
      did_push = true;
      assert(!src2.uses(rsp), "can't push");
    }
    load_heap_oop(tmp, src2);
    cmpptr(src1, tmp);
    if (did_push)  pop(tmp);
  } else
#endif
    cmpptr(src1, src2);
}

// Used for storing NULLs.
void MacroAssembler::store_heap_oop_null(Address dst) {
#ifdef _LP64
  if (UseCompressedOops) {
    movl(dst, (int32_t)NULL_WORD);
  } else {
    movslq(dst, (int32_t)NULL_WORD);
  }
#else
  movl(dst, (int32_t)NULL_WORD);
#endif
}

#ifdef _LP64
void MacroAssembler::store_klass_gap(Register dst, Register src) {
  if (UseCompressedKlassPointers) {
    // Store to klass gap in destination
    movl(Address(dst, oopDesc::klass_gap_offset_in_bytes()), src);
  }
}

#ifdef ASSERT
void MacroAssembler::verify_heapbase(const char* msg) {
  assert (UseCompressedOops || UseCompressedKlassPointers, "should be compressed");
  assert (Universe::heap() != NULL, "java heap should be initialized");
  if (CheckCompressedOops) {
    Label ok;
    push(rscratch1); // cmpptr trashes rscratch1
    cmpptr(r12_heapbase, ExternalAddress((address)Universe::narrow_ptrs_base_addr()));
    jcc(Assembler::equal, ok);
    STOP(msg);
    bind(ok);
    pop(rscratch1);
  }
}
#endif

// Algorithm must match oop.inline.hpp encode_heap_oop.
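// Encoding sketch (kept in sync with oop.inline.hpp):
//   narrow = (o == NULL) ? 0 : (narrowOop)((o - narrow_oop_base) >> narrow_oop_shift);
// The cmov below maps a NULL oop to the heap base first, so the subtract
// and shift produce 0 for NULL without a branch.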
10234 void MacroAssembler::encode_heap_oop(Register r) { 10235 #ifdef ASSERT 10236 verify_heapbase("MacroAssembler::encode_heap_oop: heap base corrupted?"); 10237 #endif 10238 verify_oop(r, "broken oop in encode_heap_oop"); 10239 if (Universe::narrow_oop_base() == NULL) { 10240 if (Universe::narrow_oop_shift() != 0) { 10241 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); 10242 shrq(r, LogMinObjAlignmentInBytes); 10243 } 10244 return; 10245 } 10246 testq(r, r); 10247 cmovq(Assembler::equal, r, r12_heapbase); 10248 subq(r, r12_heapbase); 10249 shrq(r, LogMinObjAlignmentInBytes); 10250 } 10251 10252 void MacroAssembler::encode_heap_oop_not_null(Register r) { 10253 #ifdef ASSERT 10254 verify_heapbase("MacroAssembler::encode_heap_oop_not_null: heap base corrupted?"); 10255 if (CheckCompressedOops) { 10256 Label ok; 10257 testq(r, r); 10258 jcc(Assembler::notEqual, ok); 10259 STOP("null oop passed to encode_heap_oop_not_null"); 10260 bind(ok); 10261 } 10262 #endif 10263 verify_oop(r, "broken oop in encode_heap_oop_not_null"); 10264 if (Universe::narrow_oop_base() != NULL) { 10265 subq(r, r12_heapbase); 10266 } 10267 if (Universe::narrow_oop_shift() != 0) { 10268 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); 10269 shrq(r, LogMinObjAlignmentInBytes); 10270 } 10271 } 10272 10273 void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) { 10274 #ifdef ASSERT 10275 verify_heapbase("MacroAssembler::encode_heap_oop_not_null2: heap base corrupted?"); 10276 if (CheckCompressedOops) { 10277 Label ok; 10278 testq(src, src); 10279 jcc(Assembler::notEqual, ok); 10280 STOP("null oop passed to encode_heap_oop_not_null2"); 10281 bind(ok); 10282 } 10283 #endif 10284 verify_oop(src, "broken oop in encode_heap_oop_not_null2"); 10285 if (dst != src) { 10286 movq(dst, src); 10287 } 10288 if (Universe::narrow_oop_base() != NULL) { 10289 subq(dst, r12_heapbase); 10290 } 10291 if (Universe::narrow_oop_shift() != 0) { 10292 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); 10293 shrq(dst, LogMinObjAlignmentInBytes); 10294 } 10295 } 10296 10297 void MacroAssembler::decode_heap_oop(Register r) { 10298 #ifdef ASSERT 10299 verify_heapbase("MacroAssembler::decode_heap_oop: heap base corrupted?"); 10300 #endif 10301 if (Universe::narrow_oop_base() == NULL) { 10302 if (Universe::narrow_oop_shift() != 0) { 10303 assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); 10304 shlq(r, LogMinObjAlignmentInBytes); 10305 } 10306 } else { 10307 Label done; 10308 shlq(r, LogMinObjAlignmentInBytes); 10309 jccb(Assembler::equal, done); 10310 addq(r, r12_heapbase); 10311 bind(done); 10312 } 10313 verify_oop(r, "broken oop in decode_heap_oop"); 10314 } 10315 10316 void MacroAssembler::decode_heap_oop_not_null(Register r) { 10317 // Note: it will change flags 10318 assert (UseCompressedOops, "should only be used for compressed headers"); 10319 assert (Universe::heap() != NULL, "java heap should be initialized"); 10320 // Cannot assert, unverified entry point counts instructions (see .ad file) 10321 // vtableStubs also counts instructions in pd_code_size_limit. 10322 // Also do not verify_oop as this is called by verify_oop. 
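  // Decoding sketch for a narrow oop known to be non-NULL:
  //   o = (oop)(narrow_oop_base + ((uintptr_t)narrow << narrow_oop_shift));
  // no NULL check is needed here, which keeps the sequence fixed-size.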
10323 if (Universe::narrow_oop_shift() != 0) { 10324 assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); 10325 shlq(r, LogMinObjAlignmentInBytes); 10326 if (Universe::narrow_oop_base() != NULL) { 10327 addq(r, r12_heapbase); 10328 } 10329 } else { 10330 assert (Universe::narrow_oop_base() == NULL, "sanity"); 10331 } 10332 } 10333 10334 void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) { 10335 // Note: it will change flags 10336 assert (UseCompressedOops, "should only be used for compressed headers"); 10337 assert (Universe::heap() != NULL, "java heap should be initialized"); 10338 // Cannot assert, unverified entry point counts instructions (see .ad file) 10339 // vtableStubs also counts instructions in pd_code_size_limit. 10340 // Also do not verify_oop as this is called by verify_oop. 10341 if (Universe::narrow_oop_shift() != 0) { 10342 assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); 10343 if (LogMinObjAlignmentInBytes == Address::times_8) { 10344 leaq(dst, Address(r12_heapbase, src, Address::times_8, 0)); 10345 } else { 10346 if (dst != src) { 10347 movq(dst, src); 10348 } 10349 shlq(dst, LogMinObjAlignmentInBytes); 10350 if (Universe::narrow_oop_base() != NULL) { 10351 addq(dst, r12_heapbase); 10352 } 10353 } 10354 } else { 10355 assert (Universe::narrow_oop_base() == NULL, "sanity"); 10356 if (dst != src) { 10357 movq(dst, src); 10358 } 10359 } 10360 } 10361 10362 void MacroAssembler::encode_klass_not_null(Register r) { 10363 assert(Metaspace::is_initialized(), "metaspace should be initialized"); 10364 #ifdef ASSERT 10365 verify_heapbase("MacroAssembler::encode_klass_not_null: heap base corrupted?"); 10366 #endif 10367 if (Universe::narrow_klass_base() != NULL) { 10368 subq(r, r12_heapbase); 10369 } 10370 if (Universe::narrow_klass_shift() != 0) { 10371 assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); 10372 shrq(r, LogKlassAlignmentInBytes); 10373 } 10374 } 10375 10376 void MacroAssembler::encode_klass_not_null(Register dst, Register src) { 10377 assert(Metaspace::is_initialized(), "metaspace should be initialized"); 10378 #ifdef ASSERT 10379 verify_heapbase("MacroAssembler::encode_klass_not_null2: heap base corrupted?"); 10380 #endif 10381 if (dst != src) { 10382 movq(dst, src); 10383 } 10384 if (Universe::narrow_klass_base() != NULL) { 10385 subq(dst, r12_heapbase); 10386 } 10387 if (Universe::narrow_klass_shift() != 0) { 10388 assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); 10389 shrq(dst, LogKlassAlignmentInBytes); 10390 } 10391 } 10392 10393 void MacroAssembler::decode_klass_not_null(Register r) { 10394 assert(Metaspace::is_initialized(), "metaspace should be initialized"); 10395 // Note: it will change flags 10396 assert (UseCompressedKlassPointers, "should only be used for compressed headers"); 10397 // Cannot assert, unverified entry point counts instructions (see .ad file) 10398 // vtableStubs also counts instructions in pd_code_size_limit. 10399 // Also do not verify_oop as this is called by verify_oop. 
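  // Same shape as decode_heap_oop_not_null above, but against the klass base:
  //   k = (Klass*)(narrow_klass_base + ((uintptr_t)narrow << LogKlassAlignmentInBytes));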
10400 if (Universe::narrow_klass_shift() != 0) { 10401 assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); 10402 shlq(r, LogKlassAlignmentInBytes); 10403 if (Universe::narrow_klass_base() != NULL) { 10404 addq(r, r12_heapbase); 10405 } 10406 } else { 10407 assert (Universe::narrow_klass_base() == NULL, "sanity"); 10408 } 10409 } 10410 10411 void MacroAssembler::decode_klass_not_null(Register dst, Register src) { 10412 assert(Metaspace::is_initialized(), "metaspace should be initialized"); 10413 // Note: it will change flags 10414 assert (UseCompressedKlassPointers, "should only be used for compressed headers"); 10415 // Cannot assert, unverified entry point counts instructions (see .ad file) 10416 // vtableStubs also counts instructions in pd_code_size_limit. 10417 // Also do not verify_oop as this is called by verify_oop. 10418 if (Universe::narrow_klass_shift() != 0) { 10419 assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); 10420 assert(LogKlassAlignmentInBytes == Address::times_8, "klass not aligned on 64bits?"); 10421 leaq(dst, Address(r12_heapbase, src, Address::times_8, 0)); 10422 } else { 10423 assert (Universe::narrow_klass_base() == NULL, "sanity"); 10424 if (dst != src) { 10425 movq(dst, src); 10426 } 10427 } 10428 } 10429 10430 void MacroAssembler::set_narrow_oop(Register dst, jobject obj) { 10431 assert (UseCompressedOops, "should only be used for compressed headers"); 10432 assert (Universe::heap() != NULL, "java heap should be initialized"); 10433 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); 10434 int oop_index = oop_recorder()->find_index(obj); 10435 RelocationHolder rspec = oop_Relocation::spec(oop_index); 10436 mov_narrow_oop(dst, oop_index, rspec); 10437 } 10438 10439 void MacroAssembler::set_narrow_oop(Address dst, jobject obj) { 10440 assert (UseCompressedOops, "should only be used for compressed headers"); 10441 assert (Universe::heap() != NULL, "java heap should be initialized"); 10442 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); 10443 int oop_index = oop_recorder()->find_index(obj); 10444 RelocationHolder rspec = oop_Relocation::spec(oop_index); 10445 mov_narrow_oop(dst, oop_index, rspec); 10446 } 10447 10448 void MacroAssembler::set_narrow_klass(Register dst, Klass* k) { 10449 assert (UseCompressedKlassPointers, "should only be used for compressed headers"); 10450 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); 10451 int klass_index = oop_recorder()->find_index(k); 10452 RelocationHolder rspec = metadata_Relocation::spec(klass_index); 10453 mov_narrow_oop(dst, oopDesc::encode_klass(k), rspec); 10454 } 10455 10456 void MacroAssembler::set_narrow_klass(Address dst, Klass* k) { 10457 assert (UseCompressedKlassPointers, "should only be used for compressed headers"); 10458 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); 10459 int klass_index = oop_recorder()->find_index(k); 10460 RelocationHolder rspec = metadata_Relocation::spec(klass_index); 10461 mov_narrow_oop(dst, oopDesc::encode_klass(k), rspec); 10462 } 10463 10464 void MacroAssembler::cmp_narrow_oop(Register dst, jobject obj) { 10465 assert (UseCompressedOops, "should only be used for compressed headers"); 10466 assert (Universe::heap() != NULL, "java heap should be initialized"); 10467 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); 10468 int oop_index = oop_recorder()->find_index(obj); 10469 RelocationHolder rspec 
= oop_Relocation::spec(oop_index); 10470 Assembler::cmp_narrow_oop(dst, oop_index, rspec); 10471 } 10472 10473 void MacroAssembler::cmp_narrow_oop(Address dst, jobject obj) { 10474 assert (UseCompressedOops, "should only be used for compressed headers"); 10475 assert (Universe::heap() != NULL, "java heap should be initialized"); 10476 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); 10477 int oop_index = oop_recorder()->find_index(obj); 10478 RelocationHolder rspec = oop_Relocation::spec(oop_index); 10479 Assembler::cmp_narrow_oop(dst, oop_index, rspec); 10480 } 10481 10482 void MacroAssembler::cmp_narrow_klass(Register dst, Klass* k) { 10483 assert (UseCompressedKlassPointers, "should only be used for compressed headers"); 10484 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); 10485 int klass_index = oop_recorder()->find_index(k); 10486 RelocationHolder rspec = metadata_Relocation::spec(klass_index); 10487 Assembler::cmp_narrow_oop(dst, oopDesc::encode_klass(k), rspec); 10488 } 10489 10490 void MacroAssembler::cmp_narrow_klass(Address dst, Klass* k) { 10491 assert (UseCompressedKlassPointers, "should only be used for compressed headers"); 10492 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); 10493 int klass_index = oop_recorder()->find_index(k); 10494 RelocationHolder rspec = metadata_Relocation::spec(klass_index); 10495 Assembler::cmp_narrow_oop(dst, oopDesc::encode_klass(k), rspec); 10496 } 10497 10498 void MacroAssembler::reinit_heapbase() { 10499 if (UseCompressedOops || UseCompressedKlassPointers) { 10500 movptr(r12_heapbase, ExternalAddress((address)Universe::narrow_ptrs_base_addr())); 10501 } 10502 } 10503 #endif // _LP64 10504 10505 10506 // C2 compiled method's prolog code. 10507 void MacroAssembler::verified_entry(int framesize, bool stack_bang, bool fp_mode_24b) { 10508 10509 // WARNING: Initial instruction MUST be 5 bytes or longer so that 10510 // NativeJump::patch_verified_entry will be able to patch out the entry 10511 // code safely. The push to verify stack depth is ok at 5 bytes, 10512 // the frame allocation can be either 3 or 6 bytes. So if we don't do 10513 // stack bang then we must use the 6 byte frame allocation even if 10514 // we have no frame. :-( 10515 10516 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); 10517 // Remove word for return addr 10518 framesize -= wordSize; 10519 10520 // Calls to C2R adapters often do not accept exceptional returns. 10521 // We require that their callers must bang for them. But be careful, because 10522 // some VM calls (such as call site linkage) can use several kilobytes of 10523 // stack. But the stack safety zone should account for that. 10524 // See bugs 4446381, 4468289, 4497237. 10525 if (stack_bang) { 10526 generate_stack_overflow_check(framesize); 10527 10528 // We always push rbp, so that on return to interpreter rbp, will be 10529 // restored correctly and we can correct the stack. 10530 push(rbp); 10531 // Remove word for ebp 10532 framesize -= wordSize; 10533 10534 // Create frame 10535 if (framesize) { 10536 subptr(rsp, framesize); 10537 } 10538 } else { 10539 // Create frame (force generation of a 4 byte immediate value) 10540 subptr_imm32(rsp, framesize); 10541 10542 // Save RBP register now. 
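    // Frame being built here (sketch, lowest slot at rsp):
    //   [return addr]                    <- pushed by the caller
    //   [saved rbp]                      <- stored at rsp + framesize below
    //   [ ... rest of the frame ... ]
    //   [lowest frame slot]              <- rsp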
    framesize -= wordSize;
    movptr(Address(rsp, framesize), rbp);
  }

  if (VerifyStackAtCalls) { // Majik cookie to verify stack depth
    framesize -= wordSize;
    movptr(Address(rsp, framesize), (int32_t)0xbadb100d);
  }

#ifndef _LP64
  // If method sets FPU control word do it now
  if (fp_mode_24b) {
    fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
  }
  if (UseSSE >= 2 && VerifyFPU) {
    verify_FPU(0, "FPU stack must be clean on entry");
  }
#endif

#ifdef ASSERT
  if (VerifyStackAtCalls) {
    Label L;
    push(rax);
    mov(rax, rsp);
    andptr(rax, StackAlignmentInBytes-1);
    cmpptr(rax, StackAlignmentInBytes-wordSize);
    pop(rax);
    jcc(Assembler::equal, L);
    STOP("Stack is not properly aligned!");
    bind(L);
  }
#endif

}


// IndexOf for constant substrings with size >= 8 chars
// which don't need to be loaded through stack.
void MacroAssembler::string_indexofC8(Register str1, Register str2,
                                      Register cnt1, Register cnt2,
                                      int int_cnt2,  Register result,
                                      XMMRegister vec, Register tmp) {
  ShortBranchVerifier sbv(this);
  assert(UseSSE42Intrinsics, "SSE4.2 is required");

  // This method uses the pcmpestri instruction with bound registers
  //   inputs:
  //     xmm - substring
  //     rax - substring length (elements count)
  //     mem - scanned string
  //     rdx - string length (elements count)
  //     0xd - mode: 1100 (substring search) + 01 (unsigned shorts)
  //   outputs:
  //     rcx - matched index in string
  assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri");

  Label RELOAD_SUBSTR, SCAN_TO_SUBSTR, SCAN_SUBSTR,
        RET_FOUND, RET_NOT_FOUND, EXIT, FOUND_SUBSTR,
        MATCH_SUBSTR_HEAD, RELOAD_STR, FOUND_CANDIDATE;

  // Note, inline_string_indexOf() generates checks:
  // if (substr.count > string.count) return -1;
  // if (substr.count == 0) return 0;
  assert(int_cnt2 >= 8, "this code is used only for cnt2 >= 8 chars");

  // Load substring.
  movdqu(vec, Address(str2, 0));
  movl(cnt2, int_cnt2);
  movptr(result, str1); // string addr

  if (int_cnt2 > 8) {
    jmpb(SCAN_TO_SUBSTR);

    // Reload substr for rescan, this code
    // is executed only for large substrings (> 8 chars)
    bind(RELOAD_SUBSTR);
    movdqu(vec, Address(str2, 0));
    negptr(cnt2); // Jumped here with negative cnt2, convert to positive

    bind(RELOAD_STR);
    // We came here after the beginning of the substring was
    // matched but the rest of it was not so we need to search
    // again. Start from the next element after the previous match.

    // cnt2 is the number of remaining substring elements and
    // cnt1 is the number of remaining string elements when cmp failed.
    // Restored cnt1 = cnt1 - cnt2 + int_cnt2
    subl(cnt1, cnt2);
    addl(cnt1, int_cnt2);
    movl(cnt2, int_cnt2); // Now restore cnt2

    decrementl(cnt1);     // Shift to next element
    cmpl(cnt1, cnt2);
    jccb(Assembler::negative, RET_NOT_FOUND);  // Left less than substring

    addptr(result, 2);

  } // (int_cnt2 > 8)

  // Scan string for start of substr in 16-byte vectors
  bind(SCAN_TO_SUBSTR);
  pcmpestri(vec, Address(result, 0), 0x0d);
  jccb(Assembler::below, FOUND_CANDIDATE);   // CF == 1
  subl(cnt1, 8);
  jccb(Assembler::lessEqual, RET_NOT_FOUND); // Scanned full string
  cmpl(cnt1, cnt2);
  jccb(Assembler::negative, RET_NOT_FOUND);  // Left less than substring
  addptr(result, 16);
  jmpb(SCAN_TO_SUBSTR);

  // Found a potential substr
  bind(FOUND_CANDIDATE);
  // Matched whole vector if first element matched (tmp(rcx) == 0).
  if (int_cnt2 == 8) {
    jccb(Assembler::overflow, RET_FOUND);    // OF == 1
  } else { // int_cnt2 > 8
    jccb(Assembler::overflow, FOUND_SUBSTR);
  }
  // After pcmpestri tmp(rcx) contains matched element index
  // Compute start addr of substr
  lea(result, Address(result, tmp, Address::times_2));

  // Make sure string is still long enough
  subl(cnt1, tmp);
  cmpl(cnt1, cnt2);
  if (int_cnt2 == 8) {
    jccb(Assembler::greaterEqual, SCAN_TO_SUBSTR);
  } else { // int_cnt2 > 8
    jccb(Assembler::greaterEqual, MATCH_SUBSTR_HEAD);
  }
  // Left less than substring.

  bind(RET_NOT_FOUND);
  movl(result, -1);
  jmpb(EXIT);

  if (int_cnt2 > 8) {
    // This code is optimized for the case when the whole substring
    // is matched if its head is matched.
    bind(MATCH_SUBSTR_HEAD);
    pcmpestri(vec, Address(result, 0), 0x0d);
    // Reload only the string if the rest does not match
    jccb(Assembler::noOverflow, RELOAD_STR); // OF == 0

    Label CONT_SCAN_SUBSTR;
    // Compare the rest of substring (> 8 chars).
    bind(FOUND_SUBSTR);
    // First 8 chars are already matched.
    negptr(cnt2);
    addptr(cnt2, 8);

    bind(SCAN_SUBSTR);
    subl(cnt1, 8);
    cmpl(cnt2, -8); // Do not read beyond substring
    jccb(Assembler::lessEqual, CONT_SCAN_SUBSTR);
    // Back-up strings to avoid reading beyond substring:
    // cnt1 = cnt1 - cnt2 + 8
    addl(cnt1, cnt2); // cnt2 is negative
    addl(cnt1, 8);
    movl(cnt2, 8);  negptr(cnt2);
    bind(CONT_SCAN_SUBSTR);
    if (int_cnt2 < (int)G) {
      movdqu(vec, Address(str2, cnt2, Address::times_2, int_cnt2*2));
      pcmpestri(vec, Address(result, cnt2, Address::times_2, int_cnt2*2), 0x0d);
    } else {
      // calculate index in register to avoid integer overflow (int_cnt2*2)
      movl(tmp, int_cnt2);
      addptr(tmp, cnt2);
      movdqu(vec, Address(str2, tmp, Address::times_2, 0));
      pcmpestri(vec, Address(result, tmp, Address::times_2, 0), 0x0d);
    }
    // Need to reload strings pointers if not matched whole vector
    jcc(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0
    addptr(cnt2, 8);
    jcc(Assembler::negative, SCAN_SUBSTR);
    // Fall through if found full substring

  } // (int_cnt2 > 8)

  bind(RET_FOUND);
  // Found result if we matched full small substring.
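  // pcmpestri imm8 0x0d used throughout = unsigned words (01) + equal-ordered
  // (1100), i.e. substring search; the same mode is exposed in C as
  //   _mm_cmpestri(sub, sub_len, str, str_len,
  //                _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ORDERED)
  // Flag usage above: CF is set when a match was found (rcx = its element
  // index), OF is set when the match starts at element 0.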

// Small strings are loaded through the stack if they cross a page boundary.
void MacroAssembler::string_indexof(Register str1, Register str2,
                                    Register cnt1, Register cnt2,
                                    int int_cnt2,  Register result,
                                    XMMRegister vec, Register tmp) {
  ShortBranchVerifier sbv(this);
  assert(UseSSE42Intrinsics, "SSE4.2 is required");
  //
  // int_cnt2 is the length of a small (< 8 chars) constant substring
  // or (-1) for a non-constant substring, in which case its length
  // is in the cnt2 register.
  //
  // Note, inline_string_indexOf() generates checks:
  // if (substr.count > string.count) return -1;
  // if (substr.count == 0) return 0;
  //
  assert(int_cnt2 == -1 || (0 < int_cnt2 && int_cnt2 < 8), "should be != 0");

  // This method uses the pcmpestri instruction with bound registers
  //   inputs:
  //     xmm - substring
  //     rax - substring length (elements count)
  //     mem - scanned string
  //     rdx - string length (elements count)
  //     0xd - mode: 1100 (substring search) + 01 (unsigned shorts)
  //   outputs:
  //     rcx - matched index in string
  assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri");

  Label RELOAD_SUBSTR, SCAN_TO_SUBSTR, SCAN_SUBSTR, ADJUST_STR,
        RET_FOUND, RET_NOT_FOUND, CLEANUP, FOUND_SUBSTR,
        FOUND_CANDIDATE;

  { //========================================================
    // We don't know where these strings are located
    // and we can't read beyond them. Load them through the stack.
    Label BIG_STRINGS, CHECK_STR, COPY_SUBSTR, COPY_STR;

    movptr(tmp, rsp); // save old SP

    if (int_cnt2 > 0) {     // small (< 8 chars) constant substring
      if (int_cnt2 == 1) {  // One char
        load_unsigned_short(result, Address(str2, 0));
        movdl(vec, result); // move 32 bits
      } else if (int_cnt2 == 2) { // Two chars
        movdl(vec, Address(str2, 0)); // move 32 bits
      } else if (int_cnt2 == 4) { // Four chars
        movq(vec, Address(str2, 0));  // move 64 bits
      } else { // cnt2 = { 3, 5, 6, 7 }
        // Array header size is 12 bytes in 32-bit VM
        // + 6 bytes for 3 chars == 18 bytes,
        // enough space to load vec and shift.
        assert(HeapWordSize*TypeArrayKlass::header_size() >= 12, "sanity");
        movdqu(vec, Address(str2, (int_cnt2*2)-16));
        psrldq(vec, 16-(int_cnt2*2));
      }
    } else { // not constant substring
      cmpl(cnt2, 8);
      jccb(Assembler::aboveEqual, BIG_STRINGS); // Both strings are big enough

      // We can read beyond a string if str+16 does not cross a page boundary
      // since heaps are aligned and mapped by pages.
      assert(os::vm_page_size() < (int)G, "default page should be small");
      movl(result, str2); // We need only the low 32 bits
      andl(result, (os::vm_page_size()-1));
      cmpl(result, (os::vm_page_size()-16));
      jccb(Assembler::belowEqual, CHECK_STR);

      // Move small strings to the stack to allow loading 16 bytes into vec.
      subptr(rsp, 16);
      int stk_offset = wordSize-2;
      push(cnt2);

      bind(COPY_SUBSTR);
      load_unsigned_short(result, Address(str2, cnt2, Address::times_2, -2));
      movw(Address(rsp, cnt2, Address::times_2, stk_offset), result);
      decrement(cnt2);
      jccb(Assembler::notZero, COPY_SUBSTR);

      pop(cnt2);
      movptr(str2, rsp);  // New substring address
    } // non constant

    bind(CHECK_STR);
    cmpl(cnt1, 8);
    jccb(Assembler::aboveEqual, BIG_STRINGS);

    // Check cross page boundary.
    movl(result, str1); // We need only the low 32 bits
    andl(result, (os::vm_page_size()-1));
    cmpl(result, (os::vm_page_size()-16));
    jccb(Assembler::belowEqual, BIG_STRINGS);

    subptr(rsp, 16);
    int stk_offset = -2;
    if (int_cnt2 < 0) { // not constant
      push(cnt2);
      stk_offset += wordSize;
    }
    movl(cnt2, cnt1);

    bind(COPY_STR);
    load_unsigned_short(result, Address(str1, cnt2, Address::times_2, -2));
    movw(Address(rsp, cnt2, Address::times_2, stk_offset), result);
    decrement(cnt2);
    jccb(Assembler::notZero, COPY_STR);

    if (int_cnt2 < 0) { // not constant
      pop(cnt2);
    }
    movptr(str1, rsp);  // New string address

    bind(BIG_STRINGS);
    // Load substring.
    if (int_cnt2 < 0) { // -1
      movdqu(vec, Address(str2, 0));
      push(cnt2); // substr count
      push(str2); // substr addr
      push(str1); // string addr
    } else {
      // Small (< 8 chars) constant substrings are loaded already.
      movl(cnt2, int_cnt2);
    }
    push(tmp);  // original SP

  } // Finished loading
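
  // The page-boundary tests above rely on page-aligned mappings: a 16-byte
  // load from address p stays inside one page iff its offset within the
  // page is at most page_size - 16. A scalar sketch of that check,
  // assuming a power-of-two page size:
  //
  //   bool safe_16byte_read(uintptr_t p, uintptr_t page_size) {
  //     return (p & (page_size - 1)) <= page_size - 16;
  //   }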

  //========================================================
  // Start search
  //

  movptr(result, str1); // string addr

  if (int_cnt2  < 0) {  // Only for non-constant substring
    jmpb(SCAN_TO_SUBSTR);

    // SP saved at sp+0
    // String saved at sp+1*wordSize
    // Substr saved at sp+2*wordSize
    // Substr count saved at sp+3*wordSize

    // Reload substr for rescan, this code
    // is executed only for large substrings (> 8 chars)
    bind(RELOAD_SUBSTR);
    movptr(str2, Address(rsp, 2*wordSize));
    movl(cnt2, Address(rsp, 3*wordSize));
    movdqu(vec, Address(str2, 0));
    // We came here after the beginning of the substring was
    // matched but the rest of it was not so we need to search
    // again. Start from the next element after the previous match.
    subptr(str1, result); // Restore counter
    shrl(str1, 1);
    addl(cnt1, str1);
    decrementl(cnt1);   // Shift to next element
    cmpl(cnt1, cnt2);
    jccb(Assembler::negative, RET_NOT_FOUND);  // Fewer chars left than substring

    addptr(result, 2);
  } // non constant

  // Scan string for start of substr in 16-byte vectors
  bind(SCAN_TO_SUBSTR);
  assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri");
  pcmpestri(vec, Address(result, 0), 0x0d);
  jccb(Assembler::below, FOUND_CANDIDATE);   // CF == 1
  subl(cnt1, 8);
  jccb(Assembler::lessEqual, RET_NOT_FOUND); // Scanned full string
  cmpl(cnt1, cnt2);
  jccb(Assembler::negative, RET_NOT_FOUND);  // Fewer chars left than substring
  addptr(result, 16);

  bind(ADJUST_STR);
  cmpl(cnt1, 8); // Do not read beyond string
  jccb(Assembler::greaterEqual, SCAN_TO_SUBSTR);
  // Back up string to avoid reading beyond it.
  lea(result, Address(result, cnt1, Address::times_2, -16));
  movl(cnt1, 8);
  jmpb(SCAN_TO_SUBSTR);

  // Found a potential substr
  bind(FOUND_CANDIDATE);
  // After pcmpestri tmp(rcx) contains the matched element index

  // Make sure string is still long enough
  subl(cnt1, tmp);
  cmpl(cnt1, cnt2);
  jccb(Assembler::greaterEqual, FOUND_SUBSTR);
  // Fewer chars left than substring.

  bind(RET_NOT_FOUND);
  movl(result, -1);
  jmpb(CLEANUP);

  bind(FOUND_SUBSTR);
  // Compute start addr of substr
  lea(result, Address(result, tmp, Address::times_2));

  if (int_cnt2 > 0) { // Constant substring
    // Repeat search for small substring (< 8 chars)
    // from the new point without reloading the substring.
    // Have to check that we don't read beyond the string.
    cmpl(tmp, 8-int_cnt2);
    jccb(Assembler::greater, ADJUST_STR);
    // Fall through if matched whole substring.
  } else { // non constant
    assert(int_cnt2 == -1, "should be != 0");

    addl(tmp, cnt2);
    // Found result if we matched the whole substring.
    cmpl(tmp, 8);
    jccb(Assembler::lessEqual, RET_FOUND);

    // Repeat search for small substring (<= 8 chars)
    // from the new point 'str1' without reloading the substring.
    cmpl(cnt2, 8);
    // Have to check that we don't read beyond the string.
    jccb(Assembler::lessEqual, ADJUST_STR);

    Label CHECK_NEXT, CONT_SCAN_SUBSTR, RET_FOUND_LONG;
    // Compare the rest of the substring (> 8 chars).
    movptr(str1, result);

    cmpl(tmp, cnt2);
    // First 8 chars are already matched.
    jccb(Assembler::equal, CHECK_NEXT);

    bind(SCAN_SUBSTR);
    pcmpestri(vec, Address(str1, 0), 0x0d);
    // Need to reload string pointers if we did not match the whole vector
    jcc(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0

    bind(CHECK_NEXT);
    subl(cnt2, 8);
    jccb(Assembler::lessEqual, RET_FOUND_LONG); // Found full substring
    addptr(str1, 16);
    addptr(str2, 16);
    subl(cnt1, 8);
    cmpl(cnt2, 8); // Do not read beyond substring
    jccb(Assembler::greaterEqual, CONT_SCAN_SUBSTR);
    // Back up strings to avoid reading beyond substring.
    lea(str2, Address(str2, cnt2, Address::times_2, -16));
    lea(str1, Address(str1, cnt2, Address::times_2, -16));
    subl(cnt1, cnt2);
    movl(cnt2, 8);
    addl(cnt1, 8);
    bind(CONT_SCAN_SUBSTR);
    movdqu(vec, Address(str2, 0));
    jmpb(SCAN_SUBSTR);

    bind(RET_FOUND_LONG);
    movptr(str1, Address(rsp, wordSize));
  } // non constant

  bind(RET_FOUND);
  // Compute substr offset
  subptr(result, str1);
  shrl(result, 1); // index

  bind(CLEANUP);
  pop(rsp); // restore SP saved at sp+0

} // string_indexof
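
// Division of labor between the two stubs: string_indexofC8 handles
// constant substrings of 8+ chars, which can always be read with full
// 16-byte loads, while string_indexof above covers short or non-constant
// substrings and first copies any page-crossing short string onto the
// stack so the SSE4.2 loads stay within mapped memory.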

// Compare strings.
void MacroAssembler::string_compare(Register str1, Register str2,
                                    Register cnt1, Register cnt2, Register result,
                                    XMMRegister vec1) {
  ShortBranchVerifier sbv(this);
  Label LENGTH_DIFF_LABEL, POP_LABEL, DONE_LABEL, WHILE_HEAD_LABEL;

  // Compute the minimum of the string lengths and the
  // difference of the string lengths (stack).
  // Do the conditional move stuff
  movl(result, cnt1);
  subl(cnt1, cnt2);
  push(cnt1);
  cmov32(Assembler::lessEqual, cnt2, result);

  // Is the minimum length zero?
  testl(cnt2, cnt2);
  jcc(Assembler::zero, LENGTH_DIFF_LABEL);

  // Load first characters
  load_unsigned_short(result, Address(str1, 0));
  load_unsigned_short(cnt1, Address(str2, 0));

  // Compare first characters
  subl(result, cnt1);
  jcc(Assembler::notZero, POP_LABEL);
  decrementl(cnt2);
  jcc(Assembler::zero, LENGTH_DIFF_LABEL);

  {
    // Check after comparing first characters to see if the strings are equivalent
    Label LSkip2;
    // Check if the strings start at the same location
    cmpptr(str1, str2);
    jccb(Assembler::notEqual, LSkip2);

    // Check if the length difference is zero (from stack)
    cmpl(Address(rsp, 0), 0x0);
    jcc(Assembler::equal, LENGTH_DIFF_LABEL);

    // Strings might not be equivalent
    bind(LSkip2);
  }

  Address::ScaleFactor scale = Address::times_2;
  int stride = 8;

  // Advance to next element
  addptr(str1, 16/stride);
  addptr(str2, 16/stride);

  if (UseSSE42Intrinsics) {
    Label COMPARE_WIDE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_TAIL;
    int pcmpmask = 0x19;
    // Setup to compare 16-byte vectors
    movl(result, cnt2);
    andl(cnt2, ~(stride - 1)); // cnt2 holds the vector count
    jccb(Assembler::zero, COMPARE_TAIL);

    lea(str1, Address(str1, result, scale));
    lea(str2, Address(str2, result, scale));
    negptr(result);

    // pcmpestri
    //   inputs:
    //     vec1 - substring
    //     rax  - negative string length (elements count)
    //     mem  - scanned string
    //     rdx  - string length (elements count)
    //     pcmpmask - cmp mode: 11000 (string compare with negated result)
    //                + 00 (unsigned bytes) or + 01 (unsigned shorts)
    //   outputs:
    //     rcx - first mismatched element index
    assert(result == rax && cnt2 == rdx && cnt1 == rcx, "pcmpestri");
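    // For reference: pcmpmask 0x19 decodes per the SSE4.2 PCMPESTRI
    // encoding as bits 1:0 = 01 (unsigned 16-bit elements), bits 3:2 = 10
    // ("equal each", element-wise compare) and bits 5:4 = 01 (negative
    // polarity), so CF is set when some element mismatches and rcx gets
    // the index of the first mismatch.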

    bind(COMPARE_WIDE_VECTORS);
    movdqu(vec1, Address(str1, result, scale));
    pcmpestri(vec1, Address(str2, result, scale), pcmpmask);
    // After pcmpestri cnt1(rcx) contains the mismatched element index

    jccb(Assembler::below, VECTOR_NOT_EQUAL);  // CF==1
    addptr(result, stride);
    subptr(cnt2, stride);
    jccb(Assembler::notZero, COMPARE_WIDE_VECTORS);

    // compare wide vectors tail
    testl(result, result);
    jccb(Assembler::zero, LENGTH_DIFF_LABEL);

    movl(cnt2, stride);
    movl(result, stride);
    negptr(result);
    movdqu(vec1, Address(str1, result, scale));
    pcmpestri(vec1, Address(str2, result, scale), pcmpmask);
    jccb(Assembler::aboveEqual, LENGTH_DIFF_LABEL);

    // Mismatched characters in the vectors
    bind(VECTOR_NOT_EQUAL);
    addptr(result, cnt1);
    movptr(cnt2, result);
    load_unsigned_short(result, Address(str1, cnt2, scale));
    load_unsigned_short(cnt1, Address(str2, cnt2, scale));
    subl(result, cnt1);
    jmpb(POP_LABEL);

    bind(COMPARE_TAIL); // limit is zero
    movl(cnt2, result);
    // Fallthru to tail compare
  }

  // Shift str2 and str1 to the end of the arrays, negate min
  lea(str1, Address(str1, cnt2, scale, 0));
  lea(str2, Address(str2, cnt2, scale, 0));
  negptr(cnt2);

  // Compare the rest of the elements
  bind(WHILE_HEAD_LABEL);
  load_unsigned_short(result, Address(str1, cnt2, scale, 0));
  load_unsigned_short(cnt1, Address(str2, cnt2, scale, 0));
  subl(result, cnt1);
  jccb(Assembler::notZero, POP_LABEL);
  increment(cnt2);
  jccb(Assembler::notZero, WHILE_HEAD_LABEL);

  // Strings are equal up to min length. Return the length difference.
  bind(LENGTH_DIFF_LABEL);
  pop(result);
  jmpb(DONE_LABEL);

  // Discard the stored length difference
  bind(POP_LABEL);
  pop(cnt1);

  // That's it
  bind(DONE_LABEL);
}
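
// A C-like sketch of the result computed above (illustrative only, not VM
// code); the return value follows String.compareTo semantics:
//
//   int string_compare(const jchar* a, int an, const jchar* b, int bn) {
//     int min = an < bn ? an : bn;
//     for (int i = 0; i < min; i++) {
//       if (a[i] != b[i]) return (int)a[i] - (int)b[i];
//     }
//     return an - bn;
//   }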

// Compare char[] arrays aligned to 4 bytes or substrings.
void MacroAssembler::char_arrays_equals(bool is_array_equ, Register ary1, Register ary2,
                                        Register limit, Register result, Register chr,
                                        XMMRegister vec1, XMMRegister vec2) {
  ShortBranchVerifier sbv(this);
  Label TRUE_LABEL, FALSE_LABEL, DONE, COMPARE_VECTORS, COMPARE_CHAR;

  int length_offset  = arrayOopDesc::length_offset_in_bytes();
  int base_offset    = arrayOopDesc::base_offset_in_bytes(T_CHAR);

  // Check the input args
  cmpptr(ary1, ary2);
  jcc(Assembler::equal, TRUE_LABEL);

  if (is_array_equ) {
    // Need additional checks for arrays_equals.
    testptr(ary1, ary1);
    jcc(Assembler::zero, FALSE_LABEL);
    testptr(ary2, ary2);
    jcc(Assembler::zero, FALSE_LABEL);

    // Check the lengths
    movl(limit, Address(ary1, length_offset));
    cmpl(limit, Address(ary2, length_offset));
    jcc(Assembler::notEqual, FALSE_LABEL);
  }

  // count == 0
  testl(limit, limit);
  jcc(Assembler::zero, TRUE_LABEL);

  if (is_array_equ) {
    // Load array addresses
    lea(ary1, Address(ary1, base_offset));
    lea(ary2, Address(ary2, base_offset));
  }

  shll(limit, 1);      // byte count != 0
  movl(result, limit); // copy

  if (UseSSE42Intrinsics) {
    // With SSE4.2, use double quad vector compare
    Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;

    // Compare 16-byte vectors
    andl(result, 0x0000000e);  //   tail count (in bytes)
    andl(limit, 0xfffffff0);   // vector count (in bytes)
    jccb(Assembler::zero, COMPARE_TAIL);

    lea(ary1, Address(ary1, limit, Address::times_1));
    lea(ary2, Address(ary2, limit, Address::times_1));
    negptr(limit);

    bind(COMPARE_WIDE_VECTORS);
    movdqu(vec1, Address(ary1, limit, Address::times_1));
    movdqu(vec2, Address(ary2, limit, Address::times_1));
    pxor(vec1, vec2);

    ptest(vec1, vec1);
    jccb(Assembler::notZero, FALSE_LABEL);
    addptr(limit, 16);
    jcc(Assembler::notZero, COMPARE_WIDE_VECTORS);

    testl(result, result);
    jccb(Assembler::zero, TRUE_LABEL);

    movdqu(vec1, Address(ary1, result, Address::times_1, -16));
    movdqu(vec2, Address(ary2, result, Address::times_1, -16));
    pxor(vec1, vec2);

    ptest(vec1, vec1);
    jccb(Assembler::notZero, FALSE_LABEL);
    jmpb(TRUE_LABEL);

    bind(COMPARE_TAIL); // limit is zero
    movl(limit, result);
    // Fallthru to tail compare
  }

  // Compare 4-byte vectors
  andl(limit, 0xfffffffc); // vector count (in bytes)
  jccb(Assembler::zero, COMPARE_CHAR);

  lea(ary1, Address(ary1, limit, Address::times_1));
  lea(ary2, Address(ary2, limit, Address::times_1));
  negptr(limit);

  bind(COMPARE_VECTORS);
  movl(chr, Address(ary1, limit, Address::times_1));
  cmpl(chr, Address(ary2, limit, Address::times_1));
  jccb(Assembler::notEqual, FALSE_LABEL);
  addptr(limit, 4);
  jcc(Assembler::notZero, COMPARE_VECTORS);

  // Compare trailing char (final 2 bytes), if any
  bind(COMPARE_CHAR);
  testl(result, 0x2);   // tail char
  jccb(Assembler::zero, TRUE_LABEL);
  load_unsigned_short(chr, Address(ary1, 0));
  load_unsigned_short(limit, Address(ary2, 0));
  cmpl(chr, limit);
  jccb(Assembler::notEqual, FALSE_LABEL);

  bind(TRUE_LABEL);
  movl(result, 1);   // return true
  jmpb(DONE);

  bind(FALSE_LABEL);
  xorl(result, result); // return false

  // That's it
  bind(DONE);
}
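
// The SSE4.2 loop above uses the pxor/ptest equality idiom: ptest(v, v)
// sets ZF iff v is all zero, so after pxor(vec1, vec2) a zero result means
// the two 16-byte chunks were identical. A scalar sketch of the same test:
//
//   bool chunks_equal(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1) {
//     return ((a0 ^ b0) | (a1 ^ b1)) == 0;
//   }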

void MacroAssembler::generate_fill(BasicType t, bool aligned,
                                   Register to, Register value, Register count,
                                   Register rtmp, XMMRegister xtmp) {
  ShortBranchVerifier sbv(this);
  assert_different_registers(to, value, count, rtmp);
  Label L_exit, L_skip_align1, L_skip_align2, L_fill_byte;
  Label L_fill_2_bytes, L_fill_4_bytes;

  int shift = -1;
  switch (t) {
    case T_BYTE:
      shift = 2;
      break;
    case T_SHORT:
      shift = 1;
      break;
    case T_INT:
      shift = 0;
      break;
    default: ShouldNotReachHere();
  }

  if (t == T_BYTE) {
    andl(value, 0xff);
    movl(rtmp, value);
    shll(rtmp, 8);
    orl(value, rtmp);
  }
  if (t == T_SHORT) {
    andl(value, 0xffff);
  }
  if (t == T_BYTE || t == T_SHORT) {
    movl(rtmp, value);
    shll(rtmp, 16);
    orl(value, rtmp);
  }
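  // Value replication above widens the fill value to a 32-bit pattern,
  // e.g. for T_BYTE 0x41 -> 0x4141 -> 0x41414141, and for T_SHORT
  // 0x1234 -> 0x12341234. Note that count is in elements, and shift is
  // log2 of the number of elements per 32-bit word (4 / element size).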

  cmpl(count, 2<<shift); // Short arrays (< 8 bytes) fill by element
  jcc(Assembler::below, L_fill_4_bytes); // use unsigned cmp
  if (!UseUnalignedLoadStores && !aligned && (t == T_BYTE || t == T_SHORT)) {
    // Align destination address at a 4-byte boundary.
    if (t == T_BYTE) {
      // One byte misalignment happens only for byte arrays.
      testptr(to, 1);
      jccb(Assembler::zero, L_skip_align1);
      movb(Address(to, 0), value);
      increment(to);
      decrement(count);
      BIND(L_skip_align1);
    }
    // Two bytes misalignment happens only for byte and short (char) arrays.
    testptr(to, 2);
    jccb(Assembler::zero, L_skip_align2);
    movw(Address(to, 0), value);
    addptr(to, 2);
    subl(count, 1<<(shift-1));
    BIND(L_skip_align2);
  }
  if (UseSSE < 2) {
    Label L_fill_32_bytes_loop, L_check_fill_8_bytes, L_fill_8_bytes_loop, L_fill_8_bytes;
    // Fill 32-byte chunks
    subl(count, 8 << shift);
    jcc(Assembler::less, L_check_fill_8_bytes);
    align(16);

    BIND(L_fill_32_bytes_loop);

    for (int i = 0; i < 32; i += 4) {
      movl(Address(to, i), value);
    }

    addptr(to, 32);
    subl(count, 8 << shift);
    jcc(Assembler::greaterEqual, L_fill_32_bytes_loop);
    BIND(L_check_fill_8_bytes);
    addl(count, 8 << shift);
    jccb(Assembler::zero, L_exit);
    jmpb(L_fill_8_bytes);

    //
    // Length is too short, just fill qwords.
    //
    BIND(L_fill_8_bytes_loop);
    movl(Address(to, 0), value);
    movl(Address(to, 4), value);
    addptr(to, 8);
    BIND(L_fill_8_bytes);
    subl(count, 1 << (shift + 1));
    jcc(Assembler::greaterEqual, L_fill_8_bytes_loop);
    // Fall through to fill 4 bytes.
  } else {
    Label L_fill_32_bytes;
    if (!UseUnalignedLoadStores) {
      // Align to 8 bytes; we know we are 4-byte aligned to start.
      testptr(to, 4);
      jccb(Assembler::zero, L_fill_32_bytes);
      movl(Address(to, 0), value);
      addptr(to, 4);
      subl(count, 1<<shift);
    }
    BIND(L_fill_32_bytes);
    {
      assert(UseSSE >= 2, "supported cpu only");
      Label L_fill_32_bytes_loop, L_check_fill_8_bytes, L_fill_8_bytes_loop, L_fill_8_bytes;
      // Fill 32-byte chunks: broadcast the 32-bit pattern into xtmp first.
      movdl(xtmp, value);
      pshufd(xtmp, xtmp, 0);

      subl(count, 8 << shift);
      jcc(Assembler::less, L_check_fill_8_bytes);
      align(16);

      BIND(L_fill_32_bytes_loop);

      if (UseUnalignedLoadStores) {
        movdqu(Address(to, 0), xtmp);
        movdqu(Address(to, 16), xtmp);
      } else {
        movq(Address(to, 0), xtmp);
        movq(Address(to, 8), xtmp);
        movq(Address(to, 16), xtmp);
        movq(Address(to, 24), xtmp);
      }

      addptr(to, 32);
      subl(count, 8 << shift);
      jcc(Assembler::greaterEqual, L_fill_32_bytes_loop);
      BIND(L_check_fill_8_bytes);
      addl(count, 8 << shift);
      jccb(Assembler::zero, L_exit);
      jmpb(L_fill_8_bytes);

      //
      // Length is too short, just fill qwords.
      //
      BIND(L_fill_8_bytes_loop);
      movq(Address(to, 0), xtmp);
      addptr(to, 8);
      BIND(L_fill_8_bytes);
      subl(count, 1 << (shift + 1));
      jcc(Assembler::greaterEqual, L_fill_8_bytes_loop);
    }
  }
  // Fill trailing 4 bytes.
  BIND(L_fill_4_bytes);
  testl(count, 1<<shift);
  jccb(Assembler::zero, L_fill_2_bytes);
  movl(Address(to, 0), value);
  if (t == T_BYTE || t == T_SHORT) {
    addptr(to, 4);
    BIND(L_fill_2_bytes);
    // Fill trailing 2 bytes.
    testl(count, 1<<(shift-1));
    jccb(Assembler::zero, L_fill_byte);
    movw(Address(to, 0), value);
    if (t == T_BYTE) {
      addptr(to, 2);
      BIND(L_fill_byte);
      // Fill trailing byte.
      testl(count, 1);
      jccb(Assembler::zero, L_exit);
      movb(Address(to, 0), value);
    } else {
      BIND(L_fill_byte);
    }
  } else {
    BIND(L_fill_2_bytes);
  }
  BIND(L_exit);
}
#undef BIND
#undef BLOCK_COMMENT


Assembler::Condition MacroAssembler::negate_condition(Assembler::Condition cond) {
  switch (cond) {
    // Note some conditions are synonyms for others
    case Assembler::zero:         return Assembler::notZero;
    case Assembler::notZero:      return Assembler::zero;
    case Assembler::less:         return Assembler::greaterEqual;
    case Assembler::lessEqual:    return Assembler::greater;
    case Assembler::greater:      return Assembler::lessEqual;
    case Assembler::greaterEqual: return Assembler::less;
    case Assembler::below:        return Assembler::aboveEqual;
    case Assembler::belowEqual:   return Assembler::above;
    case Assembler::above:        return Assembler::belowEqual;
    case Assembler::aboveEqual:   return Assembler::below;
    case Assembler::overflow:     return Assembler::noOverflow;
    case Assembler::noOverflow:   return Assembler::overflow;
    case Assembler::negative:     return Assembler::positive;
    case Assembler::positive:     return Assembler::negative;
    case Assembler::parity:       return Assembler::noParity;
    case Assembler::noParity:     return Assembler::parity;
  }
  ShouldNotReachHere(); return Assembler::overflow;
}
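
// Example: negate_condition(Assembler::below) == Assembler::aboveEqual, so
// callers can branch around a region with jcc(negate_condition(cond), skip)
// instead of keeping their own inverse-condition tables.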

SkipIfEqual::SkipIfEqual(
    MacroAssembler* masm, const bool* flag_addr, bool value) {
  _masm = masm;
  _masm->cmp8(ExternalAddress((address)flag_addr), value);
  _masm->jcc(Assembler::equal, _label);
}

SkipIfEqual::~SkipIfEqual() {
  _masm->bind(_label);
}
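
// Usage sketch (illustrative; SomeBoolFlag is a hypothetical VM flag): the
// guard emits the cmp8/jcc on construction and binds the skip label on
// destruction, so the enclosed code runs only when *flag_addr != value:
//
//   {
//     SkipIfEqual skip(masm, &SomeBoolFlag, false);
//     // ... code emitted here executes only when SomeBoolFlag is true ...
//   } // ~SkipIfEqual binds the label; execution resumes here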