219 emit_byte(op2 | encode(dst)); 220 emit_byte(imm8); 221 } 222 223 224 void Assembler::emit_arith(int op1, int op2, Register dst, int32_t imm32) { 225 assert(isByte(op1) && isByte(op2), "wrong opcode"); 226 assert((op1 & 0x01) == 1, "should be 32bit operation"); 227 assert((op1 & 0x02) == 0, "sign-extension bit should not be set"); 228 if (is8bit(imm32)) { 229 emit_byte(op1 | 0x02); // set sign bit 230 emit_byte(op2 | encode(dst)); 231 emit_byte(imm32 & 0xFF); 232 } else { 233 emit_byte(op1); 234 emit_byte(op2 | encode(dst)); 235 emit_long(imm32); 236 } 237 } 238 239 // immediate-to-memory forms 240 void Assembler::emit_arith_operand(int op1, Register rm, Address adr, int32_t imm32) { 241 assert((op1 & 0x01) == 1, "should be 32bit operation"); 242 assert((op1 & 0x02) == 0, "sign-extension bit should not be set"); 243 if (is8bit(imm32)) { 244 emit_byte(op1 | 0x02); // set sign bit 245 emit_operand(rm, adr, 1); 246 emit_byte(imm32 & 0xFF); 247 } else { 248 emit_byte(op1); 249 emit_operand(rm, adr, 4); 250 emit_long(imm32); 251 } 252 } 253 254 void Assembler::emit_arith(int op1, int op2, Register dst, jobject obj) { 255 LP64_ONLY(ShouldNotReachHere()); 256 assert(isByte(op1) && isByte(op2), "wrong opcode"); 257 assert((op1 & 0x01) == 1, "should be 32bit operation"); 258 assert((op1 & 0x02) == 0, "sign-extension bit should not be set"); 922 } 923 924 void Assembler::addl(Register dst, int32_t imm32) { 925 prefix(dst); 926 emit_arith(0x81, 0xC0, dst, imm32); 927 } 928 929 void Assembler::addl(Register dst, Address src) { 930 InstructionMark im(this); 931 prefix(src, dst); 932 emit_byte(0x03); 933 emit_operand(dst, src); 934 } 935 936 void Assembler::addl(Register dst, Register src) { 937 (void) prefix_and_encode(dst->encoding(), src->encoding()); 938 emit_arith(0x03, 0xC0, dst, src); 939 } 940 941 void Assembler::addr_nop_4() { 942 // 4 bytes: NOP DWORD PTR [EAX+0] 943 emit_byte(0x0F); 944 emit_byte(0x1F); 945 emit_byte(0x40); // emit_rm(cbuf, 0x1, EAX_enc, EAX_enc); 946 emit_byte(0); // 8-bits offset (1 byte) 947 } 948 949 void Assembler::addr_nop_5() { 950 // 5 bytes: NOP DWORD PTR [EAX+EAX*0+0] 8-bits offset 951 emit_byte(0x0F); 952 emit_byte(0x1F); 953 emit_byte(0x44); // emit_rm(cbuf, 0x1, EAX_enc, 0x4); 954 emit_byte(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc); 955 emit_byte(0); // 8-bits offset (1 byte) 956 } 957 958 void Assembler::addr_nop_7() { 959 // 7 bytes: NOP DWORD PTR [EAX+0] 32-bits offset 960 emit_byte(0x0F); 961 emit_byte(0x1F); 962 emit_byte(0x80); // emit_rm(cbuf, 0x2, EAX_enc, EAX_enc); 963 emit_long(0); // 32-bits offset (4 bytes) 964 } 965 966 void Assembler::addr_nop_8() { 967 // 8 bytes: NOP DWORD PTR [EAX+EAX*0+0] 32-bits offset 968 emit_byte(0x0F); 969 emit_byte(0x1F); 970 emit_byte(0x84); // emit_rm(cbuf, 0x2, EAX_enc, 0x4); 971 emit_byte(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc); 972 emit_long(0); // 32-bits offset (4 bytes) 973 } 974 975 void Assembler::addsd(XMMRegister dst, XMMRegister src) { 976 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 977 int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2); 978 emit_byte(0x58); 979 emit_byte(0xC0 | encode); 980 } 981 982 void Assembler::addsd(XMMRegister dst, Address src) { 983 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 984 InstructionMark im(this); 985 simd_prefix(dst, dst, src, VEX_SIMD_F2); 986 emit_byte(0x58); 2752 } 2753 2754 void Assembler::subl(Address dst, int32_t imm32) { 2755 InstructionMark im(this); 2756 prefix(dst); 2757 emit_arith_operand(0x81, rbp, dst, imm32); 2758 } 2759 2760 void Assembler::subl(Address dst, Register src) { 2761 InstructionMark im(this); 2762 prefix(dst, src); 2763 emit_byte(0x29); 2764 emit_operand(src, dst); 2765 } 2766 2767 void Assembler::subl(Register dst, int32_t imm32) { 2768 prefix(dst); 2769 emit_arith(0x81, 0xE8, dst, imm32); 2770 } 2771 2772 void Assembler::subl(Register dst, Address src) { 2773 InstructionMark im(this); 2774 prefix(src, dst); 2775 emit_byte(0x2B); 2776 emit_operand(dst, src); 2777 } 2778 2779 void Assembler::subl(Register dst, Register src) { 2780 (void) prefix_and_encode(dst->encoding(), src->encoding()); 2781 emit_arith(0x2B, 0xC0, dst, src); 2782 } 2783 2784 void Assembler::subsd(XMMRegister dst, XMMRegister src) { 2785 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2786 int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2); 2787 emit_byte(0x5C); 2788 emit_byte(0xC0 | encode); 2789 } 2790 2791 void Assembler::subsd(XMMRegister dst, Address src) { 4743 } 4744 4745 void Assembler::subq(Address dst, int32_t imm32) { 4746 InstructionMark im(this); 4747 prefixq(dst); 4748 emit_arith_operand(0x81, rbp, dst, imm32); 4749 } 4750 4751 void Assembler::subq(Address dst, Register src) { 4752 InstructionMark im(this); 4753 prefixq(dst, src); 4754 emit_byte(0x29); 4755 emit_operand(src, dst); 4756 } 4757 4758 void Assembler::subq(Register dst, int32_t imm32) { 4759 (void) prefixq_and_encode(dst->encoding()); 4760 emit_arith(0x81, 0xE8, dst, imm32); 4761 } 4762 4763 void Assembler::subq(Register dst, Address src) { 4764 InstructionMark im(this); 4765 prefixq(src, dst); 4766 emit_byte(0x2B); 4767 emit_operand(dst, src); 4768 } 4769 4770 void Assembler::subq(Register dst, Register src) { 4771 (void) prefixq_and_encode(dst->encoding(), src->encoding()); 4772 emit_arith(0x2B, 0xC0, dst, src); 4773 } 4774 4775 void Assembler::testq(Register dst, int32_t imm32) { 4776 // not using emit_arith because test 4777 // doesn't support sign-extension of 4778 // 8bit operands 4779 int encode = dst->encoding(); 4780 if (encode == 0) { 4781 prefix(REX_W); 4782 emit_byte(0xA9); 5084 } 5085 5086 void MacroAssembler::cmpoop(Address src1, jobject obj) { 5087 cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate()); 5088 } 5089 5090 void MacroAssembler::cmpoop(Register src1, jobject obj) { 5091 cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate()); 5092 } 5093 5094 void MacroAssembler::extend_sign(Register hi, Register lo) { 5095 // According to Intel Doc. AP-526, "Integer Divide", p.18. 5096 if (VM_Version::is_P6() && hi == rdx && lo == rax) { 5097 cdql(); 5098 } else { 5099 movl(hi, lo); 5100 sarl(hi, 31); 5101 } 5102 } 5103 5104 void MacroAssembler::fat_nop() { 5105 // A 5 byte nop that is safe for patching (see patch_verified_entry) 5106 emit_byte(0x26); // es: 5107 emit_byte(0x2e); // cs: 5108 emit_byte(0x64); // fs: 5109 emit_byte(0x65); // gs: 5110 emit_byte(0x90); 5111 } 5112 5113 void MacroAssembler::jC2(Register tmp, Label& L) { 5114 // set parity bit if FPU flag C2 is set (via rax) 5115 save_rax(tmp); 5116 fwait(); fnstsw_ax(); 5117 sahf(); 5118 restore_rax(tmp); 5119 // branch 5120 jcc(Assembler::parity, L); 5121 } 5122 5123 void MacroAssembler::jnC2(Register tmp, Label& L) { 5124 // set parity bit if FPU flag C2 is set (via rax) 5125 save_rax(tmp); 5126 fwait(); fnstsw_ax(); 5127 sahf(); 5128 restore_rax(tmp); 5129 // branch 5130 jcc(Assembler::noParity, L); 5131 } 5132 5687 5688 return idivq_offset; 5689 } 5690 5691 void MacroAssembler::decrementq(Register reg, int value) { 5692 if (value == min_jint) { subq(reg, value); return; } 5693 if (value < 0) { incrementq(reg, -value); return; } 5694 if (value == 0) { ; return; } 5695 if (value == 1 && UseIncDec) { decq(reg) ; return; } 5696 /* else */ { subq(reg, value) ; return; } 5697 } 5698 5699 void MacroAssembler::decrementq(Address dst, int value) { 5700 if (value == min_jint) { subq(dst, value); return; } 5701 if (value < 0) { incrementq(dst, -value); return; } 5702 if (value == 0) { ; return; } 5703 if (value == 1 && UseIncDec) { decq(dst) ; return; } 5704 /* else */ { subq(dst, value) ; return; } 5705 } 5706 5707 void MacroAssembler::fat_nop() { 5708 // A 5 byte nop that is safe for patching (see patch_verified_entry) 5709 // Recommened sequence from 'Software Optimization Guide for the AMD 5710 // Hammer Processor' 5711 emit_byte(0x66); 5712 emit_byte(0x66); 5713 emit_byte(0x90); 5714 emit_byte(0x66); 5715 emit_byte(0x90); 5716 } 5717 5718 void MacroAssembler::incrementq(Register reg, int value) { 5719 if (value == min_jint) { addq(reg, value); return; } 5720 if (value < 0) { decrementq(reg, -value); return; } 5721 if (value == 0) { ; return; } 5722 if (value == 1 && UseIncDec) { incq(reg) ; return; } 5723 /* else */ { addq(reg, value) ; return; } 5724 } 5725 5726 void MacroAssembler::incrementq(Address dst, int value) { 5727 if (value == min_jint) { addq(dst, value); return; } 5728 if (value < 0) { decrementq(dst, -value); return; } 5729 if (value == 0) { ; return; } 5730 if (value == 1 && UseIncDec) { incq(dst) ; return; } 5731 /* else */ { addq(dst, value) ; return; } 5732 } 5733 5734 // 32bit can do a case table jump in one instruction but we no longer allow the base 5735 // to be installed in the Address class 5736 void MacroAssembler::jump(ArrayAddress entry) { 5737 lea(rscratch1, entry.base()); 6749 lea(end, Address(obj, var_size_in_bytes, Address::times_1)); 6750 } 6751 // if end < obj then we wrapped around => object too long => slow case 6752 cmpptr(end, obj); 6753 jcc(Assembler::below, slow_case); 6754 cmpptr(end, ExternalAddress((address) Universe::heap()->end_addr())); 6755 jcc(Assembler::above, slow_case); 6756 // Compare obj with the top addr, and if still equal, store the new top addr in 6757 // end at the address of the top addr pointer. Sets ZF if was equal, and clears 6758 // it otherwise. Use lock prefix for atomicity on MPs. 6759 locked_cmpxchgptr(end, heap_top); 6760 jcc(Assembler::notEqual, retry); 6761 } 6762 } 6763 6764 void MacroAssembler::enter() { 6765 push(rbp); 6766 mov(rbp, rsp); 6767 } 6768 6769 void MacroAssembler::fcmp(Register tmp) { 6770 fcmp(tmp, 1, true, true); 6771 } 6772 6773 void MacroAssembler::fcmp(Register tmp, int index, bool pop_left, bool pop_right) { 6774 assert(!pop_right || pop_left, "usage error"); 6775 if (VM_Version::supports_cmov()) { 6776 assert(tmp == noreg, "unneeded temp"); 6777 if (pop_left) { 6778 fucomip(index); 6779 } else { 6780 fucomi(index); 6781 } 6782 if (pop_right) { 6783 fpop(); 6784 } 6785 } else { 6786 assert(tmp != noreg, "need temp"); 6787 if (pop_left) { 6788 if (pop_right) { 7808 } else { 7809 // By doing it as an ExternalAddress disp could be converted to a rip-relative 7810 // displacement and done in a single instruction given favorable mapping and 7811 // a smarter version of as_Address. Worst case it is two instructions which 7812 // is no worse off then loading disp into a register and doing as a simple 7813 // Address() as above. 7814 // We can't do as ExternalAddress as the only style since if disp == 0 we'll 7815 // assert since NULL isn't acceptable in a reloci (see 6644928). In any case 7816 // in some cases we'll get a single instruction version. 7817 7818 ExternalAddress cardtable((address)disp); 7819 Address index(noreg, obj, Address::times_1); 7820 movb(as_Address(ArrayAddress(cardtable, index)), 0); 7821 } 7822 } 7823 7824 void MacroAssembler::subptr(Register dst, int32_t imm32) { 7825 LP64_ONLY(subq(dst, imm32)) NOT_LP64(subl(dst, imm32)); 7826 } 7827 7828 void MacroAssembler::subptr(Register dst, Register src) { 7829 LP64_ONLY(subq(dst, src)) NOT_LP64(subl(dst, src)); 7830 } 7831 7832 // C++ bool manipulation 7833 void MacroAssembler::testbool(Register dst) { 7834 if(sizeof(bool) == 1) 7835 testb(dst, 0xff); 7836 else if(sizeof(bool) == 2) { 7837 // testw implementation needed for two byte bools 7838 ShouldNotReachHere(); 7839 } else if(sizeof(bool) == 4) 7840 testl(dst, dst); 7841 else 7842 // unsupported 7843 ShouldNotReachHere(); 7844 } 7845 7846 void MacroAssembler::testptr(Register dst, Register src) { 7847 LP64_ONLY(testq(dst, src)) NOT_LP64(testl(dst, src)); 9275 RelocationHolder rspec = oop_Relocation::spec(oop_index); 9276 Assembler::cmp_narrow_oop(dst, oop_index, rspec); 9277 } 9278 9279 void MacroAssembler::cmp_narrow_oop(Address dst, jobject obj) { 9280 assert (UseCompressedOops, "should only be used for compressed headers"); 9281 assert (Universe::heap() != NULL, "java heap should be initialized"); 9282 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); 9283 int oop_index = oop_recorder()->find_index(obj); 9284 RelocationHolder rspec = oop_Relocation::spec(oop_index); 9285 Assembler::cmp_narrow_oop(dst, oop_index, rspec); 9286 } 9287 9288 void MacroAssembler::reinit_heapbase() { 9289 if (UseCompressedOops) { 9290 movptr(r12_heapbase, ExternalAddress((address)Universe::narrow_oop_base_addr())); 9291 } 9292 } 9293 #endif // _LP64 9294 9295 // IndexOf for constant substrings with size >= 8 chars 9296 // which don't need to be loaded through stack. 9297 void MacroAssembler::string_indexofC8(Register str1, Register str2, 9298 Register cnt1, Register cnt2, 9299 int int_cnt2, Register result, 9300 XMMRegister vec, Register tmp) { 9301 ShortBranchVerifier sbv(this); 9302 assert(UseSSE42Intrinsics, "SSE4.2 is required"); 9303 9304 // This method uses pcmpestri inxtruction with bound registers 9305 // inputs: 9306 // xmm - substring 9307 // rax - substring length (elements count) 9308 // mem - scanned string 9309 // rdx - string length (elements count) 9310 // 0xd - mode: 1100 (substring search) + 01 (unsigned shorts) 9311 // outputs: 9312 // rcx - matched index in string 9313 assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri"); 9314 | 219 emit_byte(op2 | encode(dst)); 220 emit_byte(imm8); 221 } 222 223 224 void Assembler::emit_arith(int op1, int op2, Register dst, int32_t imm32) { 225 assert(isByte(op1) && isByte(op2), "wrong opcode"); 226 assert((op1 & 0x01) == 1, "should be 32bit operation"); 227 assert((op1 & 0x02) == 0, "sign-extension bit should not be set"); 228 if (is8bit(imm32)) { 229 emit_byte(op1 | 0x02); // set sign bit 230 emit_byte(op2 | encode(dst)); 231 emit_byte(imm32 & 0xFF); 232 } else { 233 emit_byte(op1); 234 emit_byte(op2 | encode(dst)); 235 emit_long(imm32); 236 } 237 } 238 239 void Assembler::emit_arith_imm32(int op1, int op2, Register dst, int32_t imm32) { 240 assert(isByte(op1) && isByte(op2), "wrong opcode"); 241 assert((op1 & 0x01) == 1, "should be 32bit operation"); 242 assert((op1 & 0x02) == 0, "sign-extension bit should not be set"); 243 emit_byte(op1); 244 emit_byte(op2 | encode(dst)); 245 emit_long(imm32); 246 } 247 248 // immediate-to-memory forms 249 void Assembler::emit_arith_operand(int op1, Register rm, Address adr, int32_t imm32) { 250 assert((op1 & 0x01) == 1, "should be 32bit operation"); 251 assert((op1 & 0x02) == 0, "sign-extension bit should not be set"); 252 if (is8bit(imm32)) { 253 emit_byte(op1 | 0x02); // set sign bit 254 emit_operand(rm, adr, 1); 255 emit_byte(imm32 & 0xFF); 256 } else { 257 emit_byte(op1); 258 emit_operand(rm, adr, 4); 259 emit_long(imm32); 260 } 261 } 262 263 void Assembler::emit_arith(int op1, int op2, Register dst, jobject obj) { 264 LP64_ONLY(ShouldNotReachHere()); 265 assert(isByte(op1) && isByte(op2), "wrong opcode"); 266 assert((op1 & 0x01) == 1, "should be 32bit operation"); 267 assert((op1 & 0x02) == 0, "sign-extension bit should not be set"); 931 } 932 933 void Assembler::addl(Register dst, int32_t imm32) { 934 prefix(dst); 935 emit_arith(0x81, 0xC0, dst, imm32); 936 } 937 938 void Assembler::addl(Register dst, Address src) { 939 InstructionMark im(this); 940 prefix(src, dst); 941 emit_byte(0x03); 942 emit_operand(dst, src); 943 } 944 945 void Assembler::addl(Register dst, Register src) { 946 (void) prefix_and_encode(dst->encoding(), src->encoding()); 947 emit_arith(0x03, 0xC0, dst, src); 948 } 949 950 void Assembler::addr_nop_4() { 951 assert(UseAddressNop, "no CPU support"); 952 // 4 bytes: NOP DWORD PTR [EAX+0] 953 emit_byte(0x0F); 954 emit_byte(0x1F); 955 emit_byte(0x40); // emit_rm(cbuf, 0x1, EAX_enc, EAX_enc); 956 emit_byte(0); // 8-bits offset (1 byte) 957 } 958 959 void Assembler::addr_nop_5() { 960 assert(UseAddressNop, "no CPU support"); 961 // 5 bytes: NOP DWORD PTR [EAX+EAX*0+0] 8-bits offset 962 emit_byte(0x0F); 963 emit_byte(0x1F); 964 emit_byte(0x44); // emit_rm(cbuf, 0x1, EAX_enc, 0x4); 965 emit_byte(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc); 966 emit_byte(0); // 8-bits offset (1 byte) 967 } 968 969 void Assembler::addr_nop_7() { 970 assert(UseAddressNop, "no CPU support"); 971 // 7 bytes: NOP DWORD PTR [EAX+0] 32-bits offset 972 emit_byte(0x0F); 973 emit_byte(0x1F); 974 emit_byte(0x80); // emit_rm(cbuf, 0x2, EAX_enc, EAX_enc); 975 emit_long(0); // 32-bits offset (4 bytes) 976 } 977 978 void Assembler::addr_nop_8() { 979 assert(UseAddressNop, "no CPU support"); 980 // 8 bytes: NOP DWORD PTR [EAX+EAX*0+0] 32-bits offset 981 emit_byte(0x0F); 982 emit_byte(0x1F); 983 emit_byte(0x84); // emit_rm(cbuf, 0x2, EAX_enc, 0x4); 984 emit_byte(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc); 985 emit_long(0); // 32-bits offset (4 bytes) 986 } 987 988 void Assembler::addsd(XMMRegister dst, XMMRegister src) { 989 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 990 int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2); 991 emit_byte(0x58); 992 emit_byte(0xC0 | encode); 993 } 994 995 void Assembler::addsd(XMMRegister dst, Address src) { 996 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 997 InstructionMark im(this); 998 simd_prefix(dst, dst, src, VEX_SIMD_F2); 999 emit_byte(0x58); 2765 } 2766 2767 void Assembler::subl(Address dst, int32_t imm32) { 2768 InstructionMark im(this); 2769 prefix(dst); 2770 emit_arith_operand(0x81, rbp, dst, imm32); 2771 } 2772 2773 void Assembler::subl(Address dst, Register src) { 2774 InstructionMark im(this); 2775 prefix(dst, src); 2776 emit_byte(0x29); 2777 emit_operand(src, dst); 2778 } 2779 2780 void Assembler::subl(Register dst, int32_t imm32) { 2781 prefix(dst); 2782 emit_arith(0x81, 0xE8, dst, imm32); 2783 } 2784 2785 void Assembler::subl_imm32(Register dst, int32_t imm32) { 2786 prefix(dst); 2787 emit_arith_imm32(0x81, 0xE8, dst, imm32); 2788 } 2789 2790 void Assembler::subl(Register dst, Address src) { 2791 InstructionMark im(this); 2792 prefix(src, dst); 2793 emit_byte(0x2B); 2794 emit_operand(dst, src); 2795 } 2796 2797 void Assembler::subl(Register dst, Register src) { 2798 (void) prefix_and_encode(dst->encoding(), src->encoding()); 2799 emit_arith(0x2B, 0xC0, dst, src); 2800 } 2801 2802 void Assembler::subsd(XMMRegister dst, XMMRegister src) { 2803 NOT_LP64(assert(VM_Version::supports_sse2(), "")); 2804 int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2); 2805 emit_byte(0x5C); 2806 emit_byte(0xC0 | encode); 2807 } 2808 2809 void Assembler::subsd(XMMRegister dst, Address src) { 4761 } 4762 4763 void Assembler::subq(Address dst, int32_t imm32) { 4764 InstructionMark im(this); 4765 prefixq(dst); 4766 emit_arith_operand(0x81, rbp, dst, imm32); 4767 } 4768 4769 void Assembler::subq(Address dst, Register src) { 4770 InstructionMark im(this); 4771 prefixq(dst, src); 4772 emit_byte(0x29); 4773 emit_operand(src, dst); 4774 } 4775 4776 void Assembler::subq(Register dst, int32_t imm32) { 4777 (void) prefixq_and_encode(dst->encoding()); 4778 emit_arith(0x81, 0xE8, dst, imm32); 4779 } 4780 4781 void Assembler::subq_imm32(Register dst, int32_t imm32) { 4782 (void) prefixq_and_encode(dst->encoding()); 4783 emit_arith_imm32(0x81, 0xE8, dst, imm32); 4784 } 4785 4786 void Assembler::subq(Register dst, Address src) { 4787 InstructionMark im(this); 4788 prefixq(src, dst); 4789 emit_byte(0x2B); 4790 emit_operand(dst, src); 4791 } 4792 4793 void Assembler::subq(Register dst, Register src) { 4794 (void) prefixq_and_encode(dst->encoding(), src->encoding()); 4795 emit_arith(0x2B, 0xC0, dst, src); 4796 } 4797 4798 void Assembler::testq(Register dst, int32_t imm32) { 4799 // not using emit_arith because test 4800 // doesn't support sign-extension of 4801 // 8bit operands 4802 int encode = dst->encoding(); 4803 if (encode == 0) { 4804 prefix(REX_W); 4805 emit_byte(0xA9); 5107 } 5108 5109 void MacroAssembler::cmpoop(Address src1, jobject obj) { 5110 cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate()); 5111 } 5112 5113 void MacroAssembler::cmpoop(Register src1, jobject obj) { 5114 cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate()); 5115 } 5116 5117 void MacroAssembler::extend_sign(Register hi, Register lo) { 5118 // According to Intel Doc. AP-526, "Integer Divide", p.18. 5119 if (VM_Version::is_P6() && hi == rdx && lo == rax) { 5120 cdql(); 5121 } else { 5122 movl(hi, lo); 5123 sarl(hi, 31); 5124 } 5125 } 5126 5127 void MacroAssembler::jC2(Register tmp, Label& L) { 5128 // set parity bit if FPU flag C2 is set (via rax) 5129 save_rax(tmp); 5130 fwait(); fnstsw_ax(); 5131 sahf(); 5132 restore_rax(tmp); 5133 // branch 5134 jcc(Assembler::parity, L); 5135 } 5136 5137 void MacroAssembler::jnC2(Register tmp, Label& L) { 5138 // set parity bit if FPU flag C2 is set (via rax) 5139 save_rax(tmp); 5140 fwait(); fnstsw_ax(); 5141 sahf(); 5142 restore_rax(tmp); 5143 // branch 5144 jcc(Assembler::noParity, L); 5145 } 5146 5701 5702 return idivq_offset; 5703 } 5704 5705 void MacroAssembler::decrementq(Register reg, int value) { 5706 if (value == min_jint) { subq(reg, value); return; } 5707 if (value < 0) { incrementq(reg, -value); return; } 5708 if (value == 0) { ; return; } 5709 if (value == 1 && UseIncDec) { decq(reg) ; return; } 5710 /* else */ { subq(reg, value) ; return; } 5711 } 5712 5713 void MacroAssembler::decrementq(Address dst, int value) { 5714 if (value == min_jint) { subq(dst, value); return; } 5715 if (value < 0) { incrementq(dst, -value); return; } 5716 if (value == 0) { ; return; } 5717 if (value == 1 && UseIncDec) { decq(dst) ; return; } 5718 /* else */ { subq(dst, value) ; return; } 5719 } 5720 5721 void MacroAssembler::incrementq(Register reg, int value) { 5722 if (value == min_jint) { addq(reg, value); return; } 5723 if (value < 0) { decrementq(reg, -value); return; } 5724 if (value == 0) { ; return; } 5725 if (value == 1 && UseIncDec) { incq(reg) ; return; } 5726 /* else */ { addq(reg, value) ; return; } 5727 } 5728 5729 void MacroAssembler::incrementq(Address dst, int value) { 5730 if (value == min_jint) { addq(dst, value); return; } 5731 if (value < 0) { decrementq(dst, -value); return; } 5732 if (value == 0) { ; return; } 5733 if (value == 1 && UseIncDec) { incq(dst) ; return; } 5734 /* else */ { addq(dst, value) ; return; } 5735 } 5736 5737 // 32bit can do a case table jump in one instruction but we no longer allow the base 5738 // to be installed in the Address class 5739 void MacroAssembler::jump(ArrayAddress entry) { 5740 lea(rscratch1, entry.base()); 6752 lea(end, Address(obj, var_size_in_bytes, Address::times_1)); 6753 } 6754 // if end < obj then we wrapped around => object too long => slow case 6755 cmpptr(end, obj); 6756 jcc(Assembler::below, slow_case); 6757 cmpptr(end, ExternalAddress((address) Universe::heap()->end_addr())); 6758 jcc(Assembler::above, slow_case); 6759 // Compare obj with the top addr, and if still equal, store the new top addr in 6760 // end at the address of the top addr pointer. Sets ZF if was equal, and clears 6761 // it otherwise. Use lock prefix for atomicity on MPs. 6762 locked_cmpxchgptr(end, heap_top); 6763 jcc(Assembler::notEqual, retry); 6764 } 6765 } 6766 6767 void MacroAssembler::enter() { 6768 push(rbp); 6769 mov(rbp, rsp); 6770 } 6771 6772 // A 5 byte nop that is safe for patching (see patch_verified_entry) 6773 void MacroAssembler::fat_nop() { 6774 if (UseAddressNop) { 6775 addr_nop_5(); 6776 } else { 6777 emit_byte(0x26); // es: 6778 emit_byte(0x2e); // cs: 6779 emit_byte(0x64); // fs: 6780 emit_byte(0x65); // gs: 6781 emit_byte(0x90); 6782 } 6783 } 6784 6785 void MacroAssembler::fcmp(Register tmp) { 6786 fcmp(tmp, 1, true, true); 6787 } 6788 6789 void MacroAssembler::fcmp(Register tmp, int index, bool pop_left, bool pop_right) { 6790 assert(!pop_right || pop_left, "usage error"); 6791 if (VM_Version::supports_cmov()) { 6792 assert(tmp == noreg, "unneeded temp"); 6793 if (pop_left) { 6794 fucomip(index); 6795 } else { 6796 fucomi(index); 6797 } 6798 if (pop_right) { 6799 fpop(); 6800 } 6801 } else { 6802 assert(tmp != noreg, "need temp"); 6803 if (pop_left) { 6804 if (pop_right) { 7824 } else { 7825 // By doing it as an ExternalAddress disp could be converted to a rip-relative 7826 // displacement and done in a single instruction given favorable mapping and 7827 // a smarter version of as_Address. Worst case it is two instructions which 7828 // is no worse off then loading disp into a register and doing as a simple 7829 // Address() as above. 7830 // We can't do as ExternalAddress as the only style since if disp == 0 we'll 7831 // assert since NULL isn't acceptable in a reloci (see 6644928). In any case 7832 // in some cases we'll get a single instruction version. 7833 7834 ExternalAddress cardtable((address)disp); 7835 Address index(noreg, obj, Address::times_1); 7836 movb(as_Address(ArrayAddress(cardtable, index)), 0); 7837 } 7838 } 7839 7840 void MacroAssembler::subptr(Register dst, int32_t imm32) { 7841 LP64_ONLY(subq(dst, imm32)) NOT_LP64(subl(dst, imm32)); 7842 } 7843 7844 void MacroAssembler::subptr_imm32(Register dst, int32_t imm32) { 7845 LP64_ONLY(subq_imm32(dst, imm32)) NOT_LP64(subl_imm32(dst, imm32)); 7846 } 7847 7848 void MacroAssembler::subptr(Register dst, Register src) { 7849 LP64_ONLY(subq(dst, src)) NOT_LP64(subl(dst, src)); 7850 } 7851 7852 // C++ bool manipulation 7853 void MacroAssembler::testbool(Register dst) { 7854 if(sizeof(bool) == 1) 7855 testb(dst, 0xff); 7856 else if(sizeof(bool) == 2) { 7857 // testw implementation needed for two byte bools 7858 ShouldNotReachHere(); 7859 } else if(sizeof(bool) == 4) 7860 testl(dst, dst); 7861 else 7862 // unsupported 7863 ShouldNotReachHere(); 7864 } 7865 7866 void MacroAssembler::testptr(Register dst, Register src) { 7867 LP64_ONLY(testq(dst, src)) NOT_LP64(testl(dst, src)); 9295 RelocationHolder rspec = oop_Relocation::spec(oop_index); 9296 Assembler::cmp_narrow_oop(dst, oop_index, rspec); 9297 } 9298 9299 void MacroAssembler::cmp_narrow_oop(Address dst, jobject obj) { 9300 assert (UseCompressedOops, "should only be used for compressed headers"); 9301 assert (Universe::heap() != NULL, "java heap should be initialized"); 9302 assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); 9303 int oop_index = oop_recorder()->find_index(obj); 9304 RelocationHolder rspec = oop_Relocation::spec(oop_index); 9305 Assembler::cmp_narrow_oop(dst, oop_index, rspec); 9306 } 9307 9308 void MacroAssembler::reinit_heapbase() { 9309 if (UseCompressedOops) { 9310 movptr(r12_heapbase, ExternalAddress((address)Universe::narrow_oop_base_addr())); 9311 } 9312 } 9313 #endif // _LP64 9314 9315 9316 // C2 compiled method's prolog code. 9317 void MacroAssembler::verified_entry(int framesize, bool stack_bang, bool fp_mode_24b) { 9318 9319 // WARNING: Initial instruction MUST be 5 bytes or longer so that 9320 // NativeJump::patch_verified_entry will be able to patch out the entry 9321 // code safely. The push to verify stack depth is ok at 5 bytes, 9322 // the frame allocation can be either 3 or 6 bytes. So if we don't do 9323 // stack bang then we must use the 6 byte frame allocation even if 9324 // we have no frame. :-( 9325 9326 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); 9327 // Remove word for return addr 9328 framesize -= wordSize; 9329 9330 // Calls to C2R adapters often do not accept exceptional returns. 9331 // We require that their callers must bang for them. But be careful, because 9332 // some VM calls (such as call site linkage) can use several kilobytes of 9333 // stack. But the stack safety zone should account for that. 9334 // See bugs 4446381, 4468289, 4497237. 9335 if (stack_bang) { 9336 generate_stack_overflow_check(framesize); 9337 9338 // We always push rbp, so that on return to interpreter rbp, will be 9339 // restored correctly and we can correct the stack. 9340 push(rbp); 9341 // Remove word for ebp 9342 framesize -= wordSize; 9343 9344 // Create frame 9345 if (framesize) { 9346 subptr(rsp, framesize); 9347 } 9348 } else { 9349 // Create frame 9350 subptr_imm32(rsp, framesize); 9351 9352 // Save RBP register now. 9353 framesize -= wordSize; 9354 movptr(Address(rsp, framesize), rbp); 9355 } 9356 9357 if (VerifyStackAtCalls) { // Majik cookie to verify stack depth 9358 framesize -= wordSize; 9359 movptr(Address(rsp, framesize), (int32_t)0xbadb100d); 9360 } 9361 9362 #ifndef _LP64 9363 // If method sets FPU control word do it now 9364 if (fp_mode_24b) { 9365 fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24())); 9366 } 9367 if (UseSSE >= 2 && VerifyFPU) { 9368 verify_FPU(0, "FPU stack must be clean on entry"); 9369 } 9370 #endif 9371 9372 #ifdef ASSERT 9373 if (VerifyStackAtCalls) { 9374 Label L; 9375 push(rax); 9376 mov(rax, rsp); 9377 andptr(rax, StackAlignmentInBytes-1); 9378 cmpptr(rax, StackAlignmentInBytes-wordSize); 9379 pop(rax); 9380 jcc(Assembler::equal, L); 9381 stop("Stack is not properly aligned!"); 9382 bind(L); 9383 } 9384 #endif 9385 9386 } 9387 9388 9389 // IndexOf for constant substrings with size >= 8 chars 9390 // which don't need to be loaded through stack. 9391 void MacroAssembler::string_indexofC8(Register str1, Register str2, 9392 Register cnt1, Register cnt2, 9393 int int_cnt2, Register result, 9394 XMMRegister vec, Register tmp) { 9395 ShortBranchVerifier sbv(this); 9396 assert(UseSSE42Intrinsics, "SSE4.2 is required"); 9397 9398 // This method uses pcmpestri inxtruction with bound registers 9399 // inputs: 9400 // xmm - substring 9401 // rax - substring length (elements count) 9402 // mem - scanned string 9403 // rdx - string length (elements count) 9404 // 0xd - mode: 1100 (substring search) + 01 (unsigned shorts) 9405 // outputs: 9406 // rcx - matched index in string 9407 assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri"); 9408 |