95 // 32-bit versions
96
97 Address MacroAssembler::as_Address(AddressLiteral adr) {
98 return Address(adr.target(), adr.rspec());
99 }
100
101 Address MacroAssembler::as_Address(ArrayAddress adr) {
102 return Address::make_array(adr);
103 }
104
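// Note: the 32-bit C ABI used here (cdecl) is caller-cleanup, so after the
// call below returns, the stacked arguments are popped explicitly, one
// wordSize slot per argument.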
105 void MacroAssembler::call_VM_leaf_base(address entry_point,
106 int number_of_arguments) {
107 call(RuntimeAddress(entry_point));
108 increment(rsp, number_of_arguments * wordSize);
109 }
110
111 void MacroAssembler::cmpklass(Address src1, Metadata* obj) {
112 cmp_literal32(src1, (int32_t)obj, metadata_Relocation::spec_for_immediate());
113 }
114
115 void MacroAssembler::cmpklass(Register src1, Metadata* obj) {
116 cmp_literal32(src1, (int32_t)obj, metadata_Relocation::spec_for_immediate());
117 }
118
119 void MacroAssembler::cmpoop_raw(Address src1, jobject obj) {
120 cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate());
121 }
122
123 void MacroAssembler::cmpoop_raw(Register src1, jobject obj) {
124 cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate());
125 }
126
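// Unlike the _raw variants above, these oop compares are routed through the
// BarrierSetAssembler so that a collector which needs to intercept oop
// equality (e.g. Shenandoah) can do so.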
127 void MacroAssembler::cmpoop(Address src1, jobject obj) {
128 BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
129 bs->obj_equals(this, src1, obj);
130 }
131
132 void MacroAssembler::cmpoop(Register src1, jobject obj) {
133 BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
134 bs->obj_equals(this, src1, obj);
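// LP64_ONLY(x) expands to x only on 64-bit builds, NOT_LP64(x) only on
// 32-bit builds, so each movptr emits exactly one pointer-width move.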
2483 void MacroAssembler::movptr(Register dst, intptr_t src) {
2484 LP64_ONLY(mov64(dst, src)) NOT_LP64(movl(dst, src));
2485 }
2486
2487 void MacroAssembler::movptr(Address dst, Register src) {
2488 LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
2489 }
2490
2491 void MacroAssembler::movdqu(Address dst, XMMRegister src) {
2492 assert(((src->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15");
2493 Assembler::movdqu(dst, src);
2494 }
2495
2496 void MacroAssembler::movdqu(XMMRegister dst, Address src) {
2497 assert(((dst->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15");
2498 Assembler::movdqu(dst, src);
2499 }
2500
2501 void MacroAssembler::movdqu(XMMRegister dst, XMMRegister src) {
2502 assert(((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15");
2503 Assembler::movdqu(dst, src);
2504 }
2505
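// AddressLiteral forms: if the literal target is reachable (within
// rip-relative range on 64-bit), it is addressed directly; otherwise the
// address is materialized in the scratch register with lea and the access
// goes through it indirectly.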
2506 void MacroAssembler::movdqu(XMMRegister dst, AddressLiteral src, Register scratchReg) {
2507 if (reachable(src)) {
2508 movdqu(dst, as_Address(src));
2509 } else {
2510 lea(scratchReg, src);
2511 movdqu(dst, Address(scratchReg, 0));
2512 }
2513 }
2514
2515 void MacroAssembler::vmovdqu(Address dst, XMMRegister src) {
2516 assert(((src->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15");
2517 Assembler::vmovdqu(dst, src);
2518 }
2519
2520 void MacroAssembler::vmovdqu(XMMRegister dst, Address src) {
2521 assert(((dst->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15");
2522 Assembler::vmovdqu(dst, src);
2523 }
2524
2525 void MacroAssembler::vmovdqu(XMMRegister dst, XMMRegister src) {
2526 assert(((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15");
2527 Assembler::vmovdqu(dst, src);
2528 }
2529
2530 void MacroAssembler::vmovdqu(XMMRegister dst, AddressLiteral src, Register scratch_reg) {
2531 if (reachable(src)) {
2532 vmovdqu(dst, as_Address(src));
2533 } else {
2535 lea(scratch_reg, src);
2536 vmovdqu(dst, Address(scratch_reg, 0));
2537 }
2538 }
2539
2540 void MacroAssembler::evmovdquq(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch) {
2541 if (reachable(src)) {
2542 Assembler::evmovdquq(dst, as_Address(src), vector_len);
2543 } else {
2544 lea(rscratch, src);
2545 Assembler::evmovdquq(dst, Address(rscratch, 0), vector_len);
2546 }
2547 }
2548
2549 void MacroAssembler::movdqa(XMMRegister dst, AddressLiteral src) {
2550 if (reachable(src)) {
2551 Assembler::movdqa(dst, as_Address(src));
2552 } else {
2553 lea(rscratch1, src);
2554 Assembler::movdqa(dst, Address(rscratch1, 0));
2555 }
2556 }
2557
2558 void MacroAssembler::movsd(XMMRegister dst, AddressLiteral src) {
2559 if (reachable(src)) {
3006 lea(scratch_reg, src);
3007 Assembler::vpand(dst, nds, Address(scratch_reg, 0), vector_len);
3008 }
3009 }
3010
3011 void MacroAssembler::vpbroadcastw(XMMRegister dst, XMMRegister src, int vector_len) {
3012 assert(((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
3013 Assembler::vpbroadcastw(dst, src, vector_len);
3014 }
3015
3016 void MacroAssembler::vpcmpeqb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3017 assert(((dst->encoding() < 16 && src->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
3018 Assembler::vpcmpeqb(dst, nds, src, vector_len);
3019 }
3020
3021 void MacroAssembler::vpcmpeqw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3022 assert(((dst->encoding() < 16 && src->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
3023 Assembler::vpcmpeqw(dst, nds, src, vector_len);
3024 }
3025
3026 void MacroAssembler::vpmovzxbw(XMMRegister dst, Address src, int vector_len) {
3027 assert(((dst->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
3028 Assembler::vpmovzxbw(dst, src, vector_len);
3029 }
3030
3031 void MacroAssembler::vpmovmskb(Register dst, XMMRegister src) {
3032 assert((src->encoding() < 16),"XMM register should be 0-15");
3033 Assembler::vpmovmskb(dst, src);
3034 }
3035
3036 void MacroAssembler::vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3037 assert(((dst->encoding() < 16 && src->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
3038 Assembler::vpmullw(dst, nds, src, vector_len);
3039 }
3040
3041 void MacroAssembler::vpmullw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
3042 assert(((dst->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
3043 Assembler::vpmullw(dst, nds, src, vector_len);
3044 }
3045
3130 }
3131
3132 void MacroAssembler::vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg) {
3133 if (reachable(src)) {
3134 vandpd(dst, nds, as_Address(src), vector_len);
3135 } else {
3136 lea(scratch_reg, src);
3137 vandpd(dst, nds, Address(scratch_reg, 0), vector_len);
3138 }
3139 }
3140
3141 void MacroAssembler::vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg) {
3142 if (reachable(src)) {
3143 vandps(dst, nds, as_Address(src), vector_len);
3144 } else {
3145 lea(scratch_reg, src);
3146 vandps(dst, nds, Address(scratch_reg, 0), vector_len);
3147 }
3148 }
3149
3150 void MacroAssembler::vdivsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
3151 if (reachable(src)) {
3152 vdivsd(dst, nds, as_Address(src));
3153 } else {
3154 lea(rscratch1, src);
3155 vdivsd(dst, nds, Address(rscratch1, 0));
3156 }
3157 }
3158
3159 void MacroAssembler::vdivss(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
3160 if (reachable(src)) {
3161 vdivss(dst, nds, as_Address(src));
3162 } else {
3163 lea(rscratch1, src);
3164 vdivss(dst, nds, Address(rscratch1, 0));
3165 }
3166 }
3167
3168 void MacroAssembler::vmulsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
3169 if (reachable(src)) {
3226 } else {
3227 lea(scratch_reg, src);
3228 vxorps(dst, nds, Address(scratch_reg, 0), vector_len);
3229 }
3230 }
3231
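// AVX1 provides no 256-bit integer XOR, so without AVX2 a 256-bit vpxor
// falls back to vxorpd, which is bitwise equivalent.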
3232 void MacroAssembler::vpxor(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg) {
3233 if (UseAVX > 1 || (vector_len < 1)) {
3234 if (reachable(src)) {
3235 Assembler::vpxor(dst, nds, as_Address(src), vector_len);
3236 } else {
3237 lea(scratch_reg, src);
3238 Assembler::vpxor(dst, nds, Address(scratch_reg, 0), vector_len);
3239 }
3240 } else {
3242 MacroAssembler::vxorpd(dst, nds, src, vector_len, scratch_reg);
3243 }
3244 }
3245
3246 //-------------------------------------------------------------------------------------------
3247
3248 void MacroAssembler::clear_jweak_tag(Register possibly_jweak) {
3249 const int32_t inverted_jweak_mask = ~static_cast<int32_t>(JNIHandles::weak_tag_mask);
3250 STATIC_ASSERT(inverted_jweak_mask == -2); // otherwise check this code
3251 // The inverted mask is sign-extended
3252 andptr(possibly_jweak, inverted_jweak_mask);
3253 }
3254
3255 void MacroAssembler::resolve_jobject(Register value,
3256 Register thread,
3257 Register tmp) {
3258 assert_different_registers(value, thread, tmp);
3259 Label done, not_weak;
3260 testptr(value, value);
3261 jcc(Assembler::zero, done); // Use NULL as-is.
3262 testptr(value, JNIHandles::weak_tag_mask); // Test for jweak tag.
3263 jcc(Assembler::zero, not_weak);
3264 // Resolve jweak.
3265 access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF,
3266 value, Address(value, -JNIHandles::weak_tag_value), tmp, thread);
5756
5757 // scale is in rcx on both Win64 and Unix
5758 ShortBranchVerifier sbv(this);
5759
5760 shlq(length);
5761 xorq(result, result);
5762
5763 if ((AVX3Threshold == 0) && (UseAVX > 2) &&
5764 VM_Version::supports_avx512vlbw()) {
5765 Label VECTOR64_LOOP, VECTOR64_NOT_EQUAL, VECTOR32_TAIL;
5766
5767 cmpq(length, 64);
5768 jcc(Assembler::less, VECTOR32_TAIL);
5769
5770 movq(tmp1, length);
5771 andq(tmp1, 0x3F); // tail count
5772 andq(length, ~(0x3F)); // vector count
5773
5774 bind(VECTOR64_LOOP);
5775 // AVX512 code to compare 64-byte vectors.
5776 evmovdqub(rymm0, Address(obja, result), Assembler::AVX_512bit);
5777 evpcmpeqb(k7, rymm0, Address(objb, result), Assembler::AVX_512bit);
5778 kortestql(k7, k7);
5779 jcc(Assembler::aboveEqual, VECTOR64_NOT_EQUAL); // mismatch
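// kortestql sets CF only when k7 is all ones (all 64 byte pairs equal), so
// aboveEqual (CF == 0) branches as soon as any byte pair differs.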
5780 addq(result, 64);
5781 subq(length, 64);
5782 jccb(Assembler::notZero, VECTOR64_LOOP);
5783
5785 testq(tmp1, tmp1);
5786 jcc(Assembler::zero, SAME_TILL_END);
5787
5788 //bind(VECTOR64_TAIL);
5789 // AVX512 code to compare the remaining tail of up to 63 bytes.
5790 mov64(tmp2, 0xFFFFFFFFFFFFFFFF);
5791 shlxq(tmp2, tmp2, tmp1);
5792 notq(tmp2);
5793 kmovql(k3, tmp2);
5794
5795 evmovdqub(rymm0, k3, Address(obja, result), Assembler::AVX_512bit);
5796 evpcmpeqb(k7, k3, rymm0, Address(objb, result), Assembler::AVX_512bit);
5797
5798 ktestql(k7, k3);
5799 jcc(Assembler::below, SAME_TILL_END); // no mismatch
5800
5801 bind(VECTOR64_NOT_EQUAL);
5802 kmovql(tmp1, k7);
5803 notq(tmp1);
5804 tzcntq(tmp1, tmp1);
5805 addq(result, tmp1);
5806 shrq(result);
5807 jmp(DONE);
5808 bind(VECTOR32_TAIL);
5809 }
5810
5811 cmpq(length, 8);
5812 jcc(Assembler::equal, VECTOR8_LOOP);
5813 jcc(Assembler::less, VECTOR4_TAIL);
5814
5815 if (UseAVX >= 2) {
7214 evpbroadcastw(tmp2Reg, result, Assembler::AVX_512bit);
7215
7216 testl(len, -64);
7217 jcc(Assembler::zero, post_alignment);
7218
7219 movl(tmp5, dst);
7220 andl(tmp5, (32 - 1));
7221 negl(tmp5);
7222 andl(tmp5, (32 - 1));
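// tmp5 = (-dst) & 31: the element count needed to bring dst up to the next
// 32-byte boundary (0 if dst is already aligned).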
7223
7224 // skip the alignment prologue when dst is already aligned
7225 testl(tmp5, 0xFFFFFFFF);
7226 jcc(Assembler::zero, post_alignment);
7227
7228 // ~(~0 << tmp5), where tmp5 is the # of elements to process for alignment
7229 movl(result, 0xFFFFFFFF);
7230 shlxl(result, result, tmp5);
7231 notl(result);
7232 kmovdl(k3, result);
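// Illustration: if tmp5 == 5, result = ~(0xFFFFFFFF << 5) = 0x0000001F,
// i.e. a k-mask selecting only the low 5 elements.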
7233
7234 evmovdquw(tmp1Reg, k3, Address(src, 0), Assembler::AVX_512bit);
7235 evpcmpuw(k2, k3, tmp1Reg, tmp2Reg, Assembler::le, Assembler::AVX_512bit);
7236 ktestd(k2, k3);
7237 jcc(Assembler::carryClear, return_zero);
7238
7239 evpmovwb(Address(dst, 0), k3, tmp1Reg, Assembler::AVX_512bit);
7240
7241 addptr(src, tmp5);
7242 addptr(src, tmp5);
7243 addptr(dst, tmp5);
7244 subl(len, tmp5);
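// src holds 2-byte chars while dst holds 1-byte elements, so src is
// advanced twice (2 * tmp5 bytes) and dst only once.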
7245
7246 bind(post_alignment);
7247 // end of alignment
7248
7249 movl(tmp5, len);
7250 andl(tmp5, (32 - 1)); // tail count (in chars)
7251 andl(len, ~(32 - 1)); // vector count (in chars)
7252 jcc(Assembler::zero, copy_loop_tail);
7253
7254 lea(src, Address(src, len, Address::times_2));
7255 lea(dst, Address(dst, len, Address::times_1));
7256 negptr(len);
7257
7258 bind(copy_32_loop);
7259 evmovdquw(tmp1Reg, Address(src, len, Address::times_2), Assembler::AVX_512bit);
7260 evpcmpuw(k2, tmp1Reg, tmp2Reg, Assembler::le, Assembler::AVX_512bit);
7261 kortestdl(k2, k2);
7262 jcc(Assembler::carryClear, return_zero);
7263
7264 // All elements in the current chunk are valid candidates for
7265 // compression. Write the truncated byte elements to memory.
7266 evpmovwb(Address(dst, len, Address::times_1), tmp1Reg, Assembler::AVX_512bit);
7267 addptr(len, 32);
7268 jcc(Assembler::notZero, copy_32_loop);
7269
7270 bind(copy_loop_tail);
7271 // bail out when there is nothing to be done
7272 testl(tmp5, 0xFFFFFFFF);
7273 jcc(Assembler::zero, return_length);
7274
7275 movl(len, tmp5);
7276
7277 // ~(~0 << len), where len is the # of remaining elements to process
7278 movl(result, 0xFFFFFFFF);
7279 shlxl(result, result, len);
7280 notl(result);
7281
7282 kmovdl(k3, result);
7283
7284 evmovdquw(tmp1Reg, k3, Address(src, 0), Assembler::AVX_512bit);
7285 evpcmpuw(k2, k3, tmp1Reg, tmp2Reg, Assembler::le, Assembler::AVX_512bit);
7286 ktestd(k2, k3);
7287 jcc(Assembler::carryClear, return_zero);
7288
7289 evpmovwb(Address(dst, 0), k3, tmp1Reg, Assembler::AVX_512bit);
7290 jmp(return_length);
7291
7292 bind(below_threshold);
7293 }
7294
7295 if (UseSSE42Intrinsics) {
7296 Label copy_32_loop, copy_16, copy_tail;
7297
7298 movl(result, len);
7299
7300 movl(tmp5, 0xff00ff00); // create mask to test for Unicode chars in vectors
7301
7302 // vectored compression
7303 andl(len, 0xfffffff0); // vector count (in chars)
7304 andl(result, 0x0000000f); // tail count (in chars)
7409 testl(len, -16);
7410 jcc(Assembler::zero, below_threshold);
7411
7412 testl(len, -1 * AVX3Threshold);
7413 jcc(Assembler::zero, avx3_threshold);
7414
7415 // Pre-calculate the counts so the main loop needs only one arithmetic
7416 // operation per iteration.
7417 andl(tmp2, (32 - 1)); // tail count (in chars), 32 element wide loop
7418 andl(len, -32); // vector count
7419 jccb(Assembler::zero, copy_tail);
7420
7421 lea(src, Address(src, len, Address::times_1));
7422 lea(dst, Address(dst, len, Address::times_2));
7423 negptr(len);
7424
7425
7426 // inflate 32 chars per iter
7427 bind(copy_32_loop);
7428 vpmovzxbw(tmp1, Address(src, len, Address::times_1), Assembler::AVX_512bit);
7429 evmovdquw(Address(dst, len, Address::times_2), tmp1, Assembler::AVX_512bit);
7430 addptr(len, 32);
7431 jcc(Assembler::notZero, copy_32_loop);
7432
7433 bind(copy_tail);
7434 // bail out when there is nothing to be done
7435 testl(tmp2, -1); // we don't destroy the contents of tmp2 here
7436 jcc(Assembler::zero, done);
7437
7438 // ~(~0 << tmp2), where tmp2 is the # of remaining elements to process
7439 movl(tmp3_aliased, -1);
7440 shlxl(tmp3_aliased, tmp3_aliased, tmp2);
7441 notl(tmp3_aliased);
7442 kmovdl(k2, tmp3_aliased);
7443 evpmovzxbw(tmp1, k2, Address(src, 0), Assembler::AVX_512bit);
7444 evmovdquw(Address(dst, 0), k2, tmp1, Assembler::AVX_512bit);
7445
7446 jmp(done);
7447 bind(avx3_threshold);
7448 }
7449 if (UseSSE42Intrinsics) {
7450 Label copy_16_loop, copy_8_loop, copy_bytes, copy_new_tail, copy_tail;
7451
7452 if (UseAVX > 1) {
7453 andl(tmp2, (16 - 1));
7454 andl(len, -16);
7455 jccb(Assembler::zero, copy_new_tail);
7456 } else {
7457 andl(tmp2, 0x00000007); // tail count (in chars)
7458 andl(len, 0xfffffff8); // vector count (in chars)
7459 jccb(Assembler::zero, copy_tail);
7460 }
7461
7462 // vectored inflation
7463 lea(src, Address(src, len, Address::times_1));
7464 lea(dst, Address(dst, len, Address::times_2));
95 // 32-bit versions
96
97 Address MacroAssembler::as_Address(AddressLiteral adr) {
98 return Address(adr.target(), adr.rspec());
99 }
100
101 Address MacroAssembler::as_Address(ArrayAddress adr) {
102 return Address::make_array(adr);
103 }
104
105 void MacroAssembler::call_VM_leaf_base(address entry_point,
106 int number_of_arguments) {
107 call(RuntimeAddress(entry_point));
108 increment(rsp, number_of_arguments * wordSize);
109 }
110
111 void MacroAssembler::cmpklass(Address src1, Metadata* obj) {
112 cmp_literal32(src1, (int32_t)obj, metadata_Relocation::spec_for_immediate());
113 }
114
115
116 void MacroAssembler::cmpklass(Register src1, Metadata* obj) {
117 cmp_literal32(src1, (int32_t)obj, metadata_Relocation::spec_for_immediate());
118 }
119
120 void MacroAssembler::cmpoop_raw(Address src1, jobject obj) {
121 cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate());
122 }
123
124 void MacroAssembler::cmpoop_raw(Register src1, jobject obj) {
125 cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate());
126 }
127
128 void MacroAssembler::cmpoop(Address src1, jobject obj) {
129 BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
130 bs->obj_equals(this, src1, obj);
131 }
132
133 void MacroAssembler::cmpoop(Register src1, jobject obj) {
134 BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
135 bs->obj_equals(this, src1, obj);
2484 void MacroAssembler::movptr(Register dst, intptr_t src) {
2485 LP64_ONLY(mov64(dst, src)) NOT_LP64(movl(dst, src));
2486 }
2487
2488 void MacroAssembler::movptr(Address dst, Register src) {
2489 LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
2490 }
2491
2492 void MacroAssembler::movdqu(Address dst, XMMRegister src) {
2493 assert(((src->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15");
2494 Assembler::movdqu(dst, src);
2495 }
2496
2497 void MacroAssembler::movdqu(XMMRegister dst, Address src) {
2498 assert(((dst->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15");
2499 Assembler::movdqu(dst, src);
2500 }
2501
2502 void MacroAssembler::movdqu(XMMRegister dst, XMMRegister src) {
2503 assert(((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15");
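// a register self-move would be a no-op, so it is elided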
2504 if (dst->encoding() == src->encoding()) return;
2505 Assembler::movdqu(dst, src);
2506 }
2507
2508 void MacroAssembler::movdqu(XMMRegister dst, AddressLiteral src, Register scratchReg) {
2509 if (reachable(src)) {
2510 movdqu(dst, as_Address(src));
2511 } else {
2512 lea(scratchReg, src);
2513 movdqu(dst, Address(scratchReg, 0));
2514 }
2515 }
2516
2517 void MacroAssembler::vmovdqu(Address dst, XMMRegister src) {
2518 assert(((src->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15");
2519 Assembler::vmovdqu(dst, src);
2520 }
2521
2522 void MacroAssembler::vmovdqu(XMMRegister dst, Address src) {
2523 assert(((dst->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15");
2524 Assembler::vmovdqu(dst, src);
2525 }
2526
2527 void MacroAssembler::vmovdqu(XMMRegister dst, XMMRegister src) {
2528 assert(((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15");
2529 if (dst->encoding() == src->encoding()) return;
2530 Assembler::vmovdqu(dst, src);
2531 }
2532
2533 void MacroAssembler::vmovdqu(XMMRegister dst, AddressLiteral src, Register scratch_reg) {
2534 if (reachable(src)) {
2535 vmovdqu(dst, as_Address(src));
2536 } else {
2538 lea(scratch_reg, src);
2539 vmovdqu(dst, Address(scratch_reg, 0));
2540 }
2541 }
2542
2543
2544 void MacroAssembler::kmovwl(KRegister dst, AddressLiteral src, Register scratch_reg) {
2545 if (reachable(src)) {
2546 kmovwl(dst, as_Address(src));
2547 } else {
2548 lea(scratch_reg, src);
2549 kmovwl(dst, Address(scratch_reg, 0));
2550 }
2551 }
2552
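// Masked AddressLiteral moves: k0 conventionally means "no mask", so the
// unmasked encoding is emitted for it. With a real mask, merge selects
// merge-masking (unselected lanes keep dst) versus zero-masking
// (unselected lanes are cleared).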
2553 void MacroAssembler::evmovdqub(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge,
2554 int vector_len, Register scratch_reg) {
2555 if (reachable(src)) {
2556 if (mask == k0) {
2557 Assembler::evmovdqub(dst, as_Address(src), merge, vector_len);
2558 } else {
2559 Assembler::evmovdqub(dst, mask, as_Address(src), merge, vector_len);
2560 }
2561 } else {
2562 lea(scratch_reg, src);
2563 if (mask == k0) {
2564 Assembler::evmovdqub(dst, Address(scratch_reg, 0), merge, vector_len);
2565 } else {
2566 Assembler::evmovdqub(dst, mask, Address(scratch_reg, 0), merge, vector_len);
2567 }
2568 }
2569 }
2570
2571 void MacroAssembler::evmovdquw(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge,
2572 int vector_len, Register scratch_reg) {
2573 if (reachable(src)) {
2574 Assembler::evmovdquw(dst, mask, as_Address(src), merge, vector_len);
2575 } else {
2576 lea(scratch_reg, src);
2577 Assembler::evmovdquw(dst, mask, Address(scratch_reg, 0), merge, vector_len);
2578 }
2579 }
2580
2581 void MacroAssembler::evmovdqul(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge,
2582 int vector_len, Register scratch_reg) {
2583 if (reachable(src)) {
2584 Assembler::evmovdqul(dst, mask, as_Address(src), merge, vector_len);
2585 } else {
2586 lea(scratch_reg, src);
2587 Assembler::evmovdqul(dst, mask, Address(scratch_reg, 0), merge, vector_len);
2588 }
2589 }
2590
2591 void MacroAssembler::evmovdquq(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge,
2592 int vector_len, Register scratch_reg) {
2593 if (reachable(src)) {
2594 Assembler::evmovdquq(dst, mask, as_Address(src), merge, vector_len);
2595 } else {
2596 lea(scratch_reg, src);
2597 Assembler::evmovdquq(dst, mask, Address(scratch_reg, 0), merge, vector_len);
2598 }
2599 }
2600
2601 void MacroAssembler::evmovdquq(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch) {
2602 if (reachable(src)) {
2603 Assembler::evmovdquq(dst, as_Address(src), vector_len);
2604 } else {
2605 lea(rscratch, src);
2606 Assembler::evmovdquq(dst, Address(rscratch, 0), vector_len);
2607 }
2608 }
2609
2610 void MacroAssembler::movdqa(XMMRegister dst, AddressLiteral src) {
2611 if (reachable(src)) {
2612 Assembler::movdqa(dst, as_Address(src));
2613 } else {
2614 lea(rscratch1, src);
2615 Assembler::movdqa(dst, Address(rscratch1, 0));
2616 }
2617 }
2618
2619 void MacroAssembler::movsd(XMMRegister dst, AddressLiteral src) {
2620 if (reachable(src)) {
3067 lea(scratch_reg, src);
3068 Assembler::vpand(dst, nds, Address(scratch_reg, 0), vector_len);
3069 }
3070 }
3071
3072 void MacroAssembler::vpbroadcastw(XMMRegister dst, XMMRegister src, int vector_len) {
3073 assert(((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
3074 Assembler::vpbroadcastw(dst, src, vector_len);
3075 }
3076
3077 void MacroAssembler::vpcmpeqb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3078 assert(((dst->encoding() < 16 && src->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
3079 Assembler::vpcmpeqb(dst, nds, src, vector_len);
3080 }
3081
3082 void MacroAssembler::vpcmpeqw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3083 assert(((dst->encoding() < 16 && src->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
3084 Assembler::vpcmpeqw(dst, nds, src, vector_len);
3085 }
3086
3087 void MacroAssembler::evpcmpeqd(KRegister kdst, KRegister mask, XMMRegister nds,
3088 AddressLiteral src, int vector_len, Register scratch_reg) {
3089 if (reachable(src)) {
3090 Assembler::evpcmpeqd(kdst, mask, nds, as_Address(src), vector_len);
3091 } else {
3092 lea(scratch_reg, src);
3093 Assembler::evpcmpeqd(kdst, mask, nds, Address(scratch_reg, 0), vector_len);
3094 }
3095 }
3096
3097 void MacroAssembler::evpcmpd(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src,
3098 int comparison, int vector_len, Register scratch_reg) {
3099 if (reachable(src)) {
3100 Assembler::evpcmpd(kdst, mask, nds, as_Address(src), comparison, vector_len);
3101 } else {
3102 lea(scratch_reg, src);
3103 Assembler::evpcmpd(kdst, mask, nds, Address(scratch_reg, 0), comparison, vector_len);
3104 }
3105 }
3106
3107 void MacroAssembler::evpcmpq(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src,
3108 int comparison, int vector_len, Register scratch_reg) {
3109 if (reachable(src)) {
3110 Assembler::evpcmpq(kdst, mask, nds, as_Address(src), comparison, vector_len);
3111 } else {
3112 lea(scratch_reg, src);
3113 Assembler::evpcmpq(kdst, mask, nds, Address(scratch_reg, 0), comparison, vector_len);
3114 }
3115 }
3116
3117 void MacroAssembler::evpcmpb(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src,
3118 int comparison, int vector_len, Register scratch_reg) {
3119 if (reachable(src)) {
3120 Assembler::evpcmpb(kdst, mask, nds, as_Address(src), comparison, vector_len);
3121 } else {
3122 lea(scratch_reg, src);
3123 Assembler::evpcmpb(kdst, mask, nds, Address(scratch_reg, 0), comparison, vector_len);
3124 }
3125 }
3126
3127 void MacroAssembler::evpcmpw(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src,
3128 int comparison, int vector_len, Register scratch_reg) {
3129 if (reachable(src)) {
3130 Assembler::evpcmpw(kdst, mask, nds, as_Address(src), comparison, vector_len);
3131 } else {
3132 lea(scratch_reg, src);
3133 Assembler::evpcmpw(kdst, mask, nds, Address(scratch_reg, 0), comparison, vector_len);
3134 }
3135 }
3136
3137 void MacroAssembler::vpcmpCC(XMMRegister dst, XMMRegister nds, XMMRegister src, int cond_encoding, Width width, int vector_len) {
3138 if (width == Assembler::Q) {
3139 Assembler::vpcmpCCq(dst, nds, src, cond_encoding, vector_len);
3140 } else {
3141 Assembler::vpcmpCCbwd(dst, nds, src, cond_encoding, vector_len);
3142 }
3143 }
3144
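// The ISA only provides equal and greater-than compares (pcmpeq*/pcmpgt*);
// vpcmpCCW below synthesizes the remaining predicates by swapping operands
// and, for the negated ones, XORing the result with all ones.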
3145 void MacroAssembler::vpcmpCCW(XMMRegister dst, XMMRegister nds, XMMRegister src, ComparisonPredicate cond, Width width, int vector_len, Register scratch_reg) {
3146 int eq_cond_enc = 0x29;
3147 int gt_cond_enc = 0x37;
3148 if (width != Assembler::Q) {
3149 eq_cond_enc = 0x74 + width;
3150 gt_cond_enc = 0x64 + width;
3151 }
3152 switch (cond) {
3153 case eq:
3154 vpcmpCC(dst, nds, src, eq_cond_enc, width, vector_len);
3155 break;
3156 case neq:
3157 vpcmpCC(dst, nds, src, eq_cond_enc, width, vector_len);
3158 vpxor(dst, dst, ExternalAddress(StubRoutines::x86::vector_all_bits_set()), vector_len, scratch_reg);
3159 break;
3160 case le:
3161 vpcmpCC(dst, nds, src, gt_cond_enc, width, vector_len);
3162 vpxor(dst, dst, ExternalAddress(StubRoutines::x86::vector_all_bits_set()), vector_len, scratch_reg);
3163 break;
3164 case nlt:
3165 vpcmpCC(dst, src, nds, gt_cond_enc, width, vector_len);
3166 vpxor(dst, dst, ExternalAddress(StubRoutines::x86::vector_all_bits_set()), vector_len, scratch_reg);
3167 break;
3168 case lt:
3169 vpcmpCC(dst, src, nds, gt_cond_enc, width, vector_len);
3170 break;
3171 case nle:
3172 vpcmpCC(dst, nds, src, gt_cond_enc, width, vector_len);
3173 break;
3174 default:
3175 assert(false, "Should not reach here");
3176 }
3177 }
3178
3179 void MacroAssembler::vpmovzxbw(XMMRegister dst, Address src, int vector_len) {
3180 assert(((dst->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
3181 Assembler::vpmovzxbw(dst, src, vector_len);
3182 }
3183
3184 void MacroAssembler::vpmovmskb(Register dst, XMMRegister src) {
3185 assert((src->encoding() < 16),"XMM register should be 0-15");
3186 Assembler::vpmovmskb(dst, src);
3187 }
3188
3189 void MacroAssembler::vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3190 assert(((dst->encoding() < 16 && src->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
3191 Assembler::vpmullw(dst, nds, src, vector_len);
3192 }
3193
3194 void MacroAssembler::vpmullw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
3195 assert(((dst->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
3196 Assembler::vpmullw(dst, nds, src, vector_len);
3197 }
3198
3283 }
3284
3285 void MacroAssembler::vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg) {
3286 if (reachable(src)) {
3287 vandpd(dst, nds, as_Address(src), vector_len);
3288 } else {
3289 lea(scratch_reg, src);
3290 vandpd(dst, nds, Address(scratch_reg, 0), vector_len);
3291 }
3292 }
3293
3294 void MacroAssembler::vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg) {
3295 if (reachable(src)) {
3296 vandps(dst, nds, as_Address(src), vector_len);
3297 } else {
3298 lea(scratch_reg, src);
3299 vandps(dst, nds, Address(scratch_reg, 0), vector_len);
3300 }
3301 }
3302
3303 void MacroAssembler::evpord(XMMRegister dst, KRegister mask, XMMRegister nds, AddressLiteral src,
3304 bool merge, int vector_len, Register scratch_reg) {
3305 if (reachable(src)) {
3306 Assembler::evpord(dst, mask, nds, as_Address(src), merge, vector_len);
3307 } else {
3308 lea(scratch_reg, src);
3309 Assembler::evpord(dst, mask, nds, Address(scratch_reg, 0), merge, vector_len);
3310 }
3311 }
3312
3313 void MacroAssembler::vdivsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
3314 if (reachable(src)) {
3315 vdivsd(dst, nds, as_Address(src));
3316 } else {
3317 lea(rscratch1, src);
3318 vdivsd(dst, nds, Address(rscratch1, 0));
3319 }
3320 }
3321
3322 void MacroAssembler::vdivss(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
3323 if (reachable(src)) {
3324 vdivss(dst, nds, as_Address(src));
3325 } else {
3326 lea(rscratch1, src);
3327 vdivss(dst, nds, Address(rscratch1, 0));
3328 }
3329 }
3330
3331 void MacroAssembler::vmulsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
3332 if (reachable(src)) {
3389 } else {
3390 lea(scratch_reg, src);
3391 vxorps(dst, nds, Address(scratch_reg, 0), vector_len);
3392 }
3393 }
3394
3395 void MacroAssembler::vpxor(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg) {
3396 if (UseAVX > 1 || (vector_len < 1)) {
3397 if (reachable(src)) {
3398 Assembler::vpxor(dst, nds, as_Address(src), vector_len);
3399 } else {
3400 lea(scratch_reg, src);
3401 Assembler::vpxor(dst, nds, Address(scratch_reg, 0), vector_len);
3402 }
3403 } else {
3405 MacroAssembler::vxorpd(dst, nds, src, vector_len, scratch_reg);
3406 }
3407 }
3408
3409 void MacroAssembler::vpermd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg) {
3410 if (reachable(src)) {
3411 Assembler::vpermd(dst, nds, as_Address(src), vector_len);
3412 } else {
3413 lea(scratch_reg, src);
3414 Assembler::vpermd(dst, nds, Address(scratch_reg, 0), vector_len);
3415 }
3416 }
3417
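// JNI weak handles are tagged in the low bit of the pointer (see the
// STATIC_ASSERT below); masking that bit off recovers the real handle
// address.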
3418 void MacroAssembler::clear_jweak_tag(Register possibly_jweak) {
3419 const int32_t inverted_jweak_mask = ~static_cast<int32_t>(JNIHandles::weak_tag_mask);
3420 STATIC_ASSERT(inverted_jweak_mask == -2); // otherwise check this code
3421 // The inverted mask is sign-extended
3422 andptr(possibly_jweak, inverted_jweak_mask);
3423 }
3424
3425 void MacroAssembler::resolve_jobject(Register value,
3426 Register thread,
3427 Register tmp) {
3428 assert_different_registers(value, thread, tmp);
3429 Label done, not_weak;
3430 testptr(value, value);
3431 jcc(Assembler::zero, done); // Use NULL as-is.
3432 testptr(value, JNIHandles::weak_tag_mask); // Test for jweak tag.
3433 jcc(Assembler::zero, not_weak);
3434 // Resolve jweak.
3435 access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF,
3436 value, Address(value, -JNIHandles::weak_tag_value), tmp, thread);
5926
5927 // scale is in rcx on both Win64 and Unix
5928 ShortBranchVerifier sbv(this);
5929
5930 shlq(length);
5931 xorq(result, result);
5932
5933 if ((AVX3Threshold == 0) && (UseAVX > 2) &&
5934 VM_Version::supports_avx512vlbw()) {
5935 Label VECTOR64_LOOP, VECTOR64_NOT_EQUAL, VECTOR32_TAIL;
5936
5937 cmpq(length, 64);
5938 jcc(Assembler::less, VECTOR32_TAIL);
5939
5940 movq(tmp1, length);
5941 andq(tmp1, 0x3F); // tail count
5942 andq(length, ~(0x3F)); // vector count
5943
5944 bind(VECTOR64_LOOP);
5945 // AVX512 code to compare 64-byte vectors.
5946 evmovdqub(rymm0, Address(obja, result), /*merge*/ false, Assembler::AVX_512bit);
5947 evpcmpeqb(k7, rymm0, Address(objb, result), Assembler::AVX_512bit);
5948 kortestql(k7, k7);
5949 jcc(Assembler::aboveEqual, VECTOR64_NOT_EQUAL); // mismatch
5950 addq(result, 64);
5951 subq(length, 64);
5952 jccb(Assembler::notZero, VECTOR64_LOOP);
5953
5955 testq(tmp1, tmp1);
5956 jcc(Assembler::zero, SAME_TILL_END);
5957
5958 //bind(VECTOR64_TAIL);
5959 // AVX512 code to compare the remaining tail of up to 63 bytes.
5960 mov64(tmp2, 0xFFFFFFFFFFFFFFFF);
5961 shlxq(tmp2, tmp2, tmp1);
5962 notq(tmp2);
5963 kmovql(k3, tmp2);
5964
5965 evmovdqub(rymm0, k3, Address(obja, result), /*merge*/ false, Assembler::AVX_512bit);
5966 evpcmpeqb(k7, k3, rymm0, Address(objb, result), Assembler::AVX_512bit);
5967
5968 ktestql(k7, k3);
5969 jcc(Assembler::below, SAME_TILL_END); // no mismatch
5970
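// k7 has a 1 for every equal byte pair; after inversion, tzcnt yields the
// index of the first mismatching byte. shrq(result) shifts by cl,
// converting the byte offset back to an element index (the scale is in
// rcx, as noted above).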
5971 bind(VECTOR64_NOT_EQUAL);
5972 kmovql(tmp1, k7);
5973 notq(tmp1);
5974 tzcntq(tmp1, tmp1);
5975 addq(result, tmp1);
5976 shrq(result);
5977 jmp(DONE);
5978 bind(VECTOR32_TAIL);
5979 }
5980
5981 cmpq(length, 8);
5982 jcc(Assembler::equal, VECTOR8_LOOP);
5983 jcc(Assembler::less, VECTOR4_TAIL);
5984
5985 if (UseAVX >= 2) {
7384 evpbroadcastw(tmp2Reg, result, Assembler::AVX_512bit);
7385
7386 testl(len, -64);
7387 jcc(Assembler::zero, post_alignment);
7388
7389 movl(tmp5, dst);
7390 andl(tmp5, (32 - 1));
7391 negl(tmp5);
7392 andl(tmp5, (32 - 1));
7393
7394 // skip the alignment prologue when dst is already aligned
7395 testl(tmp5, 0xFFFFFFFF);
7396 jcc(Assembler::zero, post_alignment);
7397
7398 // ~(~0 << tmp5), where tmp5 is the # of elements to process for alignment
7399 movl(result, 0xFFFFFFFF);
7400 shlxl(result, result, tmp5);
7401 notl(result);
7402 kmovdl(k3, result);
7403
7404 evmovdquw(tmp1Reg, k3, Address(src, 0), /*merge*/ false, Assembler::AVX_512bit);
7405 evpcmpuw(k2, k3, tmp1Reg, tmp2Reg, Assembler::le, Assembler::AVX_512bit);
7406 ktestd(k2, k3);
7407 jcc(Assembler::carryClear, return_zero);
7408
7409 evpmovwb(Address(dst, 0), k3, tmp1Reg, Assembler::AVX_512bit);
7410
7411 addptr(src, tmp5);
7412 addptr(src, tmp5);
7413 addptr(dst, tmp5);
7414 subl(len, tmp5);
7415
7416 bind(post_alignment);
7417 // end of alignment
7418
7419 movl(tmp5, len);
7420 andl(tmp5, (32 - 1)); // tail count (in chars)
7421 andl(len, ~(32 - 1)); // vector count (in chars)
7422 jcc(Assembler::zero, copy_loop_tail);
7423
7424 lea(src, Address(src, len, Address::times_2));
7425 lea(dst, Address(dst, len, Address::times_1));
7426 negptr(len);
7427
7428 bind(copy_32_loop);
7429 evmovdquw(tmp1Reg, Address(src, len, Address::times_2), /*merge*/ false, Assembler::AVX_512bit);
7430 evpcmpuw(k2, tmp1Reg, tmp2Reg, Assembler::le, Assembler::AVX_512bit);
7431 kortestdl(k2, k2);
7432 jcc(Assembler::carryClear, return_zero);
7433
7434 // All elements in the current chunk are valid candidates for
7435 // compression. Write the truncated byte elements to memory.
7436 evpmovwb(Address(dst, len, Address::times_1), tmp1Reg, Assembler::AVX_512bit);
7437 addptr(len, 32);
7438 jcc(Assembler::notZero, copy_32_loop);
7439
7440 bind(copy_loop_tail);
7441 // bail out when there is nothing to be done
7442 testl(tmp5, 0xFFFFFFFF);
7443 jcc(Assembler::zero, return_length);
7444
7445 movl(len, tmp5);
7446
7447 // ~(~0 << len), where len is the # of remaining elements to process
7448 movl(result, 0xFFFFFFFF);
7449 shlxl(result, result, len);
7450 notl(result);
7451
7452 kmovdl(k3, result);
7453
7454 evmovdquw(tmp1Reg, k3, Address(src, 0), /*merge*/ false, Assembler::AVX_512bit);
7455 evpcmpuw(k2, k3, tmp1Reg, tmp2Reg, Assembler::le, Assembler::AVX_512bit);
7456 ktestd(k2, k3);
7457 jcc(Assembler::carryClear, return_zero);
7458
7459 evpmovwb(Address(dst, 0), k3, tmp1Reg, Assembler::AVX_512bit);
7460 jmp(return_length);
7461
7462 bind(below_threshold);
7463 }
7464
7465 if (UseSSE42Intrinsics) {
7466 Label copy_32_loop, copy_16, copy_tail;
7467
7468 movl(result, len);
7469
7470 movl(tmp5, 0xff00ff00); // create mask to test for Unicode chars in vectors
7471
7472 // vectored compression
7473 andl(len, 0xfffffff0); // vector count (in chars)
7474 andl(result, 0x0000000f); // tail count (in chars)
7579 testl(len, -16);
7580 jcc(Assembler::zero, below_threshold);
7581
7582 testl(len, -1 * AVX3Threshold);
7583 jcc(Assembler::zero, avx3_threshold);
7584
7585 // Pre-calculate the counts so the main loop needs only one arithmetic
7586 // operation per iteration.
7587 andl(tmp2, (32 - 1)); // tail count (in chars), 32 element wide loop
7588 andl(len, -32); // vector count
7589 jccb(Assembler::zero, copy_tail);
7590
7591 lea(src, Address(src, len, Address::times_1));
7592 lea(dst, Address(dst, len, Address::times_2));
7593 negptr(len);
7594
7595
7596 // inflate 32 chars per iter
7597 bind(copy_32_loop);
7598 vpmovzxbw(tmp1, Address(src, len, Address::times_1), Assembler::AVX_512bit);
7599 evmovdquw(Address(dst, len, Address::times_2), tmp1, /*merge*/ false, Assembler::AVX_512bit);
7600 addptr(len, 32);
7601 jcc(Assembler::notZero, copy_32_loop);
7602
7603 bind(copy_tail);
7604 // bail out when there is nothing to be done
7605 testl(tmp2, -1); // we don't destroy the contents of tmp2 here
7606 jcc(Assembler::zero, done);
7607
7608 // ~(~0 << tmp2), where tmp2 is the # of remaining elements to process
7609 movl(tmp3_aliased, -1);
7610 shlxl(tmp3_aliased, tmp3_aliased, tmp2);
7611 notl(tmp3_aliased);
7612 kmovdl(k2, tmp3_aliased);
7613 evpmovzxbw(tmp1, k2, Address(src, 0), Assembler::AVX_512bit);
7614 evmovdquw(Address(dst, 0), k2, tmp1, /*merge*/ true, Assembler::AVX_512bit);
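// The k2 mask restricts both the zero-extending load and the store to the
// tmp2 remaining elements, so no scalar tail loop is needed.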
7615
7616 jmp(done);
7617 bind(avx3_threshold);
7618 }
7619 if (UseSSE42Intrinsics) {
7620 Label copy_16_loop, copy_8_loop, copy_bytes, copy_new_tail, copy_tail;
7621
7622 if (UseAVX > 1) {
7623 andl(tmp2, (16 - 1));
7624 andl(len, -16);
7625 jccb(Assembler::zero, copy_new_tail);
7626 } else {
7627 andl(tmp2, 0x00000007); // tail count (in chars)
7628 andl(len, 0xfffffff8); // vector count (in chars)
7629 jccb(Assembler::zero, copy_tail);
7630 }
7631
7632 // vectored inflation
7633 lea(src, Address(src, len, Address::times_1));
7634 lea(dst, Address(dst, len, Address::times_2));