
src/hotspot/cpu/x86/macroAssembler_x86.cpp

rev 60516 : manual merge with default


  95 // 32bit versions
  96 
  97 Address MacroAssembler::as_Address(AddressLiteral adr) {
  98   return Address(adr.target(), adr.rspec());
  99 }
 100 
 101 Address MacroAssembler::as_Address(ArrayAddress adr) {
 102   return Address::make_array(adr);
 103 }
 104 
 105 void MacroAssembler::call_VM_leaf_base(address entry_point,
 106                                        int number_of_arguments) {
 107   call(RuntimeAddress(entry_point));
 108   increment(rsp, number_of_arguments * wordSize);
 109 }
 110 
 111 void MacroAssembler::cmpklass(Address src1, Metadata* obj) {
 112   cmp_literal32(src1, (int32_t)obj, metadata_Relocation::spec_for_immediate());
 113 }
 114 

 115 void MacroAssembler::cmpklass(Register src1, Metadata* obj) {
 116   cmp_literal32(src1, (int32_t)obj, metadata_Relocation::spec_for_immediate());
 117 }
 118 
 119 void MacroAssembler::cmpoop_raw(Address src1, jobject obj) {
 120   cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate());
 121 }
 122 
 123 void MacroAssembler::cmpoop_raw(Register src1, jobject obj) {
 124   cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate());
 125 }
 126 
 127 void MacroAssembler::cmpoop(Address src1, jobject obj) {
 128   BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
 129   bs->obj_equals(this, src1, obj);
 130 }
 131 
 132 void MacroAssembler::cmpoop(Register src1, jobject obj) {
 133   BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
 134   bs->obj_equals(this, src1, obj);


2483 void MacroAssembler::movptr(Register dst, intptr_t src) {
2484   LP64_ONLY(mov64(dst, src)) NOT_LP64(movl(dst, src));
2485 }
2486 
2487 void MacroAssembler::movptr(Address dst, Register src) {
2488   LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
2489 }
2490 
2491 void MacroAssembler::movdqu(Address dst, XMMRegister src) {
2492     assert(((src->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15");
2493     Assembler::movdqu(dst, src);
2494 }
2495 
2496 void MacroAssembler::movdqu(XMMRegister dst, Address src) {
2497     assert(((dst->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15");
2498     Assembler::movdqu(dst, src);
2499 }
2500 
2501 void MacroAssembler::movdqu(XMMRegister dst, XMMRegister src) {
2502     assert(((dst->encoding() < 16  && src->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15");

2503     Assembler::movdqu(dst, src);
2504 }
2505 
2506 void MacroAssembler::movdqu(XMMRegister dst, AddressLiteral src, Register scratchReg) {
2507   if (reachable(src)) {
2508     movdqu(dst, as_Address(src));
2509   } else {
2510     lea(scratchReg, src);
2511     movdqu(dst, Address(scratchReg, 0));
2512   }
2513 }
2514 
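The AddressLiteral overload above is the first of many wrappers in this file built on the same reachability idiom; a minimal standalone sketch of the idea, assuming a 64-bit target (rip_reachable is an illustrative name, not a HotSpot function):

  #include <cstdint>

  // Conceptually, reachable(src) asks whether the literal's target can be encoded as a
  // signed 32-bit RIP-relative displacement from the code being emitted; when it cannot,
  // the wrapper first materializes the full 64-bit address into a scratch register via lea().
  static bool rip_reachable(const void* code_pos, const void* target) {
    intptr_t disp = (intptr_t)target - ((intptr_t)code_pos + 4);  // 4-byte displacement field follows
    return disp == (intptr_t)(int32_t)disp;                       // fits in a signed 32-bit field?
  }
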
2515 void MacroAssembler::vmovdqu(Address dst, XMMRegister src) {
2516     assert(((src->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15");
2517     Assembler::vmovdqu(dst, src);
2518 }
2519 
2520 void MacroAssembler::vmovdqu(XMMRegister dst, Address src) {
2521     assert(((dst->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15");
2522     Assembler::vmovdqu(dst, src);
2523 }
2524 
2525 void MacroAssembler::vmovdqu(XMMRegister dst, XMMRegister src) {
2526     assert(((dst->encoding() < 16  && src->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15");

2527     Assembler::vmovdqu(dst, src);
2528 }
2529 
2530 void MacroAssembler::vmovdqu(XMMRegister dst, AddressLiteral src, Register scratch_reg) {
2531   if (reachable(src)) {
2532     vmovdqu(dst, as_Address(src));
2533   }
2534   else {
2535     lea(scratch_reg, src);
2536     vmovdqu(dst, Address(scratch_reg, 0));
2537   }
2538 }
2539 
2540 void MacroAssembler::evmovdquq(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch) {
2541   if (reachable(src)) {
2542     Assembler::evmovdquq(dst, as_Address(src), vector_len);
2543   } else {
2544     lea(rscratch, src);
2545     Assembler::evmovdquq(dst, Address(rscratch, 0), vector_len);
2546   }
2547 }
2548 
2549 void MacroAssembler::movdqa(XMMRegister dst, AddressLiteral src) {
2550   if (reachable(src)) {
2551     Assembler::movdqa(dst, as_Address(src));
2552   } else {
2553     lea(rscratch1, src);
2554     Assembler::movdqa(dst, Address(rscratch1, 0));
2555   }
2556 }
2557 
2558 void MacroAssembler::movsd(XMMRegister dst, AddressLiteral src) {
2559   if (reachable(src)) {


3006     lea(scratch_reg, src);
3007     Assembler::vpand(dst, nds, Address(scratch_reg, 0), vector_len);
3008   }
3009 }
3010 
3011 void MacroAssembler::vpbroadcastw(XMMRegister dst, XMMRegister src, int vector_len) {
3012   assert(((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
3013   Assembler::vpbroadcastw(dst, src, vector_len);
3014 }
3015 
3016 void MacroAssembler::vpcmpeqb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3017   assert(((dst->encoding() < 16 && src->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
3018   Assembler::vpcmpeqb(dst, nds, src, vector_len);
3019 }
3020 
3021 void MacroAssembler::vpcmpeqw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3022   assert(((dst->encoding() < 16 && src->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
3023   Assembler::vpcmpeqw(dst, nds, src, vector_len);
3024 }
3025 
3026 void MacroAssembler::vpmovzxbw(XMMRegister dst, Address src, int vector_len) {
3027   assert(((dst->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
3028   Assembler::vpmovzxbw(dst, src, vector_len);
3029 }
3030 
3031 void MacroAssembler::vpmovmskb(Register dst, XMMRegister src) {
3032   assert((src->encoding() < 16),"XMM register should be 0-15");
3033   Assembler::vpmovmskb(dst, src);
3034 }
3035 
3036 void MacroAssembler::vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3037   assert(((dst->encoding() < 16 && src->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
3038   Assembler::vpmullw(dst, nds, src, vector_len);
3039 }
3040 
3041 void MacroAssembler::vpmullw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
3042   assert(((dst->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
3043   Assembler::vpmullw(dst, nds, src, vector_len);
3044 }
3045 


3130 }
3131 
3132 void MacroAssembler::vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg) {
3133   if (reachable(src)) {
3134     vandpd(dst, nds, as_Address(src), vector_len);
3135   } else {
3136     lea(scratch_reg, src);
3137     vandpd(dst, nds, Address(scratch_reg, 0), vector_len);
3138   }
3139 }
3140 
3141 void MacroAssembler::vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg) {
3142   if (reachable(src)) {
3143     vandps(dst, nds, as_Address(src), vector_len);
3144   } else {
3145     lea(scratch_reg, src);
3146     vandps(dst, nds, Address(scratch_reg, 0), vector_len);
3147   }
3148 }
3149 
3150 void MacroAssembler::vdivsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
3151   if (reachable(src)) {
3152     vdivsd(dst, nds, as_Address(src));
3153   } else {
3154     lea(rscratch1, src);
3155     vdivsd(dst, nds, Address(rscratch1, 0));
3156   }
3157 }
3158 
3159 void MacroAssembler::vdivss(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
3160   if (reachable(src)) {
3161     vdivss(dst, nds, as_Address(src));
3162   } else {
3163     lea(rscratch1, src);
3164     vdivss(dst, nds, Address(rscratch1, 0));
3165   }
3166 }
3167 
3168 void MacroAssembler::vmulsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
3169   if (reachable(src)) {


3226   } else {
3227     lea(scratch_reg, src);
3228     vxorps(dst, nds, Address(scratch_reg, 0), vector_len);
3229   }
3230 }
3231 
3232 void MacroAssembler::vpxor(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg) {
3233   if (UseAVX > 1 || (vector_len < 1)) {
3234     if (reachable(src)) {
3235       Assembler::vpxor(dst, nds, as_Address(src), vector_len);
3236     } else {
3237       lea(scratch_reg, src);
3238       Assembler::vpxor(dst, nds, Address(scratch_reg, 0), vector_len);
3239     }
3240   }
3241   else {
3242     MacroAssembler::vxorpd(dst, nds, src, vector_len, scratch_reg);
3243   }
3244 }
3245 
3246 //-------------------------------------------------------------------------------------------
3247 
3248 void MacroAssembler::clear_jweak_tag(Register possibly_jweak) {
3249   const int32_t inverted_jweak_mask = ~static_cast<int32_t>(JNIHandles::weak_tag_mask);
3250   STATIC_ASSERT(inverted_jweak_mask == -2); // otherwise check this code
3251   // The inverted mask is sign-extended
3252   andptr(possibly_jweak, inverted_jweak_mask);
3253 }
3254 
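clear_jweak_tag above and resolve_jobject below both rely on how weak JNI handles are tagged; a hedged standalone illustration of that scheme (helper names are for illustration only):

  #include <cstdint>

  // A weak JNI handle is published as the handle address with its lowest bit set
  // (weak_tag_mask == 1, which is why the inverted mask asserted above is -2).
  static bool     is_jweak(intptr_t handle)  { return (handle & 1) != 0; }
  static intptr_t strip_tag(intptr_t handle) { return handle & ~(intptr_t)1; }
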
3255 void MacroAssembler::resolve_jobject(Register value,
3256                                      Register thread,
3257                                      Register tmp) {
3258   assert_different_registers(value, thread, tmp);
3259   Label done, not_weak;
3260   testptr(value, value);
3261   jcc(Assembler::zero, done);                // Use NULL as-is.
3262   testptr(value, JNIHandles::weak_tag_mask); // Test for jweak tag.
3263   jcc(Assembler::zero, not_weak);
3264   // Resolve jweak.
3265   access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF,
3266                  value, Address(value, -JNIHandles::weak_tag_value), tmp, thread);


5756 
5757   // scale is in rcx on both Win64 and Unix
5758   ShortBranchVerifier sbv(this);
5759 
5760   shlq(length);
5761   xorq(result, result);
5762 
5763   if ((AVX3Threshold == 0) && (UseAVX > 2) &&
5764       VM_Version::supports_avx512vlbw()) {
5765     Label VECTOR64_LOOP, VECTOR64_NOT_EQUAL, VECTOR32_TAIL;
5766 
5767     cmpq(length, 64);
5768     jcc(Assembler::less, VECTOR32_TAIL);
5769 
5770     movq(tmp1, length);
5771     andq(tmp1, 0x3F);      // tail count
5772     andq(length, ~(0x3F)); // vector count
5773 
5774     bind(VECTOR64_LOOP);
5775     // AVX512 code to compare 64 byte vectors.
5776     evmovdqub(rymm0, Address(obja, result), Assembler::AVX_512bit);
5777     evpcmpeqb(k7, rymm0, Address(objb, result), Assembler::AVX_512bit);
5778     kortestql(k7, k7);
5779     jcc(Assembler::aboveEqual, VECTOR64_NOT_EQUAL);     // mismatch
5780     addq(result, 64);
5781     subq(length, 64);
5782     jccb(Assembler::notZero, VECTOR64_LOOP);
5783 
5784     //bind(VECTOR64_TAIL);
5785     testq(tmp1, tmp1);
5786     jcc(Assembler::zero, SAME_TILL_END);
5787 
5788     //bind(VECTOR64_TAIL);
5789     // AVX512 code to compare up to 63 byte vectors.
5790     mov64(tmp2, 0xFFFFFFFFFFFFFFFF);
5791     shlxq(tmp2, tmp2, tmp1);
5792     notq(tmp2);
5793     kmovql(k3, tmp2);
5794 
5795     evmovdqub(rymm0, k3, Address(obja, result), Assembler::AVX_512bit);
5796     evpcmpeqb(k7, k3, rymm0, Address(objb, result), Assembler::AVX_512bit);
5797 
5798     ktestql(k7, k3);
5799     jcc(Assembler::below, SAME_TILL_END);     // no mismatch
5800 
5801     bind(VECTOR64_NOT_EQUAL);
5802     kmovql(tmp1, k7);    // k7 has 1-bits for the bytes that compared equal
5803     notq(tmp1);          // now 1-bits mark the mismatching bytes
5804     tzcntq(tmp1, tmp1);  // index of the first mismatching byte in this block
5805     addq(result, tmp1);  // absolute byte offset of the mismatch
5806     shrq(result);        // scale (in cl) converts the byte offset to an element index
5807     jmp(DONE);
5808     bind(VECTOR32_TAIL);
5809   }
5810 
5811   cmpq(length, 8);
5812   jcc(Assembler::equal, VECTOR8_LOOP);
5813   jcc(Assembler::less, VECTOR4_TAIL);
5814 
5815   if (UseAVX >= 2) {


7214     evpbroadcastw(tmp2Reg, result, Assembler::AVX_512bit);
7215 
7216     testl(len, -64);
7217     jcc(Assembler::zero, post_alignment);
7218 
7219     movl(tmp5, dst);
7220     andl(tmp5, (32 - 1));
7221     negl(tmp5);
7222     andl(tmp5, (32 - 1));
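          // (illustration: the andl/negl/andl sequence computes (-(dst & 31)) & 31, i.e. the number
          //  of elements needed to reach a 32-byte-aligned dst; zero when dst is already aligned)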
7223 
7224     // bail out when there is nothing to be done
7225     testl(tmp5, 0xFFFFFFFF);
7226     jcc(Assembler::zero, post_alignment);
7227 
7228     // ~(~0 << len), where len is the # of remaining elements to process
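          // (illustration: for len == 5, ~0 << 5 ends in binary 11100000, so ~(~0 << 5) == 0x1F, the low 5 mask bits set)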
7229     movl(result, 0xFFFFFFFF);
7230     shlxl(result, result, tmp5);
7231     notl(result);
7232     kmovdl(k3, result);
7233 
7234     evmovdquw(tmp1Reg, k3, Address(src, 0), Assembler::AVX_512bit);
7235     evpcmpuw(k2, k3, tmp1Reg, tmp2Reg, Assembler::le, Assembler::AVX_512bit);
7236     ktestd(k2, k3);
7237     jcc(Assembler::carryClear, return_zero);
7238 
7239     evpmovwb(Address(dst, 0), k3, tmp1Reg, Assembler::AVX_512bit);
7240 
7241     addptr(src, tmp5);
7242     addptr(src, tmp5);
7243     addptr(dst, tmp5);
7244     subl(len, tmp5);
7245 
7246     bind(post_alignment);
7247     // end of alignment
7248 
7249     movl(tmp5, len);
7250     andl(tmp5, (32 - 1));    // tail count (in chars)
7251     andl(len, ~(32 - 1));    // vector count (in chars)
7252     jcc(Assembler::zero, copy_loop_tail);
7253 
7254     lea(src, Address(src, len, Address::times_2));
7255     lea(dst, Address(dst, len, Address::times_1));
7256     negptr(len);
7257 
7258     bind(copy_32_loop);
7259     evmovdquw(tmp1Reg, Address(src, len, Address::times_2), Assembler::AVX_512bit);
7260     evpcmpuw(k2, tmp1Reg, tmp2Reg, Assembler::le, Assembler::AVX_512bit);
7261     kortestdl(k2, k2);
7262     jcc(Assembler::carryClear, return_zero);
7263 
7264     // All elements in the currently processed chunk are valid candidates for
7265     // compression. Write the truncated byte elements to memory.
7266     evpmovwb(Address(dst, len, Address::times_1), tmp1Reg, Assembler::AVX_512bit);
7267     addptr(len, 32);
7268     jcc(Assembler::notZero, copy_32_loop);
7269 
7270     bind(copy_loop_tail);
7271     // bail out when there is nothing to be done
7272     testl(tmp5, 0xFFFFFFFF);
7273     jcc(Assembler::zero, return_length);
7274 
7275     movl(len, tmp5);
7276 
7277     // ~(~0 << len), where len is the # of remaining elements to process
7278     movl(result, 0xFFFFFFFF);
7279     shlxl(result, result, len);
7280     notl(result);
7281 
7282     kmovdl(k3, result);
7283 
7284     evmovdquw(tmp1Reg, k3, Address(src, 0), Assembler::AVX_512bit);
7285     evpcmpuw(k2, k3, tmp1Reg, tmp2Reg, Assembler::le, Assembler::AVX_512bit);
7286     ktestd(k2, k3);
7287     jcc(Assembler::carryClear, return_zero);
7288 
7289     evpmovwb(Address(dst, 0), k3, tmp1Reg, Assembler::AVX_512bit);
7290     jmp(return_length);
7291 
7292     bind(below_threshold);
7293   }
7294 
7295   if (UseSSE42Intrinsics) {
7296     Label copy_32_loop, copy_16, copy_tail;
7297 
7298     movl(result, len);
7299 
7300     movl(tmp5, 0xff00ff00);   // create mask to test for Unicode chars in vectors
7301 
7302     // vectored compression
7303     andl(len, 0xfffffff0);    // vector count (in chars)
7304     andl(result, 0x0000000f);    // tail count (in chars)


7409     testl(len, -16);
7410     jcc(Assembler::zero, below_threshold);
7411 
7412     testl(len, -1 * AVX3Threshold);
7413     jcc(Assembler::zero, avx3_threshold);
7414 
7415     // Pre-calculate the tail and vector counts so that the main loop needs
7416     // only one arithmetic operation
7417     andl(tmp2, (32 - 1)); // tail count (in chars), 32 element wide loop
7418     andl(len, -32);     // vector count
7419     jccb(Assembler::zero, copy_tail);
7420 
7421     lea(src, Address(src, len, Address::times_1));
7422     lea(dst, Address(dst, len, Address::times_2));
7423     negptr(len);
7424 
7425 
7426     // inflate 32 chars per iter
7427     bind(copy_32_loop);
7428     vpmovzxbw(tmp1, Address(src, len, Address::times_1), Assembler::AVX_512bit);
7429     evmovdquw(Address(dst, len, Address::times_2), tmp1, Assembler::AVX_512bit);
7430     addptr(len, 32);
7431     jcc(Assembler::notZero, copy_32_loop);
7432 
7433     bind(copy_tail);
7434     // bail out when there is nothing to be done
7435     testl(tmp2, -1); // we don't destroy the contents of tmp2 here
7436     jcc(Assembler::zero, done);
7437 
7438     // ~(~0 << length), where length is the # of remaining elements to process
7439     movl(tmp3_aliased, -1);
7440     shlxl(tmp3_aliased, tmp3_aliased, tmp2);
7441     notl(tmp3_aliased);
7442     kmovdl(k2, tmp3_aliased);
7443     evpmovzxbw(tmp1, k2, Address(src, 0), Assembler::AVX_512bit);
7444     evmovdquw(Address(dst, 0), k2, tmp1, Assembler::AVX_512bit);
7445 
7446     jmp(done);
7447     bind(avx3_threshold);
7448   }
7449   if (UseSSE42Intrinsics) {
7450     Label copy_16_loop, copy_8_loop, copy_bytes, copy_new_tail, copy_tail;
7451 
7452     if (UseAVX > 1) {
7453       andl(tmp2, (16 - 1));
7454       andl(len, -16);
7455       jccb(Assembler::zero, copy_new_tail);
7456     } else {
7457       andl(tmp2, 0x00000007);   // tail count (in chars)
7458       andl(len, 0xfffffff8);    // vector count (in chars)
7459       jccb(Assembler::zero, copy_tail);
7460     }
7461 
7462     // vectored inflation
7463     lea(src, Address(src, len, Address::times_1));
7464     lea(dst, Address(dst, len, Address::times_2));




  95 // 32bit versions
  96 
  97 Address MacroAssembler::as_Address(AddressLiteral adr) {
  98   return Address(adr.target(), adr.rspec());
  99 }
 100 
 101 Address MacroAssembler::as_Address(ArrayAddress adr) {
 102   return Address::make_array(adr);
 103 }
 104 
 105 void MacroAssembler::call_VM_leaf_base(address entry_point,
 106                                        int number_of_arguments) {
 107   call(RuntimeAddress(entry_point));
 108   increment(rsp, number_of_arguments * wordSize);
 109 }
 110 
 111 void MacroAssembler::cmpklass(Address src1, Metadata* obj) {
 112   cmp_literal32(src1, (int32_t)obj, metadata_Relocation::spec_for_immediate());
 113 }
 114 
 115 
 116 void MacroAssembler::cmpklass(Register src1, Metadata* obj) {
 117   cmp_literal32(src1, (int32_t)obj, metadata_Relocation::spec_for_immediate());
 118 }
 119 
 120 void MacroAssembler::cmpoop_raw(Address src1, jobject obj) {
 121   cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate());
 122 }
 123 
 124 void MacroAssembler::cmpoop_raw(Register src1, jobject obj) {
 125   cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate());
 126 }
 127 
 128 void MacroAssembler::cmpoop(Address src1, jobject obj) {
 129   BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
 130   bs->obj_equals(this, src1, obj);
 131 }
 132 
 133 void MacroAssembler::cmpoop(Register src1, jobject obj) {
 134   BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
 135   bs->obj_equals(this, src1, obj);


2484 void MacroAssembler::movptr(Register dst, intptr_t src) {
2485   LP64_ONLY(mov64(dst, src)) NOT_LP64(movl(dst, src));
2486 }
2487 
2488 void MacroAssembler::movptr(Address dst, Register src) {
2489   LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
2490 }
2491 
2492 void MacroAssembler::movdqu(Address dst, XMMRegister src) {
2493     assert(((src->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15");
2494     Assembler::movdqu(dst, src);
2495 }
2496 
2497 void MacroAssembler::movdqu(XMMRegister dst, Address src) {
2498     assert(((dst->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15");
2499     Assembler::movdqu(dst, src);
2500 }
2501 
2502 void MacroAssembler::movdqu(XMMRegister dst, XMMRegister src) {
2503     assert(((dst->encoding() < 16  && src->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15");
2504     if (dst->encoding() == src->encoding()) return;
2505     Assembler::movdqu(dst, src);
2506 }
2507 
2508 void MacroAssembler::movdqu(XMMRegister dst, AddressLiteral src, Register scratchReg) {
2509   if (reachable(src)) {
2510     movdqu(dst, as_Address(src));
2511   } else {
2512     lea(scratchReg, src);
2513     movdqu(dst, Address(scratchReg, 0));
2514   }
2515 }
2516 
2517 void MacroAssembler::vmovdqu(Address dst, XMMRegister src) {
2518     assert(((src->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15");
2519     Assembler::vmovdqu(dst, src);
2520 }
2521 
2522 void MacroAssembler::vmovdqu(XMMRegister dst, Address src) {
2523     assert(((dst->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15");
2524     Assembler::vmovdqu(dst, src);
2525 }
2526 
2527 void MacroAssembler::vmovdqu(XMMRegister dst, XMMRegister src) {
2528     assert(((dst->encoding() < 16  && src->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15");
2529     if (dst->encoding() == src->encoding()) return;
2530     Assembler::vmovdqu(dst, src);
2531 }
2532 
2533 void MacroAssembler::vmovdqu(XMMRegister dst, AddressLiteral src, Register scratch_reg) {
2534   if (reachable(src)) {
2535     vmovdqu(dst, as_Address(src));
2536   }
2537   else {
2538     lea(scratch_reg, src);
2539     vmovdqu(dst, Address(scratch_reg, 0));
2540   }
2541 }
2542 
2543 
2544 void MacroAssembler::kmovwl(KRegister dst, AddressLiteral src, Register scratch_reg) {
2545   if (reachable(src)) {
2546     kmovwl(dst, as_Address(src));
2547   } else {
2548     lea(scratch_reg, src);
2549     kmovwl(dst, Address(scratch_reg, 0));
2550   }
2551 }
2552 
2553 void MacroAssembler::evmovdqub(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge,
2554                                int vector_len, Register scratch_reg) {
2555   if (reachable(src)) {
2556     if (mask == k0) {
2557       Assembler::evmovdqub(dst, as_Address(src), merge, vector_len);
2558     } else {
2559       Assembler::evmovdqub(dst, mask, as_Address(src), merge, vector_len);
2560     }
2561   } else {
2562     lea(scratch_reg, src);
2563     if (mask == k0) {
2564       Assembler::evmovdqub(dst, Address(scratch_reg, 0), merge, vector_len);
2565     } else {
2566       Assembler::evmovdqub(dst, mask, Address(scratch_reg, 0), merge, vector_len);
2567     }
2568   }
2569 }
2570 
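The mask and merge parameters on these wrappers follow AVX-512 masking semantics; a scalar sketch of the per-lane rule (masked_lane is an illustrative helper, not HotSpot code):

  #include <cstdint>

  // With merge-masking a deselected lane keeps the destination's previous contents,
  // with zero-masking it is cleared; k0 conventionally means "no mask" (all lanes selected).
  static uint8_t masked_lane(uint8_t dst_old, uint8_t src, bool mask_bit, bool merge) {
    if (mask_bit) return src;       // selected lane: take the source element
    return merge ? dst_old : 0;     // deselected lane: merge keeps dst, zeroing clears it
  }
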
2571 void MacroAssembler::evmovdquw(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge,
2572                                int vector_len, Register scratch_reg) {
2573   if (reachable(src)) {
2574     Assembler::evmovdquw(dst, mask, as_Address(src), merge, vector_len);
2575   } else {
2576     lea(scratch_reg, src);
2577     Assembler::evmovdquw(dst, mask, Address(scratch_reg, 0), merge, vector_len);
2578   }
2579 }
2580 
2581 void MacroAssembler::evmovdqul(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge,
2582                                int vector_len, Register scratch_reg) {
2583   if (reachable(src)) {
2584     Assembler::evmovdqul(dst, mask, as_Address(src), merge, vector_len);
2585   } else {
2586     lea(scratch_reg, src);
2587     Assembler::evmovdqul(dst, mask, Address(scratch_reg, 0), merge, vector_len);
2588   }
2589 }
2590 
2591 void MacroAssembler::evmovdquq(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge,
2592                                int vector_len, Register scratch_reg) {
2593   if (reachable(src)) {
2594     Assembler::evmovdquq(dst, mask, as_Address(src), merge, vector_len);
2595   } else {
2596     lea(scratch_reg, src);
2597     Assembler::evmovdquq(dst, mask, Address(scratch_reg, 0), merge, vector_len);
2598   }
2599 }
2600 
2601 void MacroAssembler::evmovdquq(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch) {
2602   if (reachable(src)) {
2603     Assembler::evmovdquq(dst, as_Address(src), vector_len);
2604   } else {
2605     lea(rscratch, src);
2606     Assembler::evmovdquq(dst, Address(rscratch, 0), vector_len);
2607   }
2608 }
2609 
2610 void MacroAssembler::movdqa(XMMRegister dst, AddressLiteral src) {
2611   if (reachable(src)) {
2612     Assembler::movdqa(dst, as_Address(src));
2613   } else {
2614     lea(rscratch1, src);
2615     Assembler::movdqa(dst, Address(rscratch1, 0));
2616   }
2617 }
2618 
2619 void MacroAssembler::movsd(XMMRegister dst, AddressLiteral src) {
2620   if (reachable(src)) {


3067     lea(scratch_reg, src);
3068     Assembler::vpand(dst, nds, Address(scratch_reg, 0), vector_len);
3069   }
3070 }
3071 
3072 void MacroAssembler::vpbroadcastw(XMMRegister dst, XMMRegister src, int vector_len) {
3073   assert(((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
3074   Assembler::vpbroadcastw(dst, src, vector_len);
3075 }
3076 
3077 void MacroAssembler::vpcmpeqb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3078   assert(((dst->encoding() < 16 && src->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
3079   Assembler::vpcmpeqb(dst, nds, src, vector_len);
3080 }
3081 
3082 void MacroAssembler::vpcmpeqw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3083   assert(((dst->encoding() < 16 && src->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
3084   Assembler::vpcmpeqw(dst, nds, src, vector_len);
3085 }
3086 
3087 void MacroAssembler::evpcmpeqd(KRegister kdst, KRegister mask, XMMRegister nds,
3088                                AddressLiteral src, int vector_len, Register scratch_reg) {
3089   if (reachable(src)) {
3090     Assembler::evpcmpeqd(kdst, mask, nds, as_Address(src), vector_len);
3091   } else {
3092     lea(scratch_reg, src);
3093     Assembler::evpcmpeqd(kdst, mask, nds, Address(scratch_reg, 0), vector_len);
3094   }
3095 }
3096 
3097 void MacroAssembler::evpcmpd(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src,
3098                              int comparison, int vector_len, Register scratch_reg) {
3099   if (reachable(src)) {
3100     Assembler::evpcmpd(kdst, mask, nds, as_Address(src), comparison, vector_len);
3101   } else {
3102     lea(scratch_reg, src);
3103     Assembler::evpcmpd(kdst, mask, nds, Address(scratch_reg, 0), comparison, vector_len);
3104   }
3105 }
3106 
3107 void MacroAssembler::evpcmpq(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src,
3108                              int comparison, int vector_len, Register scratch_reg) {
3109   if (reachable(src)) {
3110     Assembler::evpcmpq(kdst, mask, nds, as_Address(src), comparison, vector_len);
3111   } else {
3112     lea(scratch_reg, src);
3113     Assembler::evpcmpq(kdst, mask, nds, Address(scratch_reg, 0), comparison, vector_len);
3114   }
3115 }
3116 
3117 void MacroAssembler::evpcmpb(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src,
3118                              int comparison, int vector_len, Register scratch_reg) {
3119   if (reachable(src)) {
3120     Assembler::evpcmpb(kdst, mask, nds, as_Address(src), comparison, vector_len);
3121   } else {
3122     lea(scratch_reg, src);
3123     Assembler::evpcmpb(kdst, mask, nds, Address(scratch_reg, 0), comparison, vector_len);
3124   }
3125 }
3126 
3127 void MacroAssembler::evpcmpw(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src,
3128                              int comparison, int vector_len, Register scratch_reg) {
3129   if (reachable(src)) {
3130     Assembler::evpcmpw(kdst, mask, nds, as_Address(src), comparison, vector_len);
3131   } else {
3132     lea(scratch_reg, src);
3133     Assembler::evpcmpw(kdst, mask, nds, Address(scratch_reg, 0), comparison, vector_len);
3134   }
3135 }
3136 
3137 void MacroAssembler::vpcmpCC(XMMRegister dst, XMMRegister nds, XMMRegister src, int cond_encoding, Width width, int vector_len) {
3138   if (width == Assembler::Q) {
3139     Assembler::vpcmpCCq(dst, nds, src, cond_encoding, vector_len);
3140   } else {
3141     Assembler::vpcmpCCbwd(dst, nds, src, cond_encoding, vector_len);
3142   }
3143 }
3144 
3145 void MacroAssembler::vpcmpCCW(XMMRegister dst, XMMRegister nds, XMMRegister src, ComparisonPredicate cond, Width width, int vector_len, Register scratch_reg) {
3146   int eq_cond_enc = 0x29;
3147   int gt_cond_enc = 0x37;
3148   if (width != Assembler::Q) {
3149     eq_cond_enc = 0x74 + width;
3150     gt_cond_enc = 0x64 + width;
3151   }
3152   switch (cond) {
3153   case eq:
3154     vpcmpCC(dst, nds, src, eq_cond_enc, width, vector_len);
3155     break;
3156   case neq:
3157     vpcmpCC(dst, nds, src, eq_cond_enc, width, vector_len);
3158     vpxor(dst, dst, ExternalAddress(StubRoutines::x86::vector_all_bits_set()), vector_len, scratch_reg);
3159     break;
3160   case le:
3161     vpcmpCC(dst, nds, src, gt_cond_enc, width, vector_len);
3162     vpxor(dst, dst, ExternalAddress(StubRoutines::x86::vector_all_bits_set()), vector_len, scratch_reg);
3163     break;
3164   case nlt:
3165     vpcmpCC(dst, src, nds, gt_cond_enc, width, vector_len);
3166     vpxor(dst, dst, ExternalAddress(StubRoutines::x86::vector_all_bits_set()), vector_len, scratch_reg);
3167     break;
3168   case lt:
3169     vpcmpCC(dst, src, nds, gt_cond_enc, width, vector_len);
3170     break;
3171   case nle:
3172     vpcmpCC(dst, nds, src, gt_cond_enc, width, vector_len);
3173     break;
3174   default:
3175     assert(false, "Should not reach here");
3176   }
3177 }
3178 
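vpcmpCCW above derives neq, le and nlt from the available eq and gt encodings by swapping operands and XOR-ing the lane result with vector_all_bits_set; a standalone scalar illustration of those identities (not the SIMD code itself):

  #include <cstdint>
  #include <cstdio>

  static uint8_t eq_lane(int8_t a, int8_t b) { return a == b ? 0xFF : 0x00; }
  static uint8_t gt_lane(int8_t a, int8_t b) { return a >  b ? 0xFF : 0x00; }

  int main() {
    int8_t a = 3, b = 7;
    uint8_t neq = eq_lane(a, b) ^ 0xFF;   // neq = NOT eq
    uint8_t le  = gt_lane(a, b) ^ 0xFF;   // le  = NOT (a > b)
    uint8_t lt  = gt_lane(b, a);          // lt  = gt with operands swapped
    uint8_t nlt = gt_lane(b, a) ^ 0xFF;   // nlt = NOT lt
    printf("neq=%02x le=%02x lt=%02x nlt=%02x\n", neq, le, lt, nlt);  // prints ff ff ff 00
    return 0;
  }
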
3179 void MacroAssembler::vpmovzxbw(XMMRegister dst, Address src, int vector_len) {
3180   assert(((dst->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
3181   Assembler::vpmovzxbw(dst, src, vector_len);
3182 }
3183 
3184 void MacroAssembler::vpmovmskb(Register dst, XMMRegister src) {
3185   assert((src->encoding() < 16),"XMM register should be 0-15");
3186   Assembler::vpmovmskb(dst, src);
3187 }
3188 
3189 void MacroAssembler::vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3190   assert(((dst->encoding() < 16 && src->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
3191   Assembler::vpmullw(dst, nds, src, vector_len);
3192 }
3193 
3194 void MacroAssembler::vpmullw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
3195   assert(((dst->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
3196   Assembler::vpmullw(dst, nds, src, vector_len);
3197 }
3198 


3283 }
3284 
3285 void MacroAssembler::vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg) {
3286   if (reachable(src)) {
3287     vandpd(dst, nds, as_Address(src), vector_len);
3288   } else {
3289     lea(scratch_reg, src);
3290     vandpd(dst, nds, Address(scratch_reg, 0), vector_len);
3291   }
3292 }
3293 
3294 void MacroAssembler::vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg) {
3295   if (reachable(src)) {
3296     vandps(dst, nds, as_Address(src), vector_len);
3297   } else {
3298     lea(scratch_reg, src);
3299     vandps(dst, nds, Address(scratch_reg, 0), vector_len);
3300   }
3301 }
3302 
3303 void MacroAssembler::evpord(XMMRegister dst, KRegister mask, XMMRegister nds, AddressLiteral src,
3304                             bool merge, int vector_len, Register scratch_reg) {
3305   if (reachable(src)) {
3306     Assembler::evpord(dst, mask, nds, as_Address(src), merge, vector_len);
3307   } else {
3308     lea(scratch_reg, src);
3309     Assembler::evpord(dst, mask, nds, Address(scratch_reg, 0), merge, vector_len);
3310   }
3311 }
3312 
3313 void MacroAssembler::vdivsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
3314   if (reachable(src)) {
3315     vdivsd(dst, nds, as_Address(src));
3316   } else {
3317     lea(rscratch1, src);
3318     vdivsd(dst, nds, Address(rscratch1, 0));
3319   }
3320 }
3321 
3322 void MacroAssembler::vdivss(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
3323   if (reachable(src)) {
3324     vdivss(dst, nds, as_Address(src));
3325   } else {
3326     lea(rscratch1, src);
3327     vdivss(dst, nds, Address(rscratch1, 0));
3328   }
3329 }
3330 
3331 void MacroAssembler::vmulsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
3332   if (reachable(src)) {


3389   } else {
3390     lea(scratch_reg, src);
3391     vxorps(dst, nds, Address(scratch_reg, 0), vector_len);
3392   }
3393 }
3394 
3395 void MacroAssembler::vpxor(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg) {
3396   if (UseAVX > 1 || (vector_len < 1)) {
3397     if (reachable(src)) {
3398       Assembler::vpxor(dst, nds, as_Address(src), vector_len);
3399     } else {
3400       lea(scratch_reg, src);
3401       Assembler::vpxor(dst, nds, Address(scratch_reg, 0), vector_len);
3402     }
3403   }
3404   else {
3405     MacroAssembler::vxorpd(dst, nds, src, vector_len, scratch_reg);
3406   }
3407 }
3408 
3409 void MacroAssembler::vpermd(XMMRegister dst,  XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg) {
3410   if (reachable(src)) {
3411     Assembler::vpermd(dst, nds, as_Address(src), vector_len);
3412   } else {
3413     lea(scratch_reg, src);
3414     Assembler::vpermd(dst, nds, Address(scratch_reg, 0), vector_len);
3415   }
3416 }
3417 
3418 void MacroAssembler::clear_jweak_tag(Register possibly_jweak) {
3419   const int32_t inverted_jweak_mask = ~static_cast<int32_t>(JNIHandles::weak_tag_mask);
3420   STATIC_ASSERT(inverted_jweak_mask == -2); // otherwise check this code
3421   // The inverted mask is sign-extended
3422   andptr(possibly_jweak, inverted_jweak_mask);
3423 }
3424 
3425 void MacroAssembler::resolve_jobject(Register value,
3426                                      Register thread,
3427                                      Register tmp) {
3428   assert_different_registers(value, thread, tmp);
3429   Label done, not_weak;
3430   testptr(value, value);
3431   jcc(Assembler::zero, done);                // Use NULL as-is.
3432   testptr(value, JNIHandles::weak_tag_mask); // Test for jweak tag.
3433   jcc(Assembler::zero, not_weak);
3434   // Resolve jweak.
3435   access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF,
3436                  value, Address(value, -JNIHandles::weak_tag_value), tmp, thread);


5926 
5927   // scale is in rcx on both Win64 and Unix
5928   ShortBranchVerifier sbv(this);
5929 
5930   shlq(length);
5931   xorq(result, result);
5932 
5933   if ((AVX3Threshold == 0) && (UseAVX > 2) &&
5934       VM_Version::supports_avx512vlbw()) {
5935     Label VECTOR64_LOOP, VECTOR64_NOT_EQUAL, VECTOR32_TAIL;
5936 
5937     cmpq(length, 64);
5938     jcc(Assembler::less, VECTOR32_TAIL);
5939 
5940     movq(tmp1, length);
5941     andq(tmp1, 0x3F);      // tail count
5942     andq(length, ~(0x3F)); // vector count
5943 
5944     bind(VECTOR64_LOOP);
5945     // AVX512 code to compare 64 byte vectors.
5946     evmovdqub(rymm0, Address(obja, result), false, Assembler::AVX_512bit);
5947     evpcmpeqb(k7, rymm0, Address(objb, result), Assembler::AVX_512bit);
5948     kortestql(k7, k7);
5949     jcc(Assembler::aboveEqual, VECTOR64_NOT_EQUAL);     // mismatch
5950     addq(result, 64);
5951     subq(length, 64);
5952     jccb(Assembler::notZero, VECTOR64_LOOP);
5953 
5954     //bind(VECTOR64_TAIL);
5955     testq(tmp1, tmp1);
5956     jcc(Assembler::zero, SAME_TILL_END);
5957 
5958     //bind(VECTOR64_TAIL);
5959     // AVX512 code to compare up to 63 byte vectors.
5960     mov64(tmp2, 0xFFFFFFFFFFFFFFFF);
5961     shlxq(tmp2, tmp2, tmp1);
5962     notq(tmp2);
5963     kmovql(k3, tmp2);
5964 
5965     evmovdqub(rymm0, k3, Address(obja, result), false, Assembler::AVX_512bit);
5966     evpcmpeqb(k7, k3, rymm0, Address(objb, result), Assembler::AVX_512bit);
5967 
5968     ktestql(k7, k3);
5969     jcc(Assembler::below, SAME_TILL_END);     // no mismatch
5970 
5971     bind(VECTOR64_NOT_EQUAL);
5972     kmovql(tmp1, k7);    // k7 has 1-bits for the bytes that compared equal
5973     notq(tmp1);          // now 1-bits mark the mismatching bytes
5974     tzcntq(tmp1, tmp1);  // index of the first mismatching byte in this block
5975     addq(result, tmp1);  // absolute byte offset of the mismatch
5976     shrq(result);        // scale (in cl) converts the byte offset to an element index
5977     jmp(DONE);
5978     bind(VECTOR32_TAIL);
5979   }
5980 
5981   cmpq(length, 8);
5982   jcc(Assembler::equal, VECTOR8_LOOP);
5983   jcc(Assembler::less, VECTOR4_TAIL);
5984 
5985   if (UseAVX >= 2) {


7384     evpbroadcastw(tmp2Reg, result, Assembler::AVX_512bit);
7385 
7386     testl(len, -64);
7387     jcc(Assembler::zero, post_alignment);
7388 
7389     movl(tmp5, dst);
7390     andl(tmp5, (32 - 1));
7391     negl(tmp5);
7392     andl(tmp5, (32 - 1));
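          // (illustration: the andl/negl/andl sequence computes (-(dst & 31)) & 31, i.e. the number
          //  of elements needed to reach a 32-byte-aligned dst; zero when dst is already aligned)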
7393 
7394     // bail out when there is nothing to be done
7395     testl(tmp5, 0xFFFFFFFF);
7396     jcc(Assembler::zero, post_alignment);
7397 
7398     // ~(~0 << len), where len is the # of remaining elements to process
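          // (illustration: for len == 5, ~0 << 5 ends in binary 11100000, so ~(~0 << 5) == 0x1F, the low 5 mask bits set)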
7399     movl(result, 0xFFFFFFFF);
7400     shlxl(result, result, tmp5);
7401     notl(result);
7402     kmovdl(k3, result);
7403 
7404     evmovdquw(tmp1Reg, k3, Address(src, 0), /*merge*/ false, Assembler::AVX_512bit);
7405     evpcmpuw(k2, k3, tmp1Reg, tmp2Reg, Assembler::le, Assembler::AVX_512bit);
7406     ktestd(k2, k3);
7407     jcc(Assembler::carryClear, return_zero);
7408 
7409     evpmovwb(Address(dst, 0), k3, tmp1Reg, Assembler::AVX_512bit);
7410 
7411     addptr(src, tmp5);
7412     addptr(src, tmp5);
7413     addptr(dst, tmp5);
7414     subl(len, tmp5);
7415 
7416     bind(post_alignment);
7417     // end of alignment
7418 
7419     movl(tmp5, len);
7420     andl(tmp5, (32 - 1));    // tail count (in chars)
7421     andl(len, ~(32 - 1));    // vector count (in chars)
7422     jcc(Assembler::zero, copy_loop_tail);
7423 
7424     lea(src, Address(src, len, Address::times_2));
7425     lea(dst, Address(dst, len, Address::times_1));
7426     negptr(len);
7427 
7428     bind(copy_32_loop);
7429     evmovdquw(tmp1Reg, Address(src, len, Address::times_2), /*merge*/ false, Assembler::AVX_512bit);
7430     evpcmpuw(k2, tmp1Reg, tmp2Reg, Assembler::le, Assembler::AVX_512bit);
7431     kortestdl(k2, k2);
7432     jcc(Assembler::carryClear, return_zero);
7433 
7434     // All elements in the currently processed chunk are valid candidates for
7435     // compression. Write the truncated byte elements to memory.
7436     evpmovwb(Address(dst, len, Address::times_1), tmp1Reg, Assembler::AVX_512bit);
7437     addptr(len, 32);
7438     jcc(Assembler::notZero, copy_32_loop);
7439 
7440     bind(copy_loop_tail);
7441     // bail out when there is nothing to be done
7442     testl(tmp5, 0xFFFFFFFF);
7443     jcc(Assembler::zero, return_length);
7444 
7445     movl(len, tmp5);
7446 
7447     // ~(~0 << len), where len is the # of remaining elements to process
7448     movl(result, 0xFFFFFFFF);
7449     shlxl(result, result, len);
7450     notl(result);
7451 
7452     kmovdl(k3, result);
7453 
7454     evmovdquw(tmp1Reg, k3, Address(src, 0), /*merge*/ false, Assembler::AVX_512bit);
7455     evpcmpuw(k2, k3, tmp1Reg, tmp2Reg, Assembler::le, Assembler::AVX_512bit);
7456     ktestd(k2, k3);
7457     jcc(Assembler::carryClear, return_zero);
7458 
7459     evpmovwb(Address(dst, 0), k3, tmp1Reg, Assembler::AVX_512bit);
7460     jmp(return_length);
7461 
7462     bind(below_threshold);
7463   }
7464 
7465   if (UseSSE42Intrinsics) {
7466     Label copy_32_loop, copy_16, copy_tail;
7467 
7468     movl(result, len);
7469 
7470     movl(tmp5, 0xff00ff00);   // create mask to test for Unicode chars in vectors
7471 
7472     // vectored compression
7473     andl(len, 0xfffffff0);    // vector count (in chars)
7474     andl(result, 0x0000000f);    // tail count (in chars)


7579     testl(len, -16);
7580     jcc(Assembler::zero, below_threshold);
7581 
7582     testl(len, -1 * AVX3Threshold);
7583     jcc(Assembler::zero, avx3_threshold);
7584 
7585     // Pre-calculate the tail and vector counts so that the main loop needs
7586     // only one arithmetic operation
7587     andl(tmp2, (32 - 1)); // tail count (in chars), 32 element wide loop
7588     andl(len, -32);     // vector count
7589     jccb(Assembler::zero, copy_tail);
7590 
7591     lea(src, Address(src, len, Address::times_1));
7592     lea(dst, Address(dst, len, Address::times_2));
7593     negptr(len);
7594 
7595 
7596     // inflate 32 chars per iter
7597     bind(copy_32_loop);
7598     vpmovzxbw(tmp1, Address(src, len, Address::times_1), Assembler::AVX_512bit);
7599     evmovdquw(Address(dst, len, Address::times_2), tmp1, /*merge*/ false, Assembler::AVX_512bit);
7600     addptr(len, 32);
7601     jcc(Assembler::notZero, copy_32_loop);
7602 
7603     bind(copy_tail);
7604     // bail out when there is nothing to be done
7605     testl(tmp2, -1); // we don't destroy the contents of tmp2 here
7606     jcc(Assembler::zero, done);
7607 
7608     // ~(~0 << length), where length is the # of remaining elements to process
7609     movl(tmp3_aliased, -1);
7610     shlxl(tmp3_aliased, tmp3_aliased, tmp2);
7611     notl(tmp3_aliased);
7612     kmovdl(k2, tmp3_aliased);
7613     evpmovzxbw(tmp1, k2, Address(src, 0), Assembler::AVX_512bit);
7614     evmovdquw(Address(dst, 0), k2, tmp1, /*merge*/ true, Assembler::AVX_512bit);
7615 
7616     jmp(done);
7617     bind(avx3_threshold);
7618   }
7619   if (UseSSE42Intrinsics) {
7620     Label copy_16_loop, copy_8_loop, copy_bytes, copy_new_tail, copy_tail;
7621 
7622     if (UseAVX > 1) {
7623       andl(tmp2, (16 - 1));
7624       andl(len, -16);
7625       jccb(Assembler::zero, copy_new_tail);
7626     } else {
7627       andl(tmp2, 0x00000007);   // tail count (in chars)
7628       andl(len, 0xfffffff8);    // vector count (in chars)
7629       jccb(Assembler::zero, copy_tail);
7630     }
7631 
7632     // vectored inflation
7633     lea(src, Address(src, len, Address::times_1));
7634     lea(dst, Address(dst, len, Address::times_2));

