src/hotspot/cpu/x86/macroAssembler_x86.cpp

rev 61868 : manual merge with default


  95 // 32bit versions
  96 
  97 Address MacroAssembler::as_Address(AddressLiteral adr) {
  98   return Address(adr.target(), adr.rspec());
  99 }
 100 
 101 Address MacroAssembler::as_Address(ArrayAddress adr) {
 102   return Address::make_array(adr);
 103 }
 104 
 105 void MacroAssembler::call_VM_leaf_base(address entry_point,
 106                                        int number_of_arguments) {
 107   call(RuntimeAddress(entry_point));
 108   increment(rsp, number_of_arguments * wordSize);
 109 }
 110 
 111 void MacroAssembler::cmpklass(Address src1, Metadata* obj) {
 112   cmp_literal32(src1, (int32_t)obj, metadata_Relocation::spec_for_immediate());
 113 }
 114 
 115 void MacroAssembler::cmpklass(Register src1, Metadata* obj) {
 116   cmp_literal32(src1, (int32_t)obj, metadata_Relocation::spec_for_immediate());
 117 }
 118 
 119 void MacroAssembler::cmpoop_raw(Address src1, jobject obj) {
 120   cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate());
 121 }
 122 
 123 void MacroAssembler::cmpoop_raw(Register src1, jobject obj) {
 124   cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate());
 125 }
 126 
 127 void MacroAssembler::cmpoop(Address src1, jobject obj) {
 128   BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
 129   bs->obj_equals(this, src1, obj);
 130 }
 131 
 132 void MacroAssembler::cmpoop(Register src1, jobject obj) {
 133   BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
 134   bs->obj_equals(this, src1, obj);


2485 void MacroAssembler::movptr(Register dst, intptr_t src) {
2486   LP64_ONLY(mov64(dst, src)) NOT_LP64(movl(dst, src));
2487 }
2488 
2489 void MacroAssembler::movptr(Address dst, Register src) {
2490   LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
2491 }
2492 
2493 void MacroAssembler::movdqu(Address dst, XMMRegister src) {
2494     assert(((src->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15");
2495     Assembler::movdqu(dst, src);
2496 }
2497 
2498 void MacroAssembler::movdqu(XMMRegister dst, Address src) {
2499     assert(((dst->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15");
2500     Assembler::movdqu(dst, src);
2501 }
2502 
2503 void MacroAssembler::movdqu(XMMRegister dst, XMMRegister src) {
2504     assert(((dst->encoding() < 16  && src->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15");
2505     Assembler::movdqu(dst, src);
2506 }
2507 
2508 void MacroAssembler::movdqu(XMMRegister dst, AddressLiteral src, Register scratchReg) {
2509   if (reachable(src)) {
2510     movdqu(dst, as_Address(src));
2511   } else {
2512     lea(scratchReg, src);
2513     movdqu(dst, Address(scratchReg, 0));
2514   }
2515 }
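
A note on the pattern above, which repeats for nearly every AddressLiteral wrapper in this file: if the literal address is reachable as a RIP-relative operand from the code being emitted, the instruction uses it directly; otherwise the address is first materialized into the caller-supplied scratch register with lea and the instruction goes through Address(scratch, 0). The following is only a minimal standalone sketch of that reachability idea, assuming the usual x86_64 rule that a RIP-relative operand needs its displacement to fit in a signed 32-bit field; the real MacroAssembler::reachable() also consults relocation info and code-cache bounds.

    #include <cstdint>
    #include <cstdio>

    // Illustrative only: true when 'target' could be addressed RIP-relative from code
    // emitted at 'code_pos', i.e. the displacement fits in disp32. When it cannot,
    // the wrappers above fall back to lea(scratch, src) + Address(scratch, 0).
    static bool fits_in_disp32(uint64_t code_pos, uint64_t target) {
      int64_t disp = (int64_t)(target - code_pos);
      return disp == (int64_t)(int32_t)disp;
    }

    int main() {
      printf("%d\n", fits_in_disp32(0x00007f0000000000ULL, 0x00007f0000001000ULL)); // 1: near
      printf("%d\n", fits_in_disp32(0x00007f0000000000ULL, 0x0000000000401000ULL)); // 0: far
      return 0;
    }
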
2516 
2517 void MacroAssembler::vmovdqu(Address dst, XMMRegister src) {
2518     assert(((src->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15");
2519     Assembler::vmovdqu(dst, src);
2520 }
2521 
2522 void MacroAssembler::vmovdqu(XMMRegister dst, Address src) {
2523     assert(((dst->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15");
2524     Assembler::vmovdqu(dst, src);
2525 }
2526 
2527 void MacroAssembler::vmovdqu(XMMRegister dst, XMMRegister src) {
2528     assert(((dst->encoding() < 16  && src->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15");
2529     Assembler::vmovdqu(dst, src);
2530 }
2531 
2532 void MacroAssembler::vmovdqu(XMMRegister dst, AddressLiteral src, Register scratch_reg) {
2533   if (reachable(src)) {
2534     vmovdqu(dst, as_Address(src));
2535   }
2536   else {
2537     lea(scratch_reg, src);
2538     vmovdqu(dst, Address(scratch_reg, 0));
2539   }
2540 }
2541 
2542 void MacroAssembler::evmovdquq(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch) {
2543   if (reachable(src)) {
2544     Assembler::evmovdquq(dst, as_Address(src), vector_len);
2545   } else {
2546     lea(rscratch, src);
2547     Assembler::evmovdquq(dst, Address(rscratch, 0), vector_len);
2548   }
2549 }
2550 
2551 void MacroAssembler::movdqa(XMMRegister dst, AddressLiteral src) {
2552   if (reachable(src)) {
2553     Assembler::movdqa(dst, as_Address(src));
2554   } else {
2555     lea(rscratch1, src);
2556     Assembler::movdqa(dst, Address(rscratch1, 0));
2557   }
2558 }
2559 
2560 void MacroAssembler::movsd(XMMRegister dst, AddressLiteral src) {
2561   if (reachable(src)) {


3008     lea(scratch_reg, src);
3009     Assembler::vpand(dst, nds, Address(scratch_reg, 0), vector_len);
3010   }
3011 }
3012 
3013 void MacroAssembler::vpbroadcastw(XMMRegister dst, XMMRegister src, int vector_len) {
3014   assert(((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
3015   Assembler::vpbroadcastw(dst, src, vector_len);
3016 }
3017 
3018 void MacroAssembler::vpcmpeqb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3019   assert(((dst->encoding() < 16 && src->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
3020   Assembler::vpcmpeqb(dst, nds, src, vector_len);
3021 }
3022 
3023 void MacroAssembler::vpcmpeqw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3024   assert(((dst->encoding() < 16 && src->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
3025   Assembler::vpcmpeqw(dst, nds, src, vector_len);
3026 }
3027 
3028 void MacroAssembler::vpmovzxbw(XMMRegister dst, Address src, int vector_len) {
3029   assert(((dst->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
3030   Assembler::vpmovzxbw(dst, src, vector_len);
3031 }
3032 
3033 void MacroAssembler::vpmovmskb(Register dst, XMMRegister src) {
3034   assert((src->encoding() < 16),"XMM register should be 0-15");
3035   Assembler::vpmovmskb(dst, src);
3036 }
3037 
3038 void MacroAssembler::vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3039   assert(((dst->encoding() < 16 && src->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
3040   Assembler::vpmullw(dst, nds, src, vector_len);
3041 }
3042 
3043 void MacroAssembler::vpmullw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
3044   assert(((dst->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
3045   Assembler::vpmullw(dst, nds, src, vector_len);
3046 }
3047 


3132 }
3133 
3134 void MacroAssembler::vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg) {
3135   if (reachable(src)) {
3136     vandpd(dst, nds, as_Address(src), vector_len);
3137   } else {
3138     lea(scratch_reg, src);
3139     vandpd(dst, nds, Address(scratch_reg, 0), vector_len);
3140   }
3141 }
3142 
3143 void MacroAssembler::vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg) {
3144   if (reachable(src)) {
3145     vandps(dst, nds, as_Address(src), vector_len);
3146   } else {
3147     lea(scratch_reg, src);
3148     vandps(dst, nds, Address(scratch_reg, 0), vector_len);
3149   }
3150 }
3151 
3152 void MacroAssembler::vdivsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
3153   if (reachable(src)) {
3154     vdivsd(dst, nds, as_Address(src));
3155   } else {
3156     lea(rscratch1, src);
3157     vdivsd(dst, nds, Address(rscratch1, 0));
3158   }
3159 }
3160 
3161 void MacroAssembler::vdivss(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
3162   if (reachable(src)) {
3163     vdivss(dst, nds, as_Address(src));
3164   } else {
3165     lea(rscratch1, src);
3166     vdivss(dst, nds, Address(rscratch1, 0));
3167   }
3168 }
3169 
3170 void MacroAssembler::vmulsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
3171   if (reachable(src)) {


3228   } else {
3229     lea(scratch_reg, src);
3230     vxorps(dst, nds, Address(scratch_reg, 0), vector_len);
3231   }
3232 }
3233 
3234 void MacroAssembler::vpxor(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg) {
3235   if (UseAVX > 1 || (vector_len < 1)) {
3236     if (reachable(src)) {
3237       Assembler::vpxor(dst, nds, as_Address(src), vector_len);
3238     } else {
3239       lea(scratch_reg, src);
3240       Assembler::vpxor(dst, nds, Address(scratch_reg, 0), vector_len);
3241     }
3242   }
3243   else {
3244     MacroAssembler::vxorpd(dst, nds, src, vector_len, scratch_reg);
3245   }
3246 }
3247 
3248 //-------------------------------------------------------------------------------------------
3249 
3250 void MacroAssembler::clear_jweak_tag(Register possibly_jweak) {
3251   const int32_t inverted_jweak_mask = ~static_cast<int32_t>(JNIHandles::weak_tag_mask);
3252   STATIC_ASSERT(inverted_jweak_mask == -2); // otherwise check this code
3253   // The inverted mask is sign-extended
3254   andptr(possibly_jweak, inverted_jweak_mask);
3255 }
3256 
3257 void MacroAssembler::resolve_jobject(Register value,
3258                                      Register thread,
3259                                      Register tmp) {
3260   assert_different_registers(value, thread, tmp);
3261   Label done, not_weak;
3262   testptr(value, value);
3263   jcc(Assembler::zero, done);                // Use NULL as-is.
3264   testptr(value, JNIHandles::weak_tag_mask); // Test for jweak tag.
3265   jcc(Assembler::zero, not_weak);
3266   // Resolve jweak.
3267   access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF,
3268                  value, Address(value, -JNIHandles::weak_tag_value), tmp, thread);
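
A worked rendering of the tag arithmetic in clear_jweak_tag/resolve_jobject above, as a standalone program. It assumes only what the STATIC_ASSERT already implies, namely that JNIHandles::weak_tag_mask is 1 (so the inverted mask is -2): a jweak is an ordinary handle address with the low bit set, resolve_jobject tests that bit, and the andptr with -2 strips it. The handle values below are made up for illustration.

    #include <cstdint>
    #include <cstdio>

    int main() {
      const int64_t weak_tag_mask = 1;                                  // assumed, per the STATIC_ASSERT
      const int32_t inverted = ~static_cast<int32_t>(weak_tag_mask);    // -2, sign-extended when applied
      int64_t jweak_handle  = 0x00007f1234567891LL;                     // low bit set  -> weak handle
      int64_t strong_handle = 0x00007f1234567890LL;                     // low bit clear -> strong handle

      printf("weak tag:  %lld\n", (long long)(jweak_handle & weak_tag_mask));        // 1
      printf("cleared:   %#llx\n", (unsigned long long)(jweak_handle & inverted));   // ...890
      printf("unchanged: %#llx\n", (unsigned long long)(strong_handle & inverted));  // ...890
      return 0;
    }
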


5754 
5755   //scale is in rcx in both Win64 and Unix
5756   ShortBranchVerifier sbv(this);
5757 
5758   shlq(length);
5759   xorq(result, result);
5760 
5761   if ((AVX3Threshold == 0) && (UseAVX > 2) &&
5762       VM_Version::supports_avx512vlbw()) {
5763     Label VECTOR64_LOOP, VECTOR64_NOT_EQUAL, VECTOR32_TAIL;
5764 
5765     cmpq(length, 64);
5766     jcc(Assembler::less, VECTOR32_TAIL);
5767 
5768     movq(tmp1, length);
5769     andq(tmp1, 0x3F);      // tail count
5770     andq(length, ~(0x3F)); //vector count
5771 
5772     bind(VECTOR64_LOOP);
5773     // AVX512 code to compare 64 byte vectors.
5774     evmovdqub(rymm0, Address(obja, result), Assembler::AVX_512bit);
5775     evpcmpeqb(k7, rymm0, Address(objb, result), Assembler::AVX_512bit);
5776     kortestql(k7, k7);
5777     jcc(Assembler::aboveEqual, VECTOR64_NOT_EQUAL);     // mismatch
5778     addq(result, 64);
5779     subq(length, 64);
5780     jccb(Assembler::notZero, VECTOR64_LOOP);
5781 
5782     //bind(VECTOR64_TAIL);
5783     testq(tmp1, tmp1);
5784     jcc(Assembler::zero, SAME_TILL_END);
5785 
5786     //bind(VECTOR64_TAIL);
5787     // AVX512 code to compare up to 63 byte vectors.
5788     mov64(tmp2, 0xFFFFFFFFFFFFFFFF);
5789     shlxq(tmp2, tmp2, tmp1);
5790     notq(tmp2);
5791     kmovql(k3, tmp2);
5792 
5793     evmovdqub(rymm0, k3, Address(obja, result), Assembler::AVX_512bit);
5794     evpcmpeqb(k7, k3, rymm0, Address(objb, result), Assembler::AVX_512bit);
5795 
5796     ktestql(k7, k3);
5797     jcc(Assembler::below, SAME_TILL_END);     // not mismatch
5798 
5799     bind(VECTOR64_NOT_EQUAL);
5800     kmovql(tmp1, k7);
5801     notq(tmp1);
5802     tzcntq(tmp1, tmp1);
5803     addq(result, tmp1);
5804     shrq(result);
5805     jmp(DONE);
5806     bind(VECTOR32_TAIL);
5807   }
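
The AVX512 block above leans on two bit tricks: the tail mask is built as ~(~0 << tail) with shlxq/notq/kmovql, and the byte offset of the first mismatch is recovered from the comparison mask with notq/tzcntq before result is scaled back with shrq. Below is a standalone sketch of both, assuming a 64-lane byte compare in which only lane 7 differs; __builtin_ctzll stands in for tzcntq.

    #include <cstdint>
    #include <cstdio>

    int main() {
      // Tail mask: cover only the 'tail' remaining bytes. The code above only reaches
      // this path with tail in 1..63, so the shift below is well defined.
      uint64_t tail  = 5;
      uint64_t kmask = ~(~0ULL << tail);                        // shlxq + notq: 0b11111
      printf("kmask    = %#llx\n", (unsigned long long)kmask);

      // Mismatch index: evpcmpeqb sets one bit per equal byte; the first zero bit
      // marks the first differing byte.
      uint64_t eq_mask  = ~0ULL & ~(1ULL << 7);                 // pretend byte 7 differs
      uint64_t mismatch = (uint64_t)__builtin_ctzll(~eq_mask);  // notq + tzcntq (GCC/Clang builtin)
      printf("mismatch = %llu\n", (unsigned long long)mismatch);  // 7
      return 0;
    }
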
5808 
5809   cmpq(length, 8);
5810   jcc(Assembler::equal, VECTOR8_LOOP);
5811   jcc(Assembler::less, VECTOR4_TAIL);
5812 
5813   if (UseAVX >= 2) {


7568     evpbroadcastw(tmp2Reg, result, Assembler::AVX_512bit);
7569 
7570     testl(len, -64);
7571     jcc(Assembler::zero, post_alignment);
7572 
7573     movl(tmp5, dst);
7574     andl(tmp5, (32 - 1));
7575     negl(tmp5);
7576     andl(tmp5, (32 - 1));
7577 
7578     // bail out when there is nothing to be done
7579     testl(tmp5, 0xFFFFFFFF);
7580     jcc(Assembler::zero, post_alignment);
7581 
7582     // ~(~0 << len), where len is the # of remaining elements to process
7583     movl(result, 0xFFFFFFFF);
7584     shlxl(result, result, tmp5);
7585     notl(result);
7586     kmovdl(k3, result);
7587 
7588     evmovdquw(tmp1Reg, k3, Address(src, 0), Assembler::AVX_512bit);
7589     evpcmpuw(k2, k3, tmp1Reg, tmp2Reg, Assembler::le, Assembler::AVX_512bit);
7590     ktestd(k2, k3);
7591     jcc(Assembler::carryClear, return_zero);
7592 
7593     evpmovwb(Address(dst, 0), k3, tmp1Reg, Assembler::AVX_512bit);
7594 
7595     addptr(src, tmp5);
7596     addptr(src, tmp5);
7597     addptr(dst, tmp5);
7598     subl(len, tmp5);
7599 
7600     bind(post_alignment);
7601     // end of alignment
7602 
7603     movl(tmp5, len);
7604     andl(tmp5, (32 - 1));    // tail count (in chars)
7605     andl(len, ~(32 - 1));    // vector count (in chars)
7606     jcc(Assembler::zero, copy_loop_tail);
7607 
7608     lea(src, Address(src, len, Address::times_2));
7609     lea(dst, Address(dst, len, Address::times_1));
7610     negptr(len);
7611 
7612     bind(copy_32_loop);
7613     evmovdquw(tmp1Reg, Address(src, len, Address::times_2), Assembler::AVX_512bit);
7614     evpcmpuw(k2, tmp1Reg, tmp2Reg, Assembler::le, Assembler::AVX_512bit);
7615     kortestdl(k2, k2);
7616     jcc(Assembler::carryClear, return_zero);
7617 
7618     // All elements in current processed chunk are valid candidates for
7619     // compression. Write the truncated byte elements to memory.
7620     evpmovwb(Address(dst, len, Address::times_1), tmp1Reg, Assembler::AVX_512bit);
7621     addptr(len, 32);
7622     jcc(Assembler::notZero, copy_32_loop);
7623 
7624     bind(copy_loop_tail);
7625     // bail out when there is nothing to be done
7626     testl(tmp5, 0xFFFFFFFF);
7627     jcc(Assembler::zero, return_length);
7628 
7629     movl(len, tmp5);
7630 
7631     // ~(~0 << len), where len is the # of remaining elements to process
7632     movl(result, 0xFFFFFFFF);
7633     shlxl(result, result, len);
7634     notl(result);
7635 
7636     kmovdl(k3, result);
7637 
7638     evmovdquw(tmp1Reg, k3, Address(src, 0), Assembler::AVX_512bit);
7639     evpcmpuw(k2, k3, tmp1Reg, tmp2Reg, Assembler::le, Assembler::AVX_512bit);
7640     ktestd(k2, k3);
7641     jcc(Assembler::carryClear, return_zero);
7642 
7643     evpmovwb(Address(dst, 0), k3, tmp1Reg, Assembler::AVX_512bit);
7644     jmp(return_length);
7645 
7646     bind(below_threshold);
7647   }
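
The alignment prologue above (the movl/andl/negl/andl sequence on tmp5) computes how many chars must be compressed under a mask before dst reaches a 32-byte boundary, which is just (-dst) & 31; src then advances by 2*tmp5 bytes (the two addptr calls, since each char is 2 bytes) and dst by tmp5. A standalone rendering with an example address:

    #include <cstdint>
    #include <cstdio>

    int main() {
      uintptr_t dst  = 0x1013;                                       // example destination address
      unsigned  tmp5 = (unsigned)(0u - (unsigned)(dst & 31)) & 31;   // andl / negl / andl sequence
      printf("chars until 32-byte alignment: %u\n", tmp5);                            // 13
      printf("dst after prologue:            %#lx\n", (unsigned long)(dst + tmp5));   // 0x1020
      printf("src advances by:               %u bytes\n", 2 * tmp5);                  // 26
      return 0;
    }
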
7648 
7649   if (UseSSE42Intrinsics) {
7650     Label copy_32_loop, copy_16, copy_tail;
7651 
7652     movl(result, len);
7653 
7654     movl(tmp5, 0xff00ff00);   // create mask to test for Unicode chars in vectors
7655 
7656     // vectored compression
7657     andl(len, 0xfffffff0);    // vector count (in chars)
7658     andl(result, 0x0000000f);    // tail count (in chars)


7763     testl(len, -16);
7764     jcc(Assembler::zero, below_threshold);
7765 
7766     testl(len, -1 * AVX3Threshold);
7767     jcc(Assembler::zero, avx3_threshold);
7768 
7769     // In order to use only one arithmetic operation for the main loop we use
7770     // this pre-calculation
7771     andl(tmp2, (32 - 1)); // tail count (in chars), 32 element wide loop
7772     andl(len, -32);     // vector count
7773     jccb(Assembler::zero, copy_tail);
7774 
7775     lea(src, Address(src, len, Address::times_1));
7776     lea(dst, Address(dst, len, Address::times_2));
7777     negptr(len);
7778 
7779 
7780     // inflate 32 chars per iter
7781     bind(copy_32_loop);
7782     vpmovzxbw(tmp1, Address(src, len, Address::times_1), Assembler::AVX_512bit);
7783     evmovdquw(Address(dst, len, Address::times_2), tmp1, Assembler::AVX_512bit);
7784     addptr(len, 32);
7785     jcc(Assembler::notZero, copy_32_loop);
7786 
7787     bind(copy_tail);
7788     // bail out when there is nothing to be done
7789     testl(tmp2, -1); // we don't destroy the contents of tmp2 here
7790     jcc(Assembler::zero, done);
7791 
7792     // ~(~0 << length), where length is the # of remaining elements to process
7793     movl(tmp3_aliased, -1);
7794     shlxl(tmp3_aliased, tmp3_aliased, tmp2);
7795     notl(tmp3_aliased);
7796     kmovdl(k2, tmp3_aliased);
7797     evpmovzxbw(tmp1, k2, Address(src, 0), Assembler::AVX_512bit);
7798     evmovdquw(Address(dst, 0), k2, tmp1, Assembler::AVX_512bit);
7799 
7800     jmp(done);
7801     bind(avx3_threshold);
7802   }
7803   if (UseSSE42Intrinsics) {
7804     Label copy_16_loop, copy_8_loop, copy_bytes, copy_new_tail, copy_tail;
7805 
7806     if (UseAVX > 1) {
7807       andl(tmp2, (16 - 1));
7808       andl(len, -16);
7809       jccb(Assembler::zero, copy_new_tail);
7810     } else {
7811       andl(tmp2, 0x00000007);   // tail count (in chars)
7812       andl(len, 0xfffffff8);    // vector count (in chars)
7813       jccb(Assembler::zero, copy_tail);
7814     }
7815 
7816     // vectored inflation
7817     lea(src, Address(src, len, Address::times_1));
7818     lea(dst, Address(dst, len, Address::times_2));
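
The lea/lea/negptr prologue above (the compression loop earlier uses the same shape) sets up the classic negative-index loop: both pointers are advanced past the vectorized region, the element count is negated, and a single addptr per iteration then drives both the addressing and the loop-exit flags consumed by jcc, which is what the "only one arithmetic operation" comment refers to. A scalar standalone rendering of the same idiom, inflating bytes to chars:

    #include <cstdio>

    int main() {
      const char src[8] = {'a','b','c','d','e','f','g','h'};
      unsigned short dst[8];
      long len = 8;

      const char*     s = src + len;       // lea(src, Address(src, len, times_1))
      unsigned short* d = dst + len;       // lea(dst, Address(dst, len, times_2))
      for (long i = -len; i != 0; ++i) {   // negptr(len); ... addptr(len, step); jcc(notZero, loop)
        d[i] = (unsigned char)s[i];        // zero-extend byte -> char (vpmovzxbw does 32 at a time)
      }
      printf("%c %c\n", (char)dst[0], (char)dst[7]);   // a h
      return 0;
    }
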




  95 // 32bit versions
  96 
  97 Address MacroAssembler::as_Address(AddressLiteral adr) {
  98   return Address(adr.target(), adr.rspec());
  99 }
 100 
 101 Address MacroAssembler::as_Address(ArrayAddress adr) {
 102   return Address::make_array(adr);
 103 }
 104 
 105 void MacroAssembler::call_VM_leaf_base(address entry_point,
 106                                        int number_of_arguments) {
 107   call(RuntimeAddress(entry_point));
 108   increment(rsp, number_of_arguments * wordSize);
 109 }
 110 
 111 void MacroAssembler::cmpklass(Address src1, Metadata* obj) {
 112   cmp_literal32(src1, (int32_t)obj, metadata_Relocation::spec_for_immediate());
 113 }
 114 
 115 
 116 void MacroAssembler::cmpklass(Register src1, Metadata* obj) {
 117   cmp_literal32(src1, (int32_t)obj, metadata_Relocation::spec_for_immediate());
 118 }
 119 
 120 void MacroAssembler::cmpoop_raw(Address src1, jobject obj) {
 121   cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate());
 122 }
 123 
 124 void MacroAssembler::cmpoop_raw(Register src1, jobject obj) {
 125   cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate());
 126 }
 127 
 128 void MacroAssembler::cmpoop(Address src1, jobject obj) {
 129   BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
 130   bs->obj_equals(this, src1, obj);
 131 }
 132 
 133 void MacroAssembler::cmpoop(Register src1, jobject obj) {
 134   BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
 135   bs->obj_equals(this, src1, obj);


2486 void MacroAssembler::movptr(Register dst, intptr_t src) {
2487   LP64_ONLY(mov64(dst, src)) NOT_LP64(movl(dst, src));
2488 }
2489 
2490 void MacroAssembler::movptr(Address dst, Register src) {
2491   LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
2492 }
2493 
2494 void MacroAssembler::movdqu(Address dst, XMMRegister src) {
2495     assert(((src->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15");
2496     Assembler::movdqu(dst, src);
2497 }
2498 
2499 void MacroAssembler::movdqu(XMMRegister dst, Address src) {
2500     assert(((dst->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15");
2501     Assembler::movdqu(dst, src);
2502 }
2503 
2504 void MacroAssembler::movdqu(XMMRegister dst, XMMRegister src) {
2505     assert(((dst->encoding() < 16  && src->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15");
2506     if (dst->encoding() == src->encoding()) return;
2507     Assembler::movdqu(dst, src);
2508 }
2509 
2510 void MacroAssembler::movdqu(XMMRegister dst, AddressLiteral src, Register scratchReg) {
2511   if (reachable(src)) {
2512     movdqu(dst, as_Address(src));
2513   } else {
2514     lea(scratchReg, src);
2515     movdqu(dst, Address(scratchReg, 0));
2516   }
2517 }
2518 
2519 void MacroAssembler::vmovdqu(Address dst, XMMRegister src) {
2520     assert(((src->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15");
2521     Assembler::vmovdqu(dst, src);
2522 }
2523 
2524 void MacroAssembler::vmovdqu(XMMRegister dst, Address src) {
2525     assert(((dst->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15");
2526     Assembler::vmovdqu(dst, src);
2527 }
2528 
2529 void MacroAssembler::vmovdqu(XMMRegister dst, XMMRegister src) {
2530     assert(((dst->encoding() < 16  && src->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15");
2531     if (dst->encoding() == src->encoding()) return;
2532     Assembler::vmovdqu(dst, src);
2533 }
2534 
2535 void MacroAssembler::vmovdqu(XMMRegister dst, AddressLiteral src, Register scratch_reg) {
2536   if (reachable(src)) {
2537     vmovdqu(dst, as_Address(src));
2538   }
2539   else {
2540     lea(scratch_reg, src);
2541     vmovdqu(dst, Address(scratch_reg, 0));
2542   }
2543 }
2544 
2545 
2546 void MacroAssembler::kmovwl(KRegister dst, AddressLiteral src, Register scratch_reg) {
2547   if (reachable(src)) {
2548     kmovwl(dst, as_Address(src));
2549   } else {
2550     lea(scratch_reg, src);
2551     kmovwl(dst, Address(scratch_reg, 0));
2552   }
2553 }
2554 
2555 void MacroAssembler::evmovdqub(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge,
2556                                int vector_len, Register scratch_reg) {
2557   if (reachable(src)) {
2558     if (mask == k0) {
2559       Assembler::evmovdqub(dst, as_Address(src), merge, vector_len);
2560     } else {
2561       Assembler::evmovdqub(dst, mask, as_Address(src), merge, vector_len);
2562     }
2563   } else {
2564     lea(scratch_reg, src);
2565     if (mask == k0) {
2566       Assembler::evmovdqub(dst, Address(scratch_reg, 0), merge, vector_len);
2567     } else {
2568       Assembler::evmovdqub(dst, mask, Address(scratch_reg, 0), merge, vector_len);
2569     }
2570   }
2571 }
2572 
2573 void MacroAssembler::evmovdquw(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge,
2574                                int vector_len, Register scratch_reg) {
2575   if (reachable(src)) {
2576     Assembler::evmovdquw(dst, mask, as_Address(src), merge, vector_len);
2577   } else {
2578     lea(scratch_reg, src);
2579     Assembler::evmovdquw(dst, mask, Address(scratch_reg, 0), merge, vector_len);
2580   }
2581 }
2582 
2583 void MacroAssembler::evmovdqul(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge,
2584                                int vector_len, Register scratch_reg) {
2585   if (reachable(src)) {
2586     Assembler::evmovdqul(dst, mask, as_Address(src), merge, vector_len);
2587   } else {
2588     lea(scratch_reg, src);
2589     Assembler::evmovdqul(dst, mask, Address(scratch_reg, 0), merge, vector_len);
2590   }
2591 }
2592 
2593 void MacroAssembler::evmovdquq(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge,
2594                                int vector_len, Register scratch_reg) {
2595   if (reachable(src)) {
2596     Assembler::evmovdquq(dst, mask, as_Address(src), merge, vector_len);
2597   } else {
2598     lea(scratch_reg, src);
2599     Assembler::evmovdquq(dst, mask, Address(scratch_reg, 0), merge, vector_len);
2600   }
2601 }
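
The new evmovdqu{b,w,l,q} wrappers above thread a KRegister mask and a merge flag down to the assembler (evmovdqub additionally falls back to the unmasked form when the mask is k0). As a reading aid only, here is a scalar model of the two AVX-512 masking policies the merge flag selects between: with merge-masking, lanes not selected by the mask keep the destination's previous contents; with zero-masking they are cleared.

    #include <cstdint>
    #include <cstdio>

    int main() {
      uint8_t dst[8] = {9, 9, 9, 9, 9, 9, 9, 9};
      uint8_t src[8] = {1, 2, 3, 4, 5, 6, 7, 8};
      uint8_t mask   = 0x0F;           // k-register: low four lanes selected
      bool    merge  = false;          // the 'merge' parameter in the wrappers above

      for (int lane = 0; lane < 8; lane++) {
        if (mask & (1u << lane)) dst[lane] = src[lane];   // selected lane: loaded
        else if (!merge)         dst[lane] = 0;           // zero-masking clears it
        /* else: merge-masking leaves dst[lane] untouched */
      }
      for (int lane = 0; lane < 8; lane++) printf("%u ", dst[lane]);  // 1 2 3 4 0 0 0 0
      printf("\n");
      return 0;
    }
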
2602 
2603 void MacroAssembler::evmovdquq(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch) {
2604   if (reachable(src)) {
2605     Assembler::evmovdquq(dst, as_Address(src), vector_len);
2606   } else {
2607     lea(rscratch, src);
2608     Assembler::evmovdquq(dst, Address(rscratch, 0), vector_len);
2609   }
2610 }
2611 
2612 void MacroAssembler::movdqa(XMMRegister dst, AddressLiteral src) {
2613   if (reachable(src)) {
2614     Assembler::movdqa(dst, as_Address(src));
2615   } else {
2616     lea(rscratch1, src);
2617     Assembler::movdqa(dst, Address(rscratch1, 0));
2618   }
2619 }
2620 
2621 void MacroAssembler::movsd(XMMRegister dst, AddressLiteral src) {
2622   if (reachable(src)) {


3069     lea(scratch_reg, src);
3070     Assembler::vpand(dst, nds, Address(scratch_reg, 0), vector_len);
3071   }
3072 }
3073 
3074 void MacroAssembler::vpbroadcastw(XMMRegister dst, XMMRegister src, int vector_len) {
3075   assert(((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
3076   Assembler::vpbroadcastw(dst, src, vector_len);
3077 }
3078 
3079 void MacroAssembler::vpcmpeqb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3080   assert(((dst->encoding() < 16 && src->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
3081   Assembler::vpcmpeqb(dst, nds, src, vector_len);
3082 }
3083 
3084 void MacroAssembler::vpcmpeqw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3085   assert(((dst->encoding() < 16 && src->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
3086   Assembler::vpcmpeqw(dst, nds, src, vector_len);
3087 }
3088 
3089 void MacroAssembler::evpcmpeqd(KRegister kdst, KRegister mask, XMMRegister nds,
3090                                AddressLiteral src, int vector_len, Register scratch_reg) {
3091   if (reachable(src)) {
3092     Assembler::evpcmpeqd(kdst, mask, nds, as_Address(src), vector_len);
3093   } else {
3094     lea(scratch_reg, src);
3095     Assembler::evpcmpeqd(kdst, mask, nds, Address(scratch_reg, 0), vector_len);
3096   }
3097 }
3098 
3099 void MacroAssembler::evpcmpd(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src,
3100                              int comparison, int vector_len, Register scratch_reg) {
3101   if (reachable(src)) {
3102     Assembler::evpcmpd(kdst, mask, nds, as_Address(src), comparison, vector_len);
3103   } else {
3104     lea(scratch_reg, src);
3105     Assembler::evpcmpd(kdst, mask, nds, Address(scratch_reg, 0), comparison, vector_len);
3106   }
3107 }
3108 
3109 void MacroAssembler::evpcmpq(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src,
3110                              int comparison, int vector_len, Register scratch_reg) {
3111   if (reachable(src)) {
3112     Assembler::evpcmpq(kdst, mask, nds, as_Address(src), comparison, vector_len);
3113   } else {
3114     lea(scratch_reg, src);
3115     Assembler::evpcmpq(kdst, mask, nds, Address(scratch_reg, 0), comparison, vector_len);
3116   }
3117 }
3118 
3119 void MacroAssembler::evpcmpb(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src,
3120                              int comparison, int vector_len, Register scratch_reg) {
3121   if (reachable(src)) {
3122     Assembler::evpcmpb(kdst, mask, nds, as_Address(src), comparison, vector_len);
3123   } else {
3124     lea(scratch_reg, src);
3125     Assembler::evpcmpb(kdst, mask, nds, Address(scratch_reg, 0), comparison, vector_len);
3126   }
3127 }
3128 
3129 void MacroAssembler::evpcmpw(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src,
3130                              int comparison, int vector_len, Register scratch_reg) {
3131   if (reachable(src)) {
3132     Assembler::evpcmpw(kdst, mask, nds, as_Address(src), comparison, vector_len);
3133   } else {
3134     lea(scratch_reg, src);
3135     Assembler::evpcmpw(kdst, mask, nds, Address(scratch_reg, 0), comparison, vector_len);
3136   }
3137 }
3138 
3139 void MacroAssembler::vpcmpCC(XMMRegister dst, XMMRegister nds, XMMRegister src, int cond_encoding, Width width, int vector_len) {
3140   if (width == Assembler::Q) {
3141     Assembler::vpcmpCCq(dst, nds, src, cond_encoding, vector_len);
3142   } else {
3143     Assembler::vpcmpCCbwd(dst, nds, src, cond_encoding, vector_len);
3144   }
3145 }
3146 
3147 void MacroAssembler::vpcmpCCW(XMMRegister dst, XMMRegister nds, XMMRegister src, ComparisonPredicate cond, Width width, int vector_len, Register scratch_reg) {
3148   int eq_cond_enc = 0x29;
3149   int gt_cond_enc = 0x37;
3150   if (width != Assembler::Q) {
3151     eq_cond_enc = 0x74 + width;
3152     gt_cond_enc = 0x64 + width;
3153   }
3154   switch (cond) {
3155   case eq:
3156     vpcmpCC(dst, nds, src, eq_cond_enc, width, vector_len);
3157     break;
3158   case neq:
3159     vpcmpCC(dst, nds, src, eq_cond_enc, width, vector_len);
3160     vpxor(dst, dst, ExternalAddress(StubRoutines::x86::vector_all_bits_set()), vector_len, scratch_reg);
3161     break;
3162   case le:
3163     vpcmpCC(dst, nds, src, gt_cond_enc, width, vector_len);
3164     vpxor(dst, dst, ExternalAddress(StubRoutines::x86::vector_all_bits_set()), vector_len, scratch_reg);
3165     break;
3166   case nlt:
3167     vpcmpCC(dst, src, nds, gt_cond_enc, width, vector_len);
3168     vpxor(dst, dst, ExternalAddress(StubRoutines::x86::vector_all_bits_set()), vector_len, scratch_reg);
3169     break;
3170   case lt:
3171     vpcmpCC(dst, src, nds, gt_cond_enc, width, vector_len);
3172     break;
3173   case nle:
3174     vpcmpCC(dst, nds, src, gt_cond_enc, width, vector_len);
3175     break;
3176   default:
3177     assert(false, "Should not reach here");
3178   }
3179 }
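
vpcmpCCW above synthesizes every predicate from the two comparisons the instructions provide (eq and signed gt): neq is eq XOR all-ones, le is gt XOR all-ones, lt is gt with the operands swapped, nlt is swapped gt XOR all-ones, and nle is gt itself (the XOR is the vpxor against vector_all_bits_set). A standalone per-lane check of those identities on one element pair, for illustration only:

    #include <cstdint>
    #include <cstdio>

    int main() {
      int16_t  a = 3, b = 7;
      uint16_t all = 0xFFFF;
      uint16_t eq  = (a == b) ? all : 0;   // lane result of the eq encoding
      uint16_t gt  = (a >  b) ? all : 0;   // lane result of the gt encoding
      uint16_t gts = (b >  a) ? all : 0;   // gt with operands swapped

      printf("neq ok: %d\n", (uint16_t)(eq  ^ all) == ((a != b) ? all : 0));   // 1
      printf("le  ok: %d\n", (uint16_t)(gt  ^ all) == ((a <= b) ? all : 0));   // 1
      printf("lt  ok: %d\n",            gts         == ((a <  b) ? all : 0));  // 1
      printf("nlt ok: %d\n", (uint16_t)(gts ^ all) == ((a >= b) ? all : 0));   // 1
      printf("nle ok: %d\n",            gt          == ((a >  b) ? all : 0));  // 1
      return 0;
    }
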
3180 
3181 void MacroAssembler::vpmovzxbw(XMMRegister dst, Address src, int vector_len) {
3182   assert(((dst->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
3183   Assembler::vpmovzxbw(dst, src, vector_len);
3184 }
3185 
3186 void MacroAssembler::vpmovmskb(Register dst, XMMRegister src) {
3187   assert((src->encoding() < 16),"XMM register should be 0-15");
3188   Assembler::vpmovmskb(dst, src);
3189 }
3190 
3191 void MacroAssembler::vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3192   assert(((dst->encoding() < 16 && src->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
3193   Assembler::vpmullw(dst, nds, src, vector_len);
3194 }
3195 
3196 void MacroAssembler::vpmullw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
3197   assert(((dst->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
3198   Assembler::vpmullw(dst, nds, src, vector_len);
3199 }
3200 


3285 }
3286 
3287 void MacroAssembler::vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg) {
3288   if (reachable(src)) {
3289     vandpd(dst, nds, as_Address(src), vector_len);
3290   } else {
3291     lea(scratch_reg, src);
3292     vandpd(dst, nds, Address(scratch_reg, 0), vector_len);
3293   }
3294 }
3295 
3296 void MacroAssembler::vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg) {
3297   if (reachable(src)) {
3298     vandps(dst, nds, as_Address(src), vector_len);
3299   } else {
3300     lea(scratch_reg, src);
3301     vandps(dst, nds, Address(scratch_reg, 0), vector_len);
3302   }
3303 }
3304 
3305 void MacroAssembler::evpord(XMMRegister dst, KRegister mask, XMMRegister nds, AddressLiteral src,
3306                             bool merge, int vector_len, Register scratch_reg) {
3307   if (reachable(src)) {
3308     Assembler::evpord(dst, mask, nds, as_Address(src), merge, vector_len);
3309   } else {
3310     lea(scratch_reg, src);
3311     Assembler::evpord(dst, mask, nds, Address(scratch_reg, 0), merge, vector_len);
3312   }
3313 }
3314 
3315 void MacroAssembler::vdivsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
3316   if (reachable(src)) {
3317     vdivsd(dst, nds, as_Address(src));
3318   } else {
3319     lea(rscratch1, src);
3320     vdivsd(dst, nds, Address(rscratch1, 0));
3321   }
3322 }
3323 
3324 void MacroAssembler::vdivss(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
3325   if (reachable(src)) {
3326     vdivss(dst, nds, as_Address(src));
3327   } else {
3328     lea(rscratch1, src);
3329     vdivss(dst, nds, Address(rscratch1, 0));
3330   }
3331 }
3332 
3333 void MacroAssembler::vmulsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
3334   if (reachable(src)) {


3391   } else {
3392     lea(scratch_reg, src);
3393     vxorps(dst, nds, Address(scratch_reg, 0), vector_len);
3394   }
3395 }
3396 
3397 void MacroAssembler::vpxor(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg) {
3398   if (UseAVX > 1 || (vector_len < 1)) {
3399     if (reachable(src)) {
3400       Assembler::vpxor(dst, nds, as_Address(src), vector_len);
3401     } else {
3402       lea(scratch_reg, src);
3403       Assembler::vpxor(dst, nds, Address(scratch_reg, 0), vector_len);
3404     }
3405   }
3406   else {
3407     MacroAssembler::vxorpd(dst, nds, src, vector_len, scratch_reg);
3408   }
3409 }
3410 
3411 void MacroAssembler::vpermd(XMMRegister dst,  XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg) {
3412   if (reachable(src)) {
3413     Assembler::vpermd(dst, nds, as_Address(src), vector_len);
3414   } else {
3415     lea(scratch_reg, src);
3416     Assembler::vpermd(dst, nds, Address(scratch_reg, 0), vector_len);
3417   }
3418 }
3419 
3420 void MacroAssembler::clear_jweak_tag(Register possibly_jweak) {
3421   const int32_t inverted_jweak_mask = ~static_cast<int32_t>(JNIHandles::weak_tag_mask);
3422   STATIC_ASSERT(inverted_jweak_mask == -2); // otherwise check this code
3423   // The inverted mask is sign-extended
3424   andptr(possibly_jweak, inverted_jweak_mask);
3425 }
3426 
3427 void MacroAssembler::resolve_jobject(Register value,
3428                                      Register thread,
3429                                      Register tmp) {
3430   assert_different_registers(value, thread, tmp);
3431   Label done, not_weak;
3432   testptr(value, value);
3433   jcc(Assembler::zero, done);                // Use NULL as-is.
3434   testptr(value, JNIHandles::weak_tag_mask); // Test for jweak tag.
3435   jcc(Assembler::zero, not_weak);
3436   // Resolve jweak.
3437   access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF,
3438                  value, Address(value, -JNIHandles::weak_tag_value), tmp, thread);


5924 
5925   //scale is in rcx in both Win64 and Unix
5926   ShortBranchVerifier sbv(this);
5927 
5928   shlq(length);
5929   xorq(result, result);
5930 
5931   if ((AVX3Threshold == 0) && (UseAVX > 2) &&
5932       VM_Version::supports_avx512vlbw()) {
5933     Label VECTOR64_LOOP, VECTOR64_NOT_EQUAL, VECTOR32_TAIL;
5934 
5935     cmpq(length, 64);
5936     jcc(Assembler::less, VECTOR32_TAIL);
5937 
5938     movq(tmp1, length);
5939     andq(tmp1, 0x3F);      // tail count
5940     andq(length, ~(0x3F)); //vector count
5941 
5942     bind(VECTOR64_LOOP);
5943     // AVX512 code to compare 64 byte vectors.
5944     evmovdqub(rymm0, Address(obja, result), false, Assembler::AVX_512bit);
5945     evpcmpeqb(k7, rymm0, Address(objb, result), Assembler::AVX_512bit);
5946     kortestql(k7, k7);
5947     jcc(Assembler::aboveEqual, VECTOR64_NOT_EQUAL);     // mismatch
5948     addq(result, 64);
5949     subq(length, 64);
5950     jccb(Assembler::notZero, VECTOR64_LOOP);
5951 
5952     //bind(VECTOR64_TAIL);
5953     testq(tmp1, tmp1);
5954     jcc(Assembler::zero, SAME_TILL_END);
5955 
5956     //bind(VECTOR64_TAIL);
5957     // AVX512 code to compare up to 63 byte vectors.
5958     mov64(tmp2, 0xFFFFFFFFFFFFFFFF);
5959     shlxq(tmp2, tmp2, tmp1);
5960     notq(tmp2);
5961     kmovql(k3, tmp2);
5962 
5963     evmovdqub(rymm0, k3, Address(obja, result), false, Assembler::AVX_512bit);
5964     evpcmpeqb(k7, k3, rymm0, Address(objb, result), Assembler::AVX_512bit);
5965 
5966     ktestql(k7, k3);
5967     jcc(Assembler::below, SAME_TILL_END);     // not mismatch
5968 
5969     bind(VECTOR64_NOT_EQUAL);
5970     kmovql(tmp1, k7);
5971     notq(tmp1);
5972     tzcntq(tmp1, tmp1);
5973     addq(result, tmp1);
5974     shrq(result);
5975     jmp(DONE);
5976     bind(VECTOR32_TAIL);
5977   }
5978 
5979   cmpq(length, 8);
5980   jcc(Assembler::equal, VECTOR8_LOOP);
5981   jcc(Assembler::less, VECTOR4_TAIL);
5982 
5983   if (UseAVX >= 2) {


7738     evpbroadcastw(tmp2Reg, result, Assembler::AVX_512bit);
7739 
7740     testl(len, -64);
7741     jcc(Assembler::zero, post_alignment);
7742 
7743     movl(tmp5, dst);
7744     andl(tmp5, (32 - 1));
7745     negl(tmp5);
7746     andl(tmp5, (32 - 1));
7747 
7748     // bail out when there is nothing to be done
7749     testl(tmp5, 0xFFFFFFFF);
7750     jcc(Assembler::zero, post_alignment);
7751 
7752     // ~(~0 << len), where len is the # of remaining elements to process
7753     movl(result, 0xFFFFFFFF);
7754     shlxl(result, result, tmp5);
7755     notl(result);
7756     kmovdl(k3, result);
7757 
7758     evmovdquw(tmp1Reg, k3, Address(src, 0), /*merge*/ false, Assembler::AVX_512bit);
7759     evpcmpuw(k2, k3, tmp1Reg, tmp2Reg, Assembler::le, Assembler::AVX_512bit);
7760     ktestd(k2, k3);
7761     jcc(Assembler::carryClear, return_zero);
7762 
7763     evpmovwb(Address(dst, 0), k3, tmp1Reg, Assembler::AVX_512bit);
7764 
7765     addptr(src, tmp5);
7766     addptr(src, tmp5);
7767     addptr(dst, tmp5);
7768     subl(len, tmp5);
7769 
7770     bind(post_alignment);
7771     // end of alignment
7772 
7773     movl(tmp5, len);
7774     andl(tmp5, (32 - 1));    // tail count (in chars)
7775     andl(len, ~(32 - 1));    // vector count (in chars)
7776     jcc(Assembler::zero, copy_loop_tail);
7777 
7778     lea(src, Address(src, len, Address::times_2));
7779     lea(dst, Address(dst, len, Address::times_1));
7780     negptr(len);
7781 
7782     bind(copy_32_loop);
7783     evmovdquw(tmp1Reg, Address(src, len, Address::times_2), /*merge*/ false, Assembler::AVX_512bit);
7784     evpcmpuw(k2, tmp1Reg, tmp2Reg, Assembler::le, Assembler::AVX_512bit);
7785     kortestdl(k2, k2);
7786     jcc(Assembler::carryClear, return_zero);
7787 
7788     // All elements in current processed chunk are valid candidates for
7789     // compression. Write the truncated byte elements to memory.
7790     evpmovwb(Address(dst, len, Address::times_1), tmp1Reg, Assembler::AVX_512bit);
7791     addptr(len, 32);
7792     jcc(Assembler::notZero, copy_32_loop);
7793 
7794     bind(copy_loop_tail);
7795     // bail out when there is nothing to be done
7796     testl(tmp5, 0xFFFFFFFF);
7797     jcc(Assembler::zero, return_length);
7798 
7799     movl(len, tmp5);
7800 
7801     // ~(~0 << len), where len is the # of remaining elements to process
7802     movl(result, 0xFFFFFFFF);
7803     shlxl(result, result, len);
7804     notl(result);
7805 
7806     kmovdl(k3, result);
7807 
7808     evmovdquw(tmp1Reg, k3, Address(src, 0), /*merge*/ false, Assembler::AVX_512bit);
7809     evpcmpuw(k2, k3, tmp1Reg, tmp2Reg, Assembler::le, Assembler::AVX_512bit);
7810     ktestd(k2, k3);
7811     jcc(Assembler::carryClear, return_zero);
7812 
7813     evpmovwb(Address(dst, 0), k3, tmp1Reg, Assembler::AVX_512bit);
7814     jmp(return_length);
7815 
7816     bind(below_threshold);
7817   }
7818 
7819   if (UseSSE42Intrinsics) {
7820     Label copy_32_loop, copy_16, copy_tail;
7821 
7822     movl(result, len);
7823 
7824     movl(tmp5, 0xff00ff00);   // create mask to test for Unicode chars in vectors
7825 
7826     // vectored compression
7827     andl(len, 0xfffffff0);    // vector count (in chars)
7828     andl(result, 0x0000000f);    // tail count (in chars)


7933     testl(len, -16);
7934     jcc(Assembler::zero, below_threshold);
7935 
7936     testl(len, -1 * AVX3Threshold);
7937     jcc(Assembler::zero, avx3_threshold);
7938 
7939     // In order to use only one arithmetic operation for the main loop we use
7940     // this pre-calculation
7941     andl(tmp2, (32 - 1)); // tail count (in chars), 32 element wide loop
7942     andl(len, -32);     // vector count
7943     jccb(Assembler::zero, copy_tail);
7944 
7945     lea(src, Address(src, len, Address::times_1));
7946     lea(dst, Address(dst, len, Address::times_2));
7947     negptr(len);
7948 
7949 
7950     // inflate 32 chars per iter
7951     bind(copy_32_loop);
7952     vpmovzxbw(tmp1, Address(src, len, Address::times_1), Assembler::AVX_512bit);
7953     evmovdquw(Address(dst, len, Address::times_2), tmp1, /*merge*/ false, Assembler::AVX_512bit);
7954     addptr(len, 32);
7955     jcc(Assembler::notZero, copy_32_loop);
7956 
7957     bind(copy_tail);
7958     // bail out when there is nothing to be done
7959     testl(tmp2, -1); // we don't destroy the contents of tmp2 here
7960     jcc(Assembler::zero, done);
7961 
7962     // ~(~0 << length), where length is the # of remaining elements to process
7963     movl(tmp3_aliased, -1);
7964     shlxl(tmp3_aliased, tmp3_aliased, tmp2);
7965     notl(tmp3_aliased);
7966     kmovdl(k2, tmp3_aliased);
7967     evpmovzxbw(tmp1, k2, Address(src, 0), Assembler::AVX_512bit);
7968     evmovdquw(Address(dst, 0), k2, tmp1, /*merge*/ true, Assembler::AVX_512bit);
7969 
7970     jmp(done);
7971     bind(avx3_threshold);
7972   }
7973   if (UseSSE42Intrinsics) {
7974     Label copy_16_loop, copy_8_loop, copy_bytes, copy_new_tail, copy_tail;
7975 
7976     if (UseAVX > 1) {
7977       andl(tmp2, (16 - 1));
7978       andl(len, -16);
7979       jccb(Assembler::zero, copy_new_tail);
7980     } else {
7981       andl(tmp2, 0x00000007);   // tail count (in chars)
7982       andl(len, 0xfffffff8);    // vector count (in chars)
7983       jccb(Assembler::zero, copy_tail);
7984     }
7985 
7986     // vectored inflation
7987     lea(src, Address(src, len, Address::times_1));
7988     lea(dst, Address(dst, len, Address::times_2));

