src/cpu/x86/vm/macroAssembler_x86.cpp
6958   xorptr(tmp, tmp);
6959   if (UseFastStosb) {
6960     shlptr(cnt,3); // convert to number of bytes
6961     rep_stosb();
6962   } else {
6963     NOT_LP64(shlptr(cnt,1);) // convert to number of dwords for 32-bit VM
6964     rep_stos();
6965   }
6966 }
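
In C terms, this tail clears cnt machine words: the UseFastStosb path rescales the count to bytes (cnt << 3 on 64-bit) before rep stosb, while the fallback stores word-sized chunks with rep stos (after cnt << 1 on 32-bit, where words are stored as dwords). A minimal reference for the overall effect, names illustrative:

    #include <cstddef>
    #include <cstring>

    // Reference semantics only: both machine paths store zeros over
    // cnt * wordSize bytes starting at base (tmp/rax is zeroed above).
    static void clear_words_ref(void* base, std::size_t cnt) {
      std::memset(base, 0, cnt * sizeof(void*));
    }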
6967 
6968 #ifdef COMPILER2
6969 
6970 // IndexOf for constant substrings with size >= 8 chars
6971 // which don't need to be loaded through stack.
6972 void MacroAssembler::string_indexofC8(Register str1, Register str2,
6973                                       Register cnt1, Register cnt2,
6974                                       int int_cnt2,  Register result,
6975                                       XMMRegister vec, Register tmp,
6976                                       int ae) {
6977   ShortBranchVerifier sbv(this);
6978   assert(UseSSE42Intrinsics, "SSE4.2 is required");

6979   assert(ae != StrIntrinsicNode::LU, "Invalid encoding");
6980 
6981   // This method uses the pcmpestri instruction with bound registers
6982   //   inputs:
6983   //     xmm - substring
6984   //     rax - substring length (elements count)
6985   //     mem - scanned string
6986   //     rdx - string length (elements count)
6987   //     0xd - mode: 1100 (substring search) + 01 (unsigned shorts)
6988   //     0xc - mode: 1100 (substring search) + 00 (unsigned bytes)
6989   //   outputs:
6990   //     rcx - matched index in string
6991   assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri");
6992   int mode   = (ae == StrIntrinsicNode::LL) ? 0x0c : 0x0d; // bytes or shorts
6993   int stride = (ae == StrIntrinsicNode::LL) ? 16 : 8; //UU, UL -> 8
6994   Address::ScaleFactor scale1 = (ae == StrIntrinsicNode::LL) ? Address::times_1 : Address::times_2;
6995   Address::ScaleFactor scale2 = (ae == StrIntrinsicNode::UL) ? Address::times_1 : scale1;
6996 
6997   Label RELOAD_SUBSTR, SCAN_TO_SUBSTR, SCAN_SUBSTR,
6998         RET_FOUND, RET_NOT_FOUND, EXIT, FOUND_SUBSTR,


7136   } // (int_cnt2 > 8)
7137 
7138   bind(RET_FOUND);
7139   // Found result if we matched full small substring.
7140   // Compute substr offset
7141   subptr(result, str1);
7142   if (ae == StrIntrinsicNode::UU || ae == StrIntrinsicNode::UL) {
7143     shrl(result, 1); // index
7144   }
7145   bind(EXIT);
7146 
7147 } // string_indexofC8
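
The mode bytes above map directly onto the SSE4.2 string-instruction control flags exposed by compiler intrinsics: 0x0d is _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ORDERED, and 0x0c is the same "equal ordered" aggregation over unsigned bytes. A standalone sketch of one probe for the short-element case (names illustrative; it assumes both pointers are readable for a full 16 bytes, which is exactly the page-boundary concern string_indexof handles below):

    #include <nmmintrin.h>  // SSE4.2: pcmpestri and friends

    // Returns the first position in this 8-char block of 'text' at which
    // 'pat' may start (rcx in the listing above), or 8 if none qualifies.
    static int substr_probe8(const unsigned short* text, int text_len,
                             const unsigned short* pat, int pat_len) {
      __m128i t = _mm_loadu_si128(reinterpret_cast<const __m128i*>(text));
      __m128i p = _mm_loadu_si128(reinterpret_cast<const __m128i*>(pat));
      return _mm_cmpestri(p, pat_len, t, text_len,
                          _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ORDERED);  // 0x0d
    }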
7148 
7149 // Small strings are loaded through stack if they cross page boundary.
7150 void MacroAssembler::string_indexof(Register str1, Register str2,
7151                                     Register cnt1, Register cnt2,
7152                                     int int_cnt2,  Register result,
7153                                     XMMRegister vec, Register tmp,
7154                                     int ae) {
7155   ShortBranchVerifier sbv(this);
7156   assert(UseSSE42Intrinsics, "SSE4.2 is required");

7157   assert(ae != StrIntrinsicNode::LU, "Invalid encoding");
7158 
7159   //
7160   // int_cnt2 is length of small (< 8 chars) constant substring
7161   // or (-1) for non constant substring in which case its length
7162   // is in cnt2 register.
7163   //
7164   // Note, inline_string_indexOf() generates checks:
7165   // if (substr.count > string.count) return -1;
7166   // if (substr.count == 0) return 0;
7167   //
7168   int stride = (ae == StrIntrinsicNode::LL) ? 16 : 8; //UU, UL -> 8
7169   assert(int_cnt2 == -1 || (0 < int_cnt2 && int_cnt2 < stride), "should be != 0");
7170   // This method uses the pcmpestri instruction with bound registers
7171   //   inputs:
7172   //     xmm - substring
7173   //     rax - substring length (elements count)
7174   //     mem - scanned string
7175   //     rdx - string length (elements count)
7176   //     0xd - mode: 1100 (substring search) + 01 (unsigned shorts)


7453     jmpb(SCAN_SUBSTR);
7454 
7455     bind(RET_FOUND_LONG);
7456     movptr(str1, Address(rsp, wordSize));
7457   } // non constant
7458 
7459   bind(RET_FOUND);
7460   // Compute substr offset
7461   subptr(result, str1);
7462   if (ae == StrIntrinsicNode::UU || ae == StrIntrinsicNode::UL) {
7463     shrl(result, 1); // index
7464   }
7465   bind(CLEANUP);
7466   pop(rsp); // restore SP
7467 
7468 } // string_indexof
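
As a behavioural reference (not the generated code), the loops above vectorize the plain quadratic scan, including the two guard checks that the comment says inline_string_indexOf() emits; a scalar C++ model for the UU case:

    typedef unsigned short jchar;  // the VM's 16-bit char, as in jni.h

    static int index_of_ref(const jchar* str, int str_len,
                            const jchar* sub, int sub_len) {
      if (sub_len > str_len) return -1;  // generated check: substr longer than string
      if (sub_len == 0)      return 0;   // generated check: empty substr
      for (int i = 0; i + sub_len <= str_len; i++) {
        int j = 0;
        while (j < sub_len && str[i + j] == sub[j]) j++;
        if (j == sub_len) return i;      // full match starting at index i
      }
      return -1;
    }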
7469 
7470 void MacroAssembler::string_indexof_char(Register str1, Register cnt1, Register ch, Register result,
7471                                          XMMRegister vec1, XMMRegister vec2, XMMRegister vec3, Register tmp) {
7472   ShortBranchVerifier sbv(this);
7473   assert(UseSSE42Intrinsics, "SSE4.2 is required");

7474 
7475   int stride = 8;
7476 
7477   Label FOUND_CHAR, SCAN_TO_CHAR, SCAN_TO_CHAR_LOOP,
7478         SCAN_TO_8_CHAR, SCAN_TO_8_CHAR_LOOP, SCAN_TO_16_CHAR_LOOP,
7479         RET_NOT_FOUND, SCAN_TO_8_CHAR_INIT,
7480         FOUND_SEQ_CHAR, DONE_LABEL;
7481 
7482   movptr(result, str1);
7483   if (UseAVX >= 2) {
7484     cmpl(cnt1, stride);
7485     jccb(Assembler::less, SCAN_TO_CHAR_LOOP);
7486     cmpl(cnt1, 2*stride);
7487     jccb(Assembler::less, SCAN_TO_8_CHAR_INIT);
7488     movdl(vec1, ch);
7489     vpbroadcastw(vec1, vec1);
7490     vpxor(vec2, vec2);
7491     movl(tmp, cnt1);
7492     andl(tmp, 0xFFFFFFF0);  //vector count (in chars)
7493     andl(cnt1,0x0000000F);  //tail count (in chars)
7494 
7495     bind(SCAN_TO_16_CHAR_LOOP);
7496     vmovdqu(vec3, Address(result, 0));
7497     vpcmpeqw(vec3, vec3, vec1, 1);
7498     vptest(vec2, vec3);
7499     jcc(Assembler::carryClear, FOUND_CHAR);
7500     addptr(result, 32);
7501     subl(tmp, 2*stride);
7502     jccb(Assembler::notZero, SCAN_TO_16_CHAR_LOOP);
7503     jmp(SCAN_TO_8_CHAR);
7504     bind(SCAN_TO_8_CHAR_INIT);
7505     movdl(vec1, ch);
7506     pshuflw(vec1, vec1, 0x00);
7507     pshufd(vec1, vec1, 0);
7508     pxor(vec2, vec2);
7509   }
7510   if (UseAVX >= 2 || UseSSE42Intrinsics) {
7511     bind(SCAN_TO_8_CHAR);
7512     cmpl(cnt1, stride);
7513     if (UseAVX >= 2) {
7514       jccb(Assembler::less, SCAN_TO_CHAR);
7515     }
7516     if (!(UseAVX >= 2)) {
7517       jccb(Assembler::less, SCAN_TO_CHAR_LOOP);
7518       movdl(vec1, ch);
7519       pshuflw(vec1, vec1, 0x00);
7520       pshufd(vec1, vec1, 0);
7521       pxor(vec2, vec2);
7522     }
7523     movl(tmp, cnt1);
7524     andl(tmp, 0xFFFFFFF8);  //vector count (in chars)
7525     andl(cnt1,0x00000007);  //tail count (in chars)
7526 
7527     bind(SCAN_TO_8_CHAR_LOOP);
7528     movdqu(vec3, Address(result, 0));
7529     pcmpeqw(vec3, vec1);
7530     ptest(vec2, vec3);
7531     jcc(Assembler::carryClear, FOUND_CHAR);
7532     addptr(result, 16);
7533     subl(tmp, stride);
7534     jccb(Assembler::notZero, SCAN_TO_8_CHAR_LOOP);
7535   }
7536   bind(SCAN_TO_CHAR);
7537   testl(cnt1, cnt1);
7538   jcc(Assembler::zero, RET_NOT_FOUND);
7539 
7540   bind(SCAN_TO_CHAR_LOOP);
7541   load_unsigned_short(tmp, Address(result, 0));
7542   cmpl(ch, tmp);
7543   jccb(Assembler::equal, FOUND_SEQ_CHAR);
7544   addptr(result, 2);
7545   subl(cnt1, 1);
7546   jccb(Assembler::zero, RET_NOT_FOUND);
7547   jmp(SCAN_TO_CHAR_LOOP);
7548 
7549   bind(RET_NOT_FOUND);
7550   movl(result, -1);
7551   jmpb(DONE_LABEL);
7552 
7553   if (UseAVX >= 2 || UseSSE42Intrinsics) {
7554     bind(FOUND_CHAR);
7555     if (UseAVX >= 2) {
7556       vpmovmskb(tmp, vec3);
7557     } else {
7558       pmovmskb(tmp, vec3);
7559     }
7560     bsfl(ch, tmp);
7561     addl(result, ch);
7562   }
7563 
7564   bind(FOUND_SEQ_CHAR);
7565   subptr(result, str1);
7566   shrl(result, 1);
7567 
7568   bind(DONE_LABEL);
7569 } // string_indexof_char
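
SCAN_TO_8_CHAR_LOOP above leans on a ptest idiom: vec2 is zero, so ptest(vec2, vec3) sets the carry flag exactly when vec3 is all zeroes, and carryClear therefore means "some pcmpeqw lane matched". One probe in intrinsics form (assuming 16 readable bytes at p):

    #include <nmmintrin.h>

    static bool block_has_char8(const unsigned short* p, unsigned short ch) {
      __m128i needle = _mm_set1_epi16(static_cast<short>(ch));  // movdl + pshuflw + pshufd
      __m128i block  = _mm_loadu_si128(reinterpret_cast<const __m128i*>(p));
      __m128i eq     = _mm_cmpeq_epi16(block, needle);          // pcmpeqw: 0xFFFF per hit
      // testc(0, eq): CF == (eq is all zeroes); a clear CF means a match.
      return _mm_testc_si128(_mm_setzero_si128(), eq) == 0;
    }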
7570 
7571 // helper function for string_compare
7572 void MacroAssembler::load_next_elements(Register elem1, Register elem2, Register str1, Register str2,
7573                                         Address::ScaleFactor scale, Address::ScaleFactor scale1,
7574                                         Address::ScaleFactor scale2, Register index, int ae) {
7575   if (ae == StrIntrinsicNode::LL) {
7576     load_unsigned_byte(elem1, Address(str1, index, scale, 0));
7577     load_unsigned_byte(elem2, Address(str2, index, scale, 0));
7578   } else if (ae == StrIntrinsicNode::UU) {
7579     load_unsigned_short(elem1, Address(str1, index, scale, 0));
7580     load_unsigned_short(elem2, Address(str2, index, scale, 0));
7581   } else {
7582     load_unsigned_byte(elem1, Address(str1, index, scale1, 0));


7631 
7632   // Check if the strings start at the same location and setup scale and stride
7633   if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
7634     cmpptr(str1, str2);
7635     jcc(Assembler::equal, LENGTH_DIFF_LABEL);
7636     if (ae == StrIntrinsicNode::LL) {
7637       scale = Address::times_1;
7638       stride = 16;
7639     } else {
7640       scale = Address::times_2;
7641       stride = 8;
7642     }
7643   } else {
7644     scale = Address::no_scale;  // not used
7645     scale1 = Address::times_1;
7646     scale2 = Address::times_2;
7647     stride = 8;
7648   }
7649 
7650   if (UseAVX >= 2 && UseSSE42Intrinsics) {

7651     Label COMPARE_WIDE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_WIDE_TAIL, COMPARE_SMALL_STR;
7652     Label COMPARE_WIDE_VECTORS_LOOP, COMPARE_16_CHARS, COMPARE_INDEX_CHAR;
7653     Label COMPARE_TAIL_LONG;
7654     int pcmpmask = 0x19;
7655     if (ae == StrIntrinsicNode::LL) {
7656       pcmpmask &= ~0x01;
7657     }
7658 
7659     // Setup to compare 16-chars (32-bytes) vectors,
7660     // start from first character again because it has aligned address.
7661     if (ae == StrIntrinsicNode::LL) {
7662       stride2 = 32;
7663     } else {
7664       stride2 = 16;
7665     }
7666     if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
7667       adr_stride = stride << scale;
7668     } else {
7669       adr_stride1 = 8;  //stride << scale1;
7670       adr_stride2 = 16; //stride << scale2;


7766       movdqu(vec1, Address(str1, 0));
7767     } else {
7768       pmovzxbw(vec1, Address(str1, 0));
7769     }
7770     pcmpestri(vec1, Address(str2, 0), pcmpmask);
7771     jcc(Assembler::below, COMPARE_INDEX_CHAR);
7772     subptr(cnt2, stride);
7773     jccb(Assembler::zero, LENGTH_DIFF_LABEL);
7774     if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
7775       lea(str1, Address(str1, result, scale));
7776       lea(str2, Address(str2, result, scale));
7777     } else {
7778       lea(str1, Address(str1, result, scale1));
7779       lea(str2, Address(str2, result, scale2));
7780     }
7781     negptr(cnt2);
7782     jmpb(WHILE_HEAD_LABEL);
7783 
7784     bind(COMPARE_SMALL_STR);
7785   } else if (UseSSE42Intrinsics) {

7786     Label COMPARE_WIDE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_TAIL;
7787     int pcmpmask = 0x19;
7788     // Setup to compare 8-char (16-byte) vectors,
7789     // start from first character again because it has aligned address.
7790     movl(result, cnt2);
7791     andl(cnt2, ~(stride - 1));   // cnt2 holds the vector count
7792     if (ae == StrIntrinsicNode::LL) {
7793       pcmpmask &= ~0x01;
7794     }
7795     jccb(Assembler::zero, COMPARE_TAIL);
7796     if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
7797       lea(str1, Address(str1, result, scale));
7798       lea(str2, Address(str2, result, scale));
7799     } else {
7800       lea(str1, Address(str1, result, scale1));
7801       lea(str2, Address(str2, result, scale2));
7802     }
7803     negptr(result);
7804 
7805     // pcmpestri


7898 // Search for Non-ASCII character (Negative byte value) in a byte array,
7899 // return true if it has any and false otherwise.
7900 void MacroAssembler::has_negatives(Register ary1, Register len,
7901                                    Register result, Register tmp1,
7902                                    XMMRegister vec1, XMMRegister vec2) {
7903 
7904   // rsi: byte array
7905   // rcx: len
7906   // rax: result
7907   ShortBranchVerifier sbv(this);
7908   assert_different_registers(ary1, len, result, tmp1);
7909   assert_different_registers(vec1, vec2);
7910   Label TRUE_LABEL, FALSE_LABEL, DONE, COMPARE_CHAR, COMPARE_VECTORS, COMPARE_BYTE;
7911 
7912   // len == 0
7913   testl(len, len);
7914   jcc(Assembler::zero, FALSE_LABEL);
7915 
7916   movl(result, len); // copy
7917 
7918   if (UseAVX >= 2) {

7919     // With AVX2, use 32-byte vector compare
7920     Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;
7921 
7922     // Compare 32-byte vectors
7923     andl(result, 0x0000001f);  //   tail count (in bytes)
7924     andl(len, 0xffffffe0);   // vector count (in bytes)
7925     jccb(Assembler::zero, COMPARE_TAIL);
7926 
7927     lea(ary1, Address(ary1, len, Address::times_1));
7928     negptr(len);
7929 
7930     movl(tmp1, 0x80808080);   // create mask to test for Unicode chars in vector
7931     movdl(vec2, tmp1);
7932     vpbroadcastd(vec2, vec2);
7933 
7934     bind(COMPARE_WIDE_VECTORS);
7935     vmovdqu(vec1, Address(ary1, len, Address::times_1));
7936     vptest(vec1, vec2);
7937     jccb(Assembler::notZero, TRUE_LABEL);
7938     addptr(len, 32);
7939     jcc(Assembler::notZero, COMPARE_WIDE_VECTORS);
7940 
7941     testl(result, result);
7942     jccb(Assembler::zero, FALSE_LABEL);
7943 
7944     vmovdqu(vec1, Address(ary1, result, Address::times_1, -32));
7945     vptest(vec1, vec2);
7946     jccb(Assembler::notZero, TRUE_LABEL);
7947     jmpb(FALSE_LABEL);
7948 
7949     bind(COMPARE_TAIL); // len is zero
7950     movl(len, result);
7951     // Fallthru to tail compare
7952   } else if (UseSSE42Intrinsics) {

7953     // With SSE4.2, use double quad vector compare
7954     Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;
7955 
7956     // Compare 16-byte vectors
7957     andl(result, 0x0000000f);  //   tail count (in bytes)
7958     andl(len, 0xfffffff0);   // vector count (in bytes)
7959     jccb(Assembler::zero, COMPARE_TAIL);
7960 
7961     lea(ary1, Address(ary1, len, Address::times_1));
7962     negptr(len);
7963 
7964     movl(tmp1, 0x80808080);
7965     movdl(vec2, tmp1);
7966     pshufd(vec2, vec2, 0);
7967 
7968     bind(COMPARE_WIDE_VECTORS);
7969     movdqu(vec1, Address(ary1, len, Address::times_1));
7970     ptest(vec1, vec2);
7971     jccb(Assembler::notZero, TRUE_LABEL);
7972     addptr(len, 16);


8009   subptr(result, 2);
8010   lea(ary1, Address(ary1, 2));
8011 
8012   bind(COMPARE_BYTE);
8013   testl(result, 0x1);   // tail  byte
8014   jccb(Assembler::zero, FALSE_LABEL);
8015   load_unsigned_byte(tmp1, Address(ary1, 0));
8016   andl(tmp1, 0x00000080);
8017   jccb(Assembler::notEqual, TRUE_LABEL);
8018   jmpb(FALSE_LABEL);
8019 
8020   bind(TRUE_LABEL);
8021   movl(result, 1);   // return true
8022   jmpb(DONE);
8023 
8024   bind(FALSE_LABEL);
8025   xorl(result, result); // return false
8026 
8027   // That's it
8028   bind(DONE);
8029   if (UseAVX >= 2) {
8030     // clean upper bits of YMM registers
8031     vpxor(vec1, vec1);
8032     vpxor(vec2, vec2);
8033   }
8034 }
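
The 0x80808080 constant broadcast in both vector paths is simply the byte sign bit replicated across the register, so ptest/vptest against it reports non-zero as soon as any byte has bit 7 set. Scalar reference:

    // Returns true iff any byte has its high (sign) bit set, i.e. the
    // array contains a non-ASCII byte.
    static bool has_negatives_ref(const unsigned char* a, int len) {
      for (int i = 0; i < len; i++) {
        if (a[i] & 0x80) return true;
      }
      return false;
    }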
8035 
8036 // Compare char[] or byte[] arrays aligned to 4 bytes or substrings.
8037 void MacroAssembler::arrays_equals(bool is_array_equ, Register ary1, Register ary2,
8038                                    Register limit, Register result, Register chr,
8039                                    XMMRegister vec1, XMMRegister vec2, bool is_char) {
8040   ShortBranchVerifier sbv(this);
8041   Label TRUE_LABEL, FALSE_LABEL, DONE, COMPARE_VECTORS, COMPARE_CHAR, COMPARE_BYTE;
8042 
8043   int length_offset  = arrayOopDesc::length_offset_in_bytes();
8044   int base_offset    = arrayOopDesc::base_offset_in_bytes(is_char ? T_CHAR : T_BYTE);
8045 
8046   if (is_array_equ) {
8047     // Check the input args
8048     cmpptr(ary1, ary2);
8049     jcc(Assembler::equal, TRUE_LABEL);


8097     vptest(vec1, vec1);
8098     jccb(Assembler::notZero, FALSE_LABEL);
8099     addptr(limit, 32);
8100     jcc(Assembler::notZero, COMPARE_WIDE_VECTORS);
8101 
8102     testl(result, result);
8103     jccb(Assembler::zero, TRUE_LABEL);
8104 
8105     vmovdqu(vec1, Address(ary1, result, Address::times_1, -32));
8106     vmovdqu(vec2, Address(ary2, result, Address::times_1, -32));
8107     vpxor(vec1, vec2);
8108 
8109     vptest(vec1, vec1);
8110     jccb(Assembler::notZero, FALSE_LABEL);
8111     jmpb(TRUE_LABEL);
8112 
8113     bind(COMPARE_TAIL); // limit is zero
8114     movl(limit, result);
8115     // Fallthru to tail compare
8116   } else if (UseSSE42Intrinsics) {

8117     // With SSE4.2, use double quad vector compare
8118     Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;
8119 
8120     // Compare 16-byte vectors
8121     andl(result, 0x0000000f);  //   tail count (in bytes)
8122     andl(limit, 0xfffffff0);   // vector count (in bytes)
8123     jccb(Assembler::zero, COMPARE_TAIL);
8124 
8125     lea(ary1, Address(ary1, limit, Address::times_1));
8126     lea(ary2, Address(ary2, limit, Address::times_1));
8127     negptr(limit);
8128 
8129     bind(COMPARE_WIDE_VECTORS);
8130     movdqu(vec1, Address(ary1, limit, Address::times_1));
8131     movdqu(vec2, Address(ary2, limit, Address::times_1));
8132     pxor(vec1, vec2);
8133 
8134     ptest(vec1, vec1);
8135     jccb(Assembler::notZero, FALSE_LABEL);
8136     addptr(limit, 16);


8446   // rdx: len
8447   // rcx: tmp5
8448   // rax: result
8449   ShortBranchVerifier sbv(this);
8450   assert_different_registers(src, dst, len, tmp5, result);
8451   Label L_done, L_copy_1_char, L_copy_1_char_exit;
8452 
8453   // set result
8454   xorl(result, result);
8455   // check for zero length
8456   testl(len, len);
8457   jcc(Assembler::zero, L_done);
8458   movl(result, len);
8459 
8460   // Setup pointers
8461   lea(src, Address(src, len, Address::times_2)); // char[]
8462   lea(dst, Address(dst, len, Address::times_1)); // byte[]
8463   negptr(len);
8464 
8465   if (UseSSE42Intrinsics || UseAVX >= 2) {

8466     Label L_chars_8_check, L_copy_8_chars, L_copy_8_chars_exit;
8467     Label L_chars_16_check, L_copy_16_chars, L_copy_16_chars_exit;
8468 
8469     if (UseAVX >= 2) {
8470       Label L_chars_32_check, L_copy_32_chars, L_copy_32_chars_exit;
8471       movl(tmp5, 0xff00ff00);   // create mask to test for Unicode chars in vector
8472       movdl(tmp1Reg, tmp5);
8473       vpbroadcastd(tmp1Reg, tmp1Reg);
8474       jmpb(L_chars_32_check);
8475 
8476       bind(L_copy_32_chars);
8477       vmovdqu(tmp3Reg, Address(src, len, Address::times_2, -64));
8478       vmovdqu(tmp4Reg, Address(src, len, Address::times_2, -32));
8479       vpor(tmp2Reg, tmp3Reg, tmp4Reg, /* vector_len */ 1);
8480       vptest(tmp2Reg, tmp1Reg);       // check for Unicode chars in  vector
8481       jccb(Assembler::notZero, L_copy_32_chars_exit);
8482       vpackuswb(tmp3Reg, tmp3Reg, tmp4Reg, /* vector_len */ 1);
8483       vpermq(tmp4Reg, tmp3Reg, 0xD8, /* vector_len */ 1);
8484       vmovdqu(Address(dst, len, Address::times_1, -32), tmp4Reg);
8485 
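
The vpackuswb/vpermq pair at the end of that 32-char iteration is worth a note: vpackuswb packs within each 128-bit lane, so the four packed qwords come out as chars 0-7, 16-23, 8-15, 24-31, and the cross-lane vpermq with control 0xD8 (source qwords 0, 2, 1, 3) restores source order before the store. In intrinsics form:

    #include <immintrin.h>  // AVX2

    // Packs 32 chars (two YMM registers) into 32 bytes in source order.
    static __m256i pack_32_chars(__m256i chars_0_15, __m256i chars_16_31) {
      __m256i packed = _mm256_packus_epi16(chars_0_15, chars_16_31);
      return _mm256_permute4x64_epi64(packed, 0xD8);  // qwords 0,2,1,3
    }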


10218                                          XMMRegister tmp3Reg, XMMRegister tmp4Reg,
10219                                          Register tmp5, Register result) {
10220   Label copy_chars_loop, return_length, return_zero, done;
10221 
10222   // rsi: src
10223   // rdi: dst
10224   // rdx: len
10225   // rcx: tmp5
10226   // rax: result
10227 
10228   // rsi holds start addr of source char[] to be compressed
10229   // rdi holds start addr of destination byte[]
10230   // rdx holds length
10231 
10232   assert(len != result, "");
10233 
10234   // save length for return
10235   push(len);
10236 
10237   if (UseSSE42Intrinsics) {

10238     Label copy_32_loop, copy_16, copy_tail;
10239 
10240     movl(result, len);
10241     movl(tmp5, 0xff00ff00);   // create mask to test for Unicode chars in vectors
10242 
10243     // vectored compression
10244     andl(len, 0xfffffff0);    // vector count (in chars)
10245     andl(result, 0x0000000f);    // tail count (in chars)
10246     testl(len, len);
10247     jccb(Assembler::zero, copy_16);
10248 
10249     // compress 16 chars per iter
10250     movdl(tmp1Reg, tmp5);
10251     pshufd(tmp1Reg, tmp1Reg, 0);   // store Unicode mask in tmp1Reg
10252     pxor(tmp4Reg, tmp4Reg);
10253 
10254     lea(src, Address(src, len, Address::times_2));
10255     lea(dst, Address(dst, len, Address::times_1));
10256     negptr(len);
10257 


10317 
10318   bind(done);
10319 }
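
Behaviourally, char_array_compress copies chars to bytes and gives up as soon as it meets a char above 0xFF; the 0xff00ff00 mask is the vectorized form of that high-byte test, and the length pushed at entry is returned on success (return_length) or replaced by 0 on failure (return_zero). A scalar model, names illustrative:

    typedef unsigned short jchar;  // the VM's 16-bit char, as in jni.h

    // Returns len if every char fits in a byte, else 0.
    static int compress_ref(const jchar* src, unsigned char* dst, int len) {
      for (int i = 0; i < len; i++) {
        if (src[i] > 0xFF) return 0;  // Unicode char: cannot compress
        dst[i] = static_cast<unsigned char>(src[i]);
      }
      return len;
    }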
10320 
10321 // Inflate byte[] array to char[].
10322 void MacroAssembler::byte_array_inflate(Register src, Register dst, Register len,
10323                                         XMMRegister tmp1, Register tmp2) {
10324   Label copy_chars_loop, done;
10325 
10326   // rsi: src
10327   // rdi: dst
10328   // rdx: len
10329   // rcx: tmp2
10330 
10331   // rsi holds start addr of source byte[] to be inflated
10332   // rdi holds start addr of destination char[]
10333   // rdx holds length
10334   assert_different_registers(src, dst, len, tmp2);
10335 
10336   if (UseSSE42Intrinsics) {

10337     Label copy_8_loop, copy_bytes, copy_tail;
10338 
10339     movl(tmp2, len);
10340     andl(tmp2, 0x00000007);   // tail count (in chars)
10341     andl(len, 0xfffffff8);    // vector count (in chars)
10342     jccb(Assembler::zero, copy_tail);
10343 
10344     // vectored inflation
10345     lea(src, Address(src, len, Address::times_1));
10346     lea(dst, Address(dst, len, Address::times_2));
10347     negptr(len);
10348 
10349     // inflate 8 chars per iter
10350     bind(copy_8_loop);
10351     pmovzxbw(tmp1, Address(src, len, Address::times_1));  // unpack to 8 words
10352     movdqu(Address(dst, len, Address::times_2), tmp1);
10353     addptr(len, 8);
10354     jcc(Assembler::notZero, copy_8_loop);
10355 
10356     bind(copy_tail);
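
Each copy_8_loop iteration widens 8 bytes to 8 chars with a single zero-extending load. In intrinsics form (assuming 8 readable bytes at src and 16 writable bytes at dst):

    #include <smmintrin.h>  // SSE4.1: pmovzxbw

    static void inflate8(const unsigned char* src, unsigned short* dst) {
      __m128i bytes = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(src));
      __m128i words = _mm_cvtepu8_epi16(bytes);  // pmovzxbw: zero-extend to 16 bits
      _mm_storeu_si128(reinterpret_cast<__m128i*>(dst), words);
    }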




6958   xorptr(tmp, tmp);
6959   if (UseFastStosb) {
6960     shlptr(cnt,3); // convert to number of bytes
6961     rep_stosb();
6962   } else {
6963     NOT_LP64(shlptr(cnt,1);) // convert to number of dwords for 32-bit VM
6964     rep_stos();
6965   }
6966 }
6967 
6968 #ifdef COMPILER2
6969 
6970 // IndexOf for constant substrings with size >= 8 chars
6971 // which don't need to be loaded through stack.
6972 void MacroAssembler::string_indexofC8(Register str1, Register str2,
6973                                       Register cnt1, Register cnt2,
6974                                       int int_cnt2,  Register result,
6975                                       XMMRegister vec, Register tmp,
6976                                       int ae) {
6977   ShortBranchVerifier sbv(this);
6978   assert(UseSSE42Intrinsics, "SSE4.2 intrinsics are required");
6979   assert(UseSSE >= 4, "SSE4 must be enabled for SSE4.2 intrinsics to be available");
6980   assert(ae != StrIntrinsicNode::LU, "Invalid encoding");
6981 
6982   // This method uses the pcmpestri instruction with bound registers
6983   //   inputs:
6984   //     xmm - substring
6985   //     rax - substring length (elements count)
6986   //     mem - scanned string
6987   //     rdx - string length (elements count)
6988   //     0xd - mode: 1100 (substring search) + 01 (unsigned shorts)
6989   //     0xc - mode: 1100 (substring search) + 00 (unsigned bytes)
6990   //   outputs:
6991   //     rcx - matched index in string
6992   assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri");
6993   int mode   = (ae == StrIntrinsicNode::LL) ? 0x0c : 0x0d; // bytes or shorts
6994   int stride = (ae == StrIntrinsicNode::LL) ? 16 : 8; //UU, UL -> 8
6995   Address::ScaleFactor scale1 = (ae == StrIntrinsicNode::LL) ? Address::times_1 : Address::times_2;
6996   Address::ScaleFactor scale2 = (ae == StrIntrinsicNode::UL) ? Address::times_1 : scale1;
6997 
6998   Label RELOAD_SUBSTR, SCAN_TO_SUBSTR, SCAN_SUBSTR,
6999         RET_FOUND, RET_NOT_FOUND, EXIT, FOUND_SUBSTR,


7137   } // (int_cnt2 > 8)
7138 
7139   bind(RET_FOUND);
7140   // Found result if we matched full small substring.
7141   // Compute substr offset
7142   subptr(result, str1);
7143   if (ae == StrIntrinsicNode::UU || ae == StrIntrinsicNode::UL) {
7144     shrl(result, 1); // index
7145   }
7146   bind(EXIT);
7147 
7148 } // string_indexofC8
7149 
7150 // Small strings are loaded through stack if they cross page boundary.
7151 void MacroAssembler::string_indexof(Register str1, Register str2,
7152                                     Register cnt1, Register cnt2,
7153                                     int int_cnt2,  Register result,
7154                                     XMMRegister vec, Register tmp,
7155                                     int ae) {
7156   ShortBranchVerifier sbv(this);
7157   assert(UseSSE42Intrinsics, "SSE4.2 intrinsics are required");
7158   assert(UseSSE >= 4, "SSE4 must be enabled for SSE4.2 intrinsics to be available");
7159   assert(ae != StrIntrinsicNode::LU, "Invalid encoding");
7160 
7161   //
7162   // int_cnt2 is length of small (< 8 chars) constant substring
7163   // or (-1) for non constant substring in which case its length
7164   // is in cnt2 register.
7165   //
7166   // Note, inline_string_indexOf() generates checks:
7167   // if (substr.count > string.count) return -1;
7168   // if (substr.count == 0) return 0;
7169   //
7170   int stride = (ae == StrIntrinsicNode::LL) ? 16 : 8; //UU, UL -> 8
7171   assert(int_cnt2 == -1 || (0 < int_cnt2 && int_cnt2 < stride), "should be != 0");
7172   // This method uses the pcmpestri instruction with bound registers
7173   //   inputs:
7174   //     xmm - substring
7175   //     rax - substring length (elements count)
7176   //     mem - scanned string
7177   //     rdx - string length (elements count)
7178   //     0xd - mode: 1100 (substring search) + 01 (unsigned shorts)


7455     jmpb(SCAN_SUBSTR);
7456 
7457     bind(RET_FOUND_LONG);
7458     movptr(str1, Address(rsp, wordSize));
7459   } // non constant
7460 
7461   bind(RET_FOUND);
7462   // Compute substr offset
7463   subptr(result, str1);
7464   if (ae == StrIntrinsicNode::UU || ae == StrIntrinsicNode::UL) {
7465     shrl(result, 1); // index
7466   }
7467   bind(CLEANUP);
7468   pop(rsp); // restore SP
7469 
7470 } // string_indexof
7471 
7472 void MacroAssembler::string_indexof_char(Register str1, Register cnt1, Register ch, Register result,
7473                                          XMMRegister vec1, XMMRegister vec2, XMMRegister vec3, Register tmp) {
7474   ShortBranchVerifier sbv(this);
7475   assert(UseSSE42Intrinsics, "SSE4.2 intrinsics are required");
7476   assert(UseSSE >= 4, "SSE4 must be enabled for SSE4.2 intrinsics to be available");
7477 
7478   int stride = 8;
7479 
7480   Label FOUND_CHAR, SCAN_TO_CHAR, SCAN_TO_CHAR_LOOP,
7481         SCAN_TO_8_CHAR, SCAN_TO_8_CHAR_LOOP, SCAN_TO_16_CHAR_LOOP,
7482         RET_NOT_FOUND, SCAN_TO_8_CHAR_INIT,
7483         FOUND_SEQ_CHAR, DONE_LABEL;
7484 
7485   movptr(result, str1);
7486   if (UseAVX >= 2) {
7487     cmpl(cnt1, stride);
7488     jccb(Assembler::less, SCAN_TO_CHAR_LOOP);
7489     cmpl(cnt1, 2*stride);
7490     jccb(Assembler::less, SCAN_TO_8_CHAR_INIT);
7491     movdl(vec1, ch);
7492     vpbroadcastw(vec1, vec1);
7493     vpxor(vec2, vec2);
7494     movl(tmp, cnt1);
7495     andl(tmp, 0xFFFFFFF0);  //vector count (in chars)
7496     andl(cnt1,0x0000000F);  //tail count (in chars)
7497 
7498     bind(SCAN_TO_16_CHAR_LOOP);
7499     vmovdqu(vec3, Address(result, 0));
7500     vpcmpeqw(vec3, vec3, vec1, 1);
7501     vptest(vec2, vec3);
7502     jcc(Assembler::carryClear, FOUND_CHAR);
7503     addptr(result, 32);
7504     subl(tmp, 2*stride);
7505     jccb(Assembler::notZero, SCAN_TO_16_CHAR_LOOP);
7506     jmp(SCAN_TO_8_CHAR);
7507     bind(SCAN_TO_8_CHAR_INIT);
7508     movdl(vec1, ch);
7509     pshuflw(vec1, vec1, 0x00);
7510     pshufd(vec1, vec1, 0);
7511     pxor(vec2, vec2);
7512   }

7513   bind(SCAN_TO_8_CHAR);
7514   cmpl(cnt1, stride);
7515   if (UseAVX >= 2) {
7516     jccb(Assembler::less, SCAN_TO_CHAR);
7517   } else {

7518     jccb(Assembler::less, SCAN_TO_CHAR_LOOP);
7519     movdl(vec1, ch);
7520     pshuflw(vec1, vec1, 0x00);
7521     pshufd(vec1, vec1, 0);
7522     pxor(vec2, vec2);
7523   }
7524   movl(tmp, cnt1);
7525   andl(tmp, 0xFFFFFFF8);  //vector count (in chars)
7526   andl(cnt1,0x00000007);  //tail count (in chars)
7527 
7528   bind(SCAN_TO_8_CHAR_LOOP);
7529   movdqu(vec3, Address(result, 0));
7530   pcmpeqw(vec3, vec1);
7531   ptest(vec2, vec3);
7532   jcc(Assembler::carryClear, FOUND_CHAR);
7533   addptr(result, 16);
7534   subl(tmp, stride);
7535   jccb(Assembler::notZero, SCAN_TO_8_CHAR_LOOP);

7536   bind(SCAN_TO_CHAR);
7537   testl(cnt1, cnt1);
7538   jcc(Assembler::zero, RET_NOT_FOUND);

7539   bind(SCAN_TO_CHAR_LOOP);
7540   load_unsigned_short(tmp, Address(result, 0));
7541   cmpl(ch, tmp);
7542   jccb(Assembler::equal, FOUND_SEQ_CHAR);
7543   addptr(result, 2);
7544   subl(cnt1, 1);
7545   jccb(Assembler::zero, RET_NOT_FOUND);
7546   jmp(SCAN_TO_CHAR_LOOP);
7547 
7548   bind(RET_NOT_FOUND);
7549   movl(result, -1);
7550   jmpb(DONE_LABEL);
7551 

7552   bind(FOUND_CHAR);
7553   if (UseAVX >= 2) {
7554     vpmovmskb(tmp, vec3);
7555   } else {
7556     pmovmskb(tmp, vec3);
7557   }
7558   bsfl(ch, tmp);
7559   addl(result, ch);

7560 
7561   bind(FOUND_SEQ_CHAR);
7562   subptr(result, str1);
7563   shrl(result, 1);
7564 
7565   bind(DONE_LABEL);
7566 } // string_indexof_char
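
The mask-to-index step at FOUND_CHAR is the same on both paths: pcmpeqw leaves 0xFFFF in each matching lane, pmovmskb collapses that to two set bits per matching char, and bsf yields the byte offset of the first one, which is then added to the running byte position in result. A sketch, using __builtin_ctz (GCC/Clang) in place of bsfl:

    #include <emmintrin.h>  // SSE2

    // Byte offset of the first matching char; eq is the (v)pcmpeqw result.
    // Undefined if no lane matched, just like bsf on a zero input.
    static int first_match_offset(__m128i eq) {
      int mask = _mm_movemask_epi8(eq);  // pmovmskb
      return __builtin_ctz(mask);        // bsfl equivalent
    }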
7567 
7568 // helper function for string_compare
7569 void MacroAssembler::load_next_elements(Register elem1, Register elem2, Register str1, Register str2,
7570                                         Address::ScaleFactor scale, Address::ScaleFactor scale1,
7571                                         Address::ScaleFactor scale2, Register index, int ae) {
7572   if (ae == StrIntrinsicNode::LL) {
7573     load_unsigned_byte(elem1, Address(str1, index, scale, 0));
7574     load_unsigned_byte(elem2, Address(str2, index, scale, 0));
7575   } else if (ae == StrIntrinsicNode::UU) {
7576     load_unsigned_short(elem1, Address(str1, index, scale, 0));
7577     load_unsigned_short(elem2, Address(str2, index, scale, 0));
7578   } else {
7579     load_unsigned_byte(elem1, Address(str1, index, scale1, 0));


7628 
7629   // Check if the strings start at the same location and setup scale and stride
7630   if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
7631     cmpptr(str1, str2);
7632     jcc(Assembler::equal, LENGTH_DIFF_LABEL);
7633     if (ae == StrIntrinsicNode::LL) {
7634       scale = Address::times_1;
7635       stride = 16;
7636     } else {
7637       scale = Address::times_2;
7638       stride = 8;
7639     }
7640   } else {
7641     scale = Address::no_scale;  // not used
7642     scale1 = Address::times_1;
7643     scale2 = Address::times_2;
7644     stride = 8;
7645   }
7646 
7647   if (UseAVX >= 2 && UseSSE42Intrinsics) {
7648     assert(UseSSE >= 4, "SSE4 must be enabled for SSE4.2 intrinsics to be available");
7649     Label COMPARE_WIDE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_WIDE_TAIL, COMPARE_SMALL_STR;
7650     Label COMPARE_WIDE_VECTORS_LOOP, COMPARE_16_CHARS, COMPARE_INDEX_CHAR;
7651     Label COMPARE_TAIL_LONG;
7652     int pcmpmask = 0x19;
7653     if (ae == StrIntrinsicNode::LL) {
7654       pcmpmask &= ~0x01;
7655     }
7656 
7657     // Setup to compare 16-chars (32-bytes) vectors,
7658     // start from first character again because it has aligned address.
7659     if (ae == StrIntrinsicNode::LL) {
7660       stride2 = 32;
7661     } else {
7662       stride2 = 16;
7663     }
7664     if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
7665       adr_stride = stride << scale;
7666     } else {
7667       adr_stride1 = 8;  //stride << scale1;
7668       adr_stride2 = 16; //stride << scale2;


7764       movdqu(vec1, Address(str1, 0));
7765     } else {
7766       pmovzxbw(vec1, Address(str1, 0));
7767     }
7768     pcmpestri(vec1, Address(str2, 0), pcmpmask);
7769     jcc(Assembler::below, COMPARE_INDEX_CHAR);
7770     subptr(cnt2, stride);
7771     jccb(Assembler::zero, LENGTH_DIFF_LABEL);
7772     if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
7773       lea(str1, Address(str1, result, scale));
7774       lea(str2, Address(str2, result, scale));
7775     } else {
7776       lea(str1, Address(str1, result, scale1));
7777       lea(str2, Address(str2, result, scale2));
7778     }
7779     negptr(cnt2);
7780     jmpb(WHILE_HEAD_LABEL);
7781 
7782     bind(COMPARE_SMALL_STR);
7783   } else if (UseSSE42Intrinsics) {
7784     assert(UseSSE >= 4, "SSE4 must be enabled for SSE4.2 intrinsics to be available");
7785     Label COMPARE_WIDE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_TAIL;
7786     int pcmpmask = 0x19;
7787     // Setup to compare 8-char (16-byte) vectors,
7788     // start from first character again because it has aligned address.
7789     movl(result, cnt2);
7790     andl(cnt2, ~(stride - 1));   // cnt2 holds the vector count
7791     if (ae == StrIntrinsicNode::LL) {
7792       pcmpmask &= ~0x01;
7793     }
7794     jccb(Assembler::zero, COMPARE_TAIL);
7795     if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
7796       lea(str1, Address(str1, result, scale));
7797       lea(str2, Address(str2, result, scale));
7798     } else {
7799       lea(str1, Address(str1, result, scale1));
7800       lea(str2, Address(str2, result, scale2));
7801     }
7802     negptr(result);
7803 
7804     // pcmpestri


7897 // Search for Non-ASCII character (Negative byte value) in a byte array,
7898 // return true if it has any and false otherwise.
7899 void MacroAssembler::has_negatives(Register ary1, Register len,
7900                                    Register result, Register tmp1,
7901                                    XMMRegister vec1, XMMRegister vec2) {
7902 
7903   // rsi: byte array
7904   // rcx: len
7905   // rax: result
7906   ShortBranchVerifier sbv(this);
7907   assert_different_registers(ary1, len, result, tmp1);
7908   assert_different_registers(vec1, vec2);
7909   Label TRUE_LABEL, FALSE_LABEL, DONE, COMPARE_CHAR, COMPARE_VECTORS, COMPARE_BYTE;
7910 
7911   // len == 0
7912   testl(len, len);
7913   jcc(Assembler::zero, FALSE_LABEL);
7914 
7915   movl(result, len); // copy
7916 
7917   if (UseAVX >= 2 && UseSSE42Intrinsics) {
7918     assert(UseSSE >= 4, "SSE4 must be enabled for SSE4.2 intrinsics to be available");
7919     // With AVX2, use 32-byte vector compare
7920     Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;
7921 
7922     // Compare 32-byte vectors
7923     andl(result, 0x0000001f);  //   tail count (in bytes)
7924     andl(len, 0xffffffe0);   // vector count (in bytes)
7925     jccb(Assembler::zero, COMPARE_TAIL);
7926 
7927     lea(ary1, Address(ary1, len, Address::times_1));
7928     negptr(len);
7929 
7930     movl(tmp1, 0x80808080);   // create mask to test for Unicode chars in vector
7931     movdl(vec2, tmp1);
7932     vpbroadcastd(vec2, vec2);
7933 
7934     bind(COMPARE_WIDE_VECTORS);
7935     vmovdqu(vec1, Address(ary1, len, Address::times_1));
7936     vptest(vec1, vec2);
7937     jccb(Assembler::notZero, TRUE_LABEL);
7938     addptr(len, 32);
7939     jcc(Assembler::notZero, COMPARE_WIDE_VECTORS);
7940 
7941     testl(result, result);
7942     jccb(Assembler::zero, FALSE_LABEL);
7943 
7944     vmovdqu(vec1, Address(ary1, result, Address::times_1, -32));
7945     vptest(vec1, vec2);
7946     jccb(Assembler::notZero, TRUE_LABEL);
7947     jmpb(FALSE_LABEL);
7948 
7949     bind(COMPARE_TAIL); // len is zero
7950     movl(len, result);
7951     // Fallthru to tail compare
7952   } else if (UseSSE42Intrinsics) {
7953     assert(UseSSE >= 4, "SSE4 must be enabled for SSE4.2 intrinsics to be available");
7954     // With SSE4.2, use double quad vector compare
7955     Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;
7956 
7957     // Compare 16-byte vectors
7958     andl(result, 0x0000000f);  //   tail count (in bytes)
7959     andl(len, 0xfffffff0);   // vector count (in bytes)
7960     jccb(Assembler::zero, COMPARE_TAIL);
7961 
7962     lea(ary1, Address(ary1, len, Address::times_1));
7963     negptr(len);
7964 
7965     movl(tmp1, 0x80808080);
7966     movdl(vec2, tmp1);
7967     pshufd(vec2, vec2, 0);
7968 
7969     bind(COMPARE_WIDE_VECTORS);
7970     movdqu(vec1, Address(ary1, len, Address::times_1));
7971     ptest(vec1, vec2);
7972     jccb(Assembler::notZero, TRUE_LABEL);
7973     addptr(len, 16);


8010   subptr(result, 2);
8011   lea(ary1, Address(ary1, 2));
8012 
8013   bind(COMPARE_BYTE);
8014   testl(result, 0x1);   // tail  byte
8015   jccb(Assembler::zero, FALSE_LABEL);
8016   load_unsigned_byte(tmp1, Address(ary1, 0));
8017   andl(tmp1, 0x00000080);
8018   jccb(Assembler::notEqual, TRUE_LABEL);
8019   jmpb(FALSE_LABEL);
8020 
8021   bind(TRUE_LABEL);
8022   movl(result, 1);   // return true
8023   jmpb(DONE);
8024 
8025   bind(FALSE_LABEL);
8026   xorl(result, result); // return false
8027 
8028   // That's it
8029   bind(DONE);
8030   if (UseAVX >= 2 && UseSSE42Intrinsics) {
8031     // clean upper bits of YMM registers
8032     vpxor(vec1, vec1);
8033     vpxor(vec2, vec2);
8034   }
8035 }
8036 
8037 // Compare char[] or byte[] arrays aligned to 4 bytes or substrings.
8038 void MacroAssembler::arrays_equals(bool is_array_equ, Register ary1, Register ary2,
8039                                    Register limit, Register result, Register chr,
8040                                    XMMRegister vec1, XMMRegister vec2, bool is_char) {
8041   ShortBranchVerifier sbv(this);
8042   Label TRUE_LABEL, FALSE_LABEL, DONE, COMPARE_VECTORS, COMPARE_CHAR, COMPARE_BYTE;
8043 
8044   int length_offset  = arrayOopDesc::length_offset_in_bytes();
8045   int base_offset    = arrayOopDesc::base_offset_in_bytes(is_char ? T_CHAR : T_BYTE);
8046 
8047   if (is_array_equ) {
8048     // Check the input args
8049     cmpptr(ary1, ary2);
8050     jcc(Assembler::equal, TRUE_LABEL);


8098     vptest(vec1, vec1);
8099     jccb(Assembler::notZero, FALSE_LABEL);
8100     addptr(limit, 32);
8101     jcc(Assembler::notZero, COMPARE_WIDE_VECTORS);
8102 
8103     testl(result, result);
8104     jccb(Assembler::zero, TRUE_LABEL);
8105 
8106     vmovdqu(vec1, Address(ary1, result, Address::times_1, -32));
8107     vmovdqu(vec2, Address(ary2, result, Address::times_1, -32));
8108     vpxor(vec1, vec2);
8109 
8110     vptest(vec1, vec1);
8111     jccb(Assembler::notZero, FALSE_LABEL);
8112     jmpb(TRUE_LABEL);
8113 
8114     bind(COMPARE_TAIL); // limit is zero
8115     movl(limit, result);
8116     // Fallthru to tail compare
8117   } else if (UseSSE42Intrinsics) {
8118     assert(UseSSE >= 4, "SSE4 must be enabled for SSE4.2 intrinsics to be available");
8119     // With SSE4.2, use double quad vector compare
8120     Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;
8121 
8122     // Compare 16-byte vectors
8123     andl(result, 0x0000000f);  //   tail count (in bytes)
8124     andl(limit, 0xfffffff0);   // vector count (in bytes)
8125     jccb(Assembler::zero, COMPARE_TAIL);
8126 
8127     lea(ary1, Address(ary1, limit, Address::times_1));
8128     lea(ary2, Address(ary2, limit, Address::times_1));
8129     negptr(limit);
8130 
8131     bind(COMPARE_WIDE_VECTORS);
8132     movdqu(vec1, Address(ary1, limit, Address::times_1));
8133     movdqu(vec2, Address(ary2, limit, Address::times_1));
8134     pxor(vec1, vec2);
8135 
8136     ptest(vec1, vec1);
8137     jccb(Assembler::notZero, FALSE_LABEL);
8138     addptr(limit, 16);


8448   // rdx: len
8449   // rcx: tmp5
8450   // rax: result
8451   ShortBranchVerifier sbv(this);
8452   assert_different_registers(src, dst, len, tmp5, result);
8453   Label L_done, L_copy_1_char, L_copy_1_char_exit;
8454 
8455   // set result
8456   xorl(result, result);
8457   // check for zero length
8458   testl(len, len);
8459   jcc(Assembler::zero, L_done);
8460   movl(result, len);
8461 
8462   // Setup pointers
8463   lea(src, Address(src, len, Address::times_2)); // char[]
8464   lea(dst, Address(dst, len, Address::times_1)); // byte[]
8465   negptr(len);
8466 
8467   if (UseSSE42Intrinsics || UseAVX >= 2) {
8468     assert(UseSSE42Intrinsics ? UseSSE >= 4 : true, "SSE4 must be enabled for SSE4.2 intrinsics to be available");
8469     Label L_chars_8_check, L_copy_8_chars, L_copy_8_chars_exit;
8470     Label L_chars_16_check, L_copy_16_chars, L_copy_16_chars_exit;
8471 
8472     if (UseAVX >= 2) {
8473       Label L_chars_32_check, L_copy_32_chars, L_copy_32_chars_exit;
8474       movl(tmp5, 0xff00ff00);   // create mask to test for Unicode chars in vector
8475       movdl(tmp1Reg, tmp5);
8476       vpbroadcastd(tmp1Reg, tmp1Reg);
8477       jmpb(L_chars_32_check);
8478 
8479       bind(L_copy_32_chars);
8480       vmovdqu(tmp3Reg, Address(src, len, Address::times_2, -64));
8481       vmovdqu(tmp4Reg, Address(src, len, Address::times_2, -32));
8482       vpor(tmp2Reg, tmp3Reg, tmp4Reg, /* vector_len */ 1);
8483       vptest(tmp2Reg, tmp1Reg);       // check for Unicode chars in  vector
8484       jccb(Assembler::notZero, L_copy_32_chars_exit);
8485       vpackuswb(tmp3Reg, tmp3Reg, tmp4Reg, /* vector_len */ 1);
8486       vpermq(tmp4Reg, tmp3Reg, 0xD8, /* vector_len */ 1);
8487       vmovdqu(Address(dst, len, Address::times_1, -32), tmp4Reg);
8488 


10221                                          XMMRegister tmp3Reg, XMMRegister tmp4Reg,
10222                                          Register tmp5, Register result) {
10223   Label copy_chars_loop, return_length, return_zero, done;
10224 
10225   // rsi: src
10226   // rdi: dst
10227   // rdx: len
10228   // rcx: tmp5
10229   // rax: result
10230 
10231   // rsi holds start addr of source char[] to be compressed
10232   // rdi holds start addr of destination byte[]
10233   // rdx holds length
10234 
10235   assert(len != result, "");
10236 
10237   // save length for return
10238   push(len);
10239 
10240   if (UseSSE42Intrinsics) {
10241     assert(UseSSE >= 4, "SSE4 must be enabled for SSE4.2 intrinsics to be available");
10242     Label copy_32_loop, copy_16, copy_tail;
10243 
10244     movl(result, len);
10245     movl(tmp5, 0xff00ff00);   // create mask to test for Unicode chars in vectors
10246 
10247     // vectored compression
10248     andl(len, 0xfffffff0);    // vector count (in chars)
10249     andl(result, 0x0000000f);    // tail count (in chars)
10250     testl(len, len);
10251     jccb(Assembler::zero, copy_16);
10252 
10253     // compress 16 chars per iter
10254     movdl(tmp1Reg, tmp5);
10255     pshufd(tmp1Reg, tmp1Reg, 0);   // store Unicode mask in tmp1Reg
10256     pxor(tmp4Reg, tmp4Reg);
10257 
10258     lea(src, Address(src, len, Address::times_2));
10259     lea(dst, Address(dst, len, Address::times_1));
10260     negptr(len);
10261 


10321 
10322   bind(done);
10323 }
10324 
10325 // Inflate byte[] array to char[].
10326 void MacroAssembler::byte_array_inflate(Register src, Register dst, Register len,
10327                                         XMMRegister tmp1, Register tmp2) {
10328   Label copy_chars_loop, done;
10329 
10330   // rsi: src
10331   // rdi: dst
10332   // rdx: len
10333   // rcx: tmp2
10334 
10335   // rsi holds start addr of source byte[] to be inflated
10336   // rdi holds start addr of destination char[]
10337   // rdx holds length
10338   assert_different_registers(src, dst, len, tmp2);
10339 
10340   if (UseSSE42Intrinsics) {
10341     assert(UseSSE >= 4, "SSE4 must be enabled for SSE4.2 intrinsics to be available");
10342     Label copy_8_loop, copy_bytes, copy_tail;
10343 
10344     movl(tmp2, len);
10345     andl(tmp2, 0x00000007);   // tail count (in chars)
10346     andl(len, 0xfffffff8);    // vector count (in chars)
10347     jccb(Assembler::zero, copy_tail);
10348 
10349     // vectored inflation
10350     lea(src, Address(src, len, Address::times_1));
10351     lea(dst, Address(dst, len, Address::times_2));
10352     negptr(len);
10353 
10354     // inflate 8 chars per iter
10355     bind(copy_8_loop);
10356     pmovzxbw(tmp1, Address(src, len, Address::times_1));  // unpack to 8 words
10357     movdqu(Address(dst, len, Address::times_2), tmp1);
10358     addptr(len, 8);
10359     jcc(Assembler::notZero, copy_8_loop);
10360 
10361     bind(copy_tail);

