< prev index next >

src/cpu/x86/vm/macroAssembler_x86.cpp

Print this page




  29 #include "gc/shared/cardTableModRefBS.hpp"
  30 #include "gc/shared/collectedHeap.inline.hpp"
  31 #include "interpreter/interpreter.hpp"
  32 #include "memory/resourceArea.hpp"
  33 #include "memory/universe.hpp"
  34 #include "oops/klass.inline.hpp"
  35 #include "prims/methodHandles.hpp"
  36 #include "runtime/biasedLocking.hpp"
  37 #include "runtime/interfaceSupport.hpp"
  38 #include "runtime/objectMonitor.hpp"
  39 #include "runtime/os.hpp"
  40 #include "runtime/sharedRuntime.hpp"
  41 #include "runtime/stubRoutines.hpp"
  42 #include "utilities/macros.hpp"
  43 #if INCLUDE_ALL_GCS
  44 #include "gc/g1/g1CollectedHeap.inline.hpp"
  45 #include "gc/g1/g1SATBCardTableModRefBS.hpp"
  46 #include "gc/g1/heapRegion.hpp"
  47 #endif // INCLUDE_ALL_GCS
  48 #include "crc32c.h"



  49 
  50 #ifdef PRODUCT
     // Helper macros used by the code generators in this file.
     // PRODUCT builds: BLOCK_COMMENT expands to nothing and STOP only calls stop().
  51 #define BLOCK_COMMENT(str) /* nothing */
  52 #define STOP(error) stop(error)
  53 #else
     // Non-PRODUCT builds: BLOCK_COMMENT forwards to block_comment(), and STOP
     // calls block_comment() with the message before calling stop().
     // NOTE: this STOP expands to two statements, so it must not be used as the
     // sole body of an unbraced if/else/loop.
  54 #define BLOCK_COMMENT(str) block_comment(str)
  55 #define STOP(error) block_comment(error); stop(error)
  56 #endif
  57 
     // Binds 'label' and passes the label's name followed by ':' to BLOCK_COMMENT.
     // Also expands to two statements (same caveat as STOP above).
  58 #define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
  59 
  60 #ifdef ASSERT
     // Only defined in ASSERT builds; this x86 definition always returns true.
  61 bool AbstractAssembler::pd_check_instruction_mark() { return true; }
  62 #endif
  63 
  64 static Assembler::Condition reverse[] = {
  65     Assembler::noOverflow     /* overflow      = 0x0 */ ,
  66     Assembler::overflow       /* noOverflow    = 0x1 */ ,
  67     Assembler::aboveEqual     /* carrySet      = 0x2, below         = 0x2 */ ,
  68     Assembler::below          /* aboveEqual    = 0x3, carryClear    = 0x3 */ ,


6282 
6283 }
6284 
6285 void MacroAssembler::clear_mem(Register base, Register cnt, Register tmp) {
       // Emits a 'rep stos' based sequence after zeroing tmp. Going by the
       // function name and the asserts below, this clears cnt qwords starting
       // at base -- NOTE(review): the exact rep_stos()/rep_stosb() semantics are
       // defined elsewhere; confirm there.
6286   // cnt - number of qwords (8-byte words).
6287   // base - start address, qword aligned.
       // The register assignment is fixed: base/tmp/cnt must be rdi/rax/rcx.
       // These are assert-only checks.
6288   assert(base==rdi, "base register must be edi for rep stos");
6289   assert(tmp==rax,   "tmp register must be eax for rep stos");
6290   assert(cnt==rcx,   "cnt register must be ecx for rep stos");
6291 
       // tmp = 0
6292   xorptr(tmp, tmp);
6293   if (UseFastStosb) {
         // Byte-granular variant: scale the qword count by 8 first.
6294     shlptr(cnt,3); // convert to number of bytes
6295     rep_stosb();
6296   } else {
         // The shift below is only emitted for 32-bit VMs (NOT_LP64); on 64-bit
         // cnt is passed to rep_stos() unchanged.
6297     NOT_LP64(shlptr(cnt,1);) // convert to number of dwords for 32-bit VM
6298     rep_stos();
6299   }
6300 }
6301 


6302 // IndexOf for constant substrings with size >= 8 chars
6303 // which don't need to be loaded through stack.
     //
     //   str1     - address of the string being scanned
     //   str2     - address of the constant substring
     //   cnt1     - scanned string length in elements; must be rdx
     //   cnt2     - loaded with int_cnt2 and used as the substring element
     //              counter; must be rax
     //   int_cnt2 - compile-time substring length, asserted to be >= 8
     //   result   - on exit: element index of the match, or -1 if not found
     //   vec      - XMM register holding the 8 substring elements being compared
     //   tmp      - receives the pcmpestri match index; must be rcx
6304 void MacroAssembler::string_indexofC8(Register str1, Register str2,
6305                                       Register cnt1, Register cnt2,
6306                                       int int_cnt2,  Register result,
6307                                       XMMRegister vec, Register tmp) {

6308   ShortBranchVerifier sbv(this);
6309   assert(UseSSE42Intrinsics, "SSE4.2 is required");

6310 
6311   // This method uses pcmpestri instruction with bound registers
6312   //   inputs:
6313   //     xmm - substring
6314   //     rax - substring length (elements count)
6315   //     mem - scanned string
6316   //     rdx - string length (elements count)
6317   //     0xd - mode: 1100 (substring search) + 01 (unsigned shorts)

6318   //   outputs:
6319   //     rcx - matched index in string
6320   assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri");




6321 
6322   Label RELOAD_SUBSTR, SCAN_TO_SUBSTR, SCAN_SUBSTR,
6323         RET_FOUND, RET_NOT_FOUND, EXIT, FOUND_SUBSTR,
6324         MATCH_SUBSTR_HEAD, RELOAD_STR, FOUND_CANDIDATE;
6325 
6326   // Note, inline_string_indexOf() generates checks:
6327   // if (substr.count > string.count) return -1;
6328   // if (substr.count == 0) return 0;
6329   assert(int_cnt2 >= 8, "this code isused only for cnt2 >= 8 chars");
6330 
6331   // Load substring.



6332   movdqu(vec, Address(str2, 0));

6333   movl(cnt2, int_cnt2);
6334   movptr(result, str1); // string addr
6335 
6336   if (int_cnt2 > 8) {
         // First entry: skip the rescan entry points below.
6337     jmpb(SCAN_TO_SUBSTR);
6338 
6339     // Reload substr for rescan, this code
6340     // is executed only for large substrings (> 8 chars)
6341     bind(RELOAD_SUBSTR);



6342     movdqu(vec, Address(str2, 0));

6343     negptr(cnt2); // Jumped here with negative cnt2, convert to positive
6344 
6345     bind(RELOAD_STR);
6346     // We came here after the beginning of the substring was
6347     // matched but the rest of it was not so we need to search
6348     // again. Start from the next element after the previous match.
6349 
6350     // cnt2 is the number of remaining substring elements and
6351     // cnt1 is the number of remaining string elements when cmp failed.
6352     // Restored cnt1 = cnt1 - cnt2 + int_cnt2
6353     subl(cnt1, cnt2);
6354     addl(cnt1, int_cnt2);
6355     movl(cnt2, int_cnt2); // Now restore cnt2
6356 
6357     decrementl(cnt1);     // Shift to next element
6358     cmpl(cnt1, cnt2);
6359     jccb(Assembler::negative, RET_NOT_FOUND);  // Left less than substring
6360 
         // Advance the scan pointer by one 2-byte element.
6361     addptr(result, 2);
6362 
6363   } // (int_cnt2 > 8)
6364 
6365   // Scan string for start of substr in 16-byte vectors
6366   bind(SCAN_TO_SUBSTR);
6367   pcmpestri(vec, Address(result, 0), 0x0d);
6368   jccb(Assembler::below, FOUND_CANDIDATE);   // CF == 1
       // No candidate in these 8 elements: consume them and advance 16 bytes.
6369   subl(cnt1, 8);
6370   jccb(Assembler::lessEqual, RET_NOT_FOUND); // Scanned full string
6371   cmpl(cnt1, cnt2);
6372   jccb(Assembler::negative, RET_NOT_FOUND);  // Left less than substring
6373   addptr(result, 16);
6374   jmpb(SCAN_TO_SUBSTR);
6375 
6376   // Found a potential substr
6377   bind(FOUND_CANDIDATE);
6378   // Matched whole vector if first element matched (tmp(rcx) == 0).
6379   if (int_cnt2 == 8) {
6380     jccb(Assembler::overflow, RET_FOUND);    // OF == 1
6381   } else { // int_cnt2 > 8
6382     jccb(Assembler::overflow, FOUND_SUBSTR);
6383   }
6384   // After pcmpestri tmp(rcx) contains matched element index
6385   // Compute start addr of substr
6386   lea(result, Address(result, tmp, Address::times_2));
6387 
6388   // Make sure string is still long enough
6389   subl(cnt1, tmp);
6390   cmpl(cnt1, cnt2);
6391   if (int_cnt2 == 8) {
6392     jccb(Assembler::greaterEqual, SCAN_TO_SUBSTR);
6393   } else { // int_cnt2 > 8
6394     jccb(Assembler::greaterEqual, MATCH_SUBSTR_HEAD);
6395   }
6396   // Left less than substring.
6397 
6398   bind(RET_NOT_FOUND);
       // result = -1 signals "not found"; skip the index computation at RET_FOUND.
6399   movl(result, -1);
6400   jmpb(EXIT);
6401 
6402   if (int_cnt2 > 8) {
6403     // This code is optimized for the case when whole substring
6404     // is matched if its head is matched.
6405     bind(MATCH_SUBSTR_HEAD);
6406     pcmpestri(vec, Address(result, 0), 0x0d);
6407     // Reload only string if does not match
6408     jccb(Assembler::noOverflow, RELOAD_STR); // OF == 0
6409 
6410     Label CONT_SCAN_SUBSTR;
6411     // Compare the rest of substring (> 8 chars).
6412     bind(FOUND_SUBSTR);
6413     // First 8 chars are already matched.
         // cnt2 becomes the negative count of substring elements still to be
         // compared: -(int_cnt2) + 8.
6414     negptr(cnt2);
6415     addptr(cnt2, 8);
6416 
6417     bind(SCAN_SUBSTR);
6418     subl(cnt1, 8);
6419     cmpl(cnt2, -8); // Do not read beyond substring
6420     jccb(Assembler::lessEqual, CONT_SCAN_SUBSTR);
6421     // Back-up strings to avoid reading beyond substring:
6422     // cnt1 = cnt1 - cnt2 + 8
6423     addl(cnt1, cnt2); // cnt2 is negative
6424     addl(cnt1, 8);
         // cnt2 = -8: compare exactly the last 8 substring elements.
6425     movl(cnt2, 8); negptr(cnt2);
6426     bind(CONT_SCAN_SUBSTR);
         // Generator-time choice: if int_cnt2 is below G, int_cnt2*2 is used
         // directly as the address displacement; otherwise the index is computed
         // in a register (see the comment in the else branch).
6427     if (int_cnt2 < (int)G) {
6428       movdqu(vec, Address(str2, cnt2, Address::times_2, int_cnt2*2));
6429       pcmpestri(vec, Address(result, cnt2, Address::times_2, int_cnt2*2), 0x0d);






6430     } else {
6431       // calculate index in register to avoid integer overflow (int_cnt2*2)
6432       movl(tmp, int_cnt2);
6433       addptr(tmp, cnt2);
6434       movdqu(vec, Address(str2, tmp, Address::times_2, 0));
6435       pcmpestri(vec, Address(result, tmp, Address::times_2, 0), 0x0d);




6436     }
6437     // Need to reload strings pointers if not matched whole vector
6438     jcc(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0
         // 8 more elements matched; loop while cnt2 is still negative.
6439     addptr(cnt2, 8);
6440     jcc(Assembler::negative, SCAN_SUBSTR);
6441     // Fall through if found full substring
6442 
6443   } // (int_cnt2 > 8)
6444 
6445   bind(RET_FOUND);
6446   // Found result if we matched full small substring.
6447   // Compute substr offset
       // (byte distance from the string start, halved to get an element index)
6448   subptr(result, str1);

6449   shrl(result, 1); // index

6450   bind(EXIT);
6451 
6452 } // string_indexofC8
6453 
6454 // Small strings are loaded through stack if they cross page boundary.
     //
     //   str1     - address of the string being scanned (may be redirected to a
     //              stack copy, and is reused as a scan cursor for long substrings)
     //   str2     - address of the substring (may be redirected to a stack copy)
     //   cnt1     - scanned string length in elements; must be rdx
     //   cnt2     - substring length in elements when int_cnt2 == -1; must be rax
     //   int_cnt2 - constant substring length in 1..7, or -1 if not constant
     //   result   - on exit: element index of the match, or -1 if not found
     //   vec      - XMM register holding the substring elements being compared
     //   tmp      - holds the original rsp while the strings are loaded, then
     //              receives the pcmpestri match index; must be rcx
     //
     // The original rsp is pushed last and popped straight back into rsp at
     // CLEANUP, which discards everything this code placed on the stack.
6455 void MacroAssembler::string_indexof(Register str1, Register str2,
6456                                     Register cnt1, Register cnt2,
6457                                     int int_cnt2,  Register result,
6458                                     XMMRegister vec, Register tmp) {

6459   ShortBranchVerifier sbv(this);
6460   assert(UseSSE42Intrinsics, "SSE4.2 is required");


6461   //
6462   // int_cnt2 is length of small (< 8 chars) constant substring
6463   // or (-1) for non constant substring in which case its length
6464   // is in cnt2 register.
6465   //
6466   // Note, inline_string_indexOf() generates checks:
6467   // if (substr.count > string.count) return -1;
6468   // if (substr.count == 0) return 0;
6469   //
6470   assert(int_cnt2 == -1 || (0 < int_cnt2 && int_cnt2 < 8), "should be != 0");
6471 
6472   // This method uses pcmpestri instruction with bound registers
6473   //   inputs:
6474   //     xmm - substring
6475   //     rax - substring length (elements count)
6476   //     mem - scanned string
6477   //     rdx - string length (elements count)
6478   //     0xd - mode: 1100 (substring search) + 01 (unsigned shorts)

6479   //   outputs:
6480   //     rcx - matched index in string
6481   assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri");



6482 
6483   Label RELOAD_SUBSTR, SCAN_TO_SUBSTR, SCAN_SUBSTR, ADJUST_STR,
6484         RET_FOUND, RET_NOT_FOUND, CLEANUP, FOUND_SUBSTR,
6485         FOUND_CANDIDATE;
6486 
6487   { //========================================================
6488     // We don't know where these strings are located
6489     // and we can't read beyond them. Load them through stack.
6490     Label BIG_STRINGS, CHECK_STR, COPY_SUBSTR, COPY_STR;
6491 
6492     movptr(tmp, rsp); // save old SP
6493 
6494     if (int_cnt2 > 0) {     // small (< 8 chars) constant substring
6495       if (int_cnt2 == 1) {  // One char









6496         load_unsigned_short(result, Address(str2, 0));
6497         movdl(vec, result); // move 32 bits
6498       } else if (int_cnt2 == 2) { // Two chars
6499         movdl(vec, Address(str2, 0)); // move 32 bits
6500       } else if (int_cnt2 == 4) { // Four chars
6501         movq(vec, Address(str2, 0));  // move 64 bits
6502       } else { // cnt2 = { 3, 5, 6, 7 }
6503         // Array header size is 12 bytes in 32-bit VM
6504         // + 6 bytes for 3 chars == 18 bytes,
6505         // enough space to load vec and shift.
6506         assert(HeapWordSize*TypeArrayKlass::header_size() >= 12,"sanity");
             // Load the 16 bytes that end exactly at the end of the substring,
             // then shift right so the substring starts at byte 0 of vec.
6507         movdqu(vec, Address(str2, (int_cnt2*2)-16));
6508         psrldq(vec, 16-(int_cnt2*2));








6509       }
6510     } else { // not constant substring
6511       cmpl(cnt2, 8);
6512       jccb(Assembler::aboveEqual, BIG_STRINGS); // Both strings are big enough
6513 
6514       // We can read beyond string if str+16 does not cross page boundary
6515       // since heaps are aligned and mapped by pages.
6516       assert(os::vm_page_size() < (int)G, "default page should be small");
6517       movl(result, str2); // We need only low 32 bits
           // result = offset of str2 within its page
6518       andl(result, (os::vm_page_size()-1));
6519       cmpl(result, (os::vm_page_size()-16));
6520       jccb(Assembler::belowEqual, CHECK_STR);
6521 
6522       // Move small strings to stack to allow load 16 bytes into vec.
6523       subptr(rsp, 16);
           // cnt2 is pushed below the 16-byte buffer, so the buffer starts at
           // rsp+wordSize; the -2 compensates for cnt2 being a 1-based index.
6524       int stk_offset = wordSize-2;
6525       push(cnt2);
6526 
           // Copy the substring one element at a time, from the last element
           // down to the first.
6527       bind(COPY_SUBSTR);
6528       load_unsigned_short(result, Address(str2, cnt2, Address::times_2, -2));
6529       movw(Address(rsp, cnt2, Address::times_2, stk_offset), result);





6530       decrement(cnt2);
6531       jccb(Assembler::notZero, COPY_SUBSTR);
6532 
6533       pop(cnt2);
6534       movptr(str2, rsp);  // New substring address
6535     } // non constant
6536 
6537     bind(CHECK_STR);
6538     cmpl(cnt1, 8);
6539     jccb(Assembler::aboveEqual, BIG_STRINGS);
6540 
6541     // Check cross page boundary.
6542     movl(result, str1); // We need only low 32 bits
6543     andl(result, (os::vm_page_size()-1));
6544     cmpl(result, (os::vm_page_size()-16));
6545     jccb(Assembler::belowEqual, BIG_STRINGS);
6546 
         // Copy the short scanned string to a 16-byte stack buffer as well.
6547     subptr(rsp, 16);
6548     int stk_offset = -2;
6549     if (int_cnt2 < 0) { // not constant
           // cnt2 is reused as the copy index below, so save it first; the
           // buffer then sits one word above rsp.
6550       push(cnt2);
6551       stk_offset += wordSize;
6552     }
6553     movl(cnt2, cnt1);
6554 
6555     bind(COPY_STR);
6556     load_unsigned_short(result, Address(str1, cnt2, Address::times_2, -2));
6557     movw(Address(rsp, cnt2, Address::times_2, stk_offset), result);





6558     decrement(cnt2);
6559     jccb(Assembler::notZero, COPY_STR);
6560 
6561     if (int_cnt2 < 0) { // not constant
6562       pop(cnt2);
6563     }
6564     movptr(str1, rsp);  // New string address
6565 
6566     bind(BIG_STRINGS);
6567     // Load substring.
6568     if (int_cnt2 < 0) { // -1



6569       movdqu(vec, Address(str2, 0));

           // Saved for RELOAD_SUBSTR / RET_FOUND_LONG; see the stack layout
           // comment at RELOAD_SUBSTR.
6570       push(cnt2);       // substr count
6571       push(str2);       // substr addr
6572       push(str1);       // string addr
6573     } else {
6574       // Small (< 8 chars) constant substrings are loaded already.
6575       movl(cnt2, int_cnt2);
6576     }
6577     push(tmp);  // original SP
6578 
6579   } // Finished loading
6580 
6581   //========================================================
6582   // Start search
6583   //
6584 
6585   movptr(result, str1); // string addr
6586 
6587   if (int_cnt2  < 0) {  // Only for non constant substring
6588     jmpb(SCAN_TO_SUBSTR);
6589 
6590     // SP saved at sp+0
6591     // String saved at sp+1*wordSize
6592     // Substr saved at sp+2*wordSize
6593     // Substr count saved at sp+3*wordSize
6594 
6595     // Reload substr for rescan, this code
6596     // is executed only for large substrings (> 8 chars)
6597     bind(RELOAD_SUBSTR);
6598     movptr(str2, Address(rsp, 2*wordSize));
6599     movl(cnt2, Address(rsp, 3*wordSize));



6600     movdqu(vec, Address(str2, 0));

6601     // We came here after the beginning of the substring was
6602     // matched but the rest of it was not so we need to search
6603     // again. Start from the next element after the previous match.
         // str1 was the scan cursor: (str1 - result) / 2 is the number of
         // elements it advanced past the candidate; add that back to cnt1.
6604     subptr(str1, result); // Restore counter

6605     shrl(str1, 1);

6606     addl(cnt1, str1);
6607     decrementl(cnt1);   // Shift to next element
6608     cmpl(cnt1, cnt2);
6609     jccb(Assembler::negative, RET_NOT_FOUND);  // Left less than substring
6610 
6611     addptr(result, 2);
6612   } // non constant
6613 
6614   // Scan string for start of substr in 16-byte vectors
6615   bind(SCAN_TO_SUBSTR);
6616   assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri");
6617   pcmpestri(vec, Address(result, 0), 0x0d);
6618   jccb(Assembler::below, FOUND_CANDIDATE);   // CF == 1
6619   subl(cnt1, 8);
6620   jccb(Assembler::lessEqual, RET_NOT_FOUND); // Scanned full string
6621   cmpl(cnt1, cnt2);
6622   jccb(Assembler::negative, RET_NOT_FOUND);  // Left less than substring
6623   addptr(result, 16);
6624 
6625   bind(ADJUST_STR);
6626   cmpl(cnt1, 8); // Do not read beyond string
6627   jccb(Assembler::greaterEqual, SCAN_TO_SUBSTR);
6628   // Back-up string to avoid reading beyond string.
       // Move result so the next 16-byte read ends at the end of the string,
       // and treat exactly 8 elements as remaining.
6629   lea(result, Address(result, cnt1, Address::times_2, -16));
6630   movl(cnt1, 8);
6631   jmpb(SCAN_TO_SUBSTR);
6632 
6633   // Found a potential substr
6634   bind(FOUND_CANDIDATE);
6635   // After pcmpestri tmp(rcx) contains matched element index
6636 
6637   // Make sure string is still long enough
6638   subl(cnt1, tmp);
6639   cmpl(cnt1, cnt2);
6640   jccb(Assembler::greaterEqual, FOUND_SUBSTR);
6641   // Left less than substring.
6642 
6643   bind(RET_NOT_FOUND);
6644   movl(result, -1);
6645   jmpb(CLEANUP);
6646 
6647   bind(FOUND_SUBSTR);
6648   // Compute start addr of substr
6649   lea(result, Address(result, tmp, Address::times_2));
6650 
6651   if (int_cnt2 > 0) { // Constant substring
6652     // Repeat search for small substring (< 8 chars)
6653     // from new point without reloading substring.
6654     // Have to check that we don't read beyond string.
6655     cmpl(tmp, 8-int_cnt2);
6656     jccb(Assembler::greater, ADJUST_STR);
6657     // Fall through if matched whole substring.
6658   } else { // non constant
6659     assert(int_cnt2 == -1, "should be != 0");
6660 
6661     addl(tmp, cnt2);
6662     // Found result if we matched whole substring.
6663     cmpl(tmp, 8);
6664     jccb(Assembler::lessEqual, RET_FOUND);
6665 
6666     // Repeat search for small substring (<= 8 chars)
6667     // from new point 'str1' without reloading substring.
6668     cmpl(cnt2, 8);
6669     // Have to check that we don't read beyond string.
6670     jccb(Assembler::lessEqual, ADJUST_STR);
6671 
6672     Label CHECK_NEXT, CONT_SCAN_SUBSTR, RET_FOUND_LONG;
6673     // Compare the rest of substring (> 8 chars).
         // From here on str1 is a scan cursor; result keeps the candidate start.
6674     movptr(str1, result);
6675 
6676     cmpl(tmp, cnt2);
6677     // First 8 chars are already matched.
6678     jccb(Assembler::equal, CHECK_NEXT);
6679 
6680     bind(SCAN_SUBSTR);
6681     pcmpestri(vec, Address(str1, 0), 0x0d);
6682     // Need to reload strings pointers if not matched whole vector
6683     jcc(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0
6684 
6685     bind(CHECK_NEXT);
6686     subl(cnt2, 8);
6687     jccb(Assembler::lessEqual, RET_FOUND_LONG); // Found full substring
6688     addptr(str1, 16);



6689     addptr(str2, 16);
6690     subl(cnt1, 8);
6691     cmpl(cnt2, 8); // Do not read beyond substring

6692     jccb(Assembler::greaterEqual, CONT_SCAN_SUBSTR);
6693     // Back-up strings to avoid reading beyond substring.
6694     lea(str2, Address(str2, cnt2, Address::times_2, -16));
6695     lea(str1, Address(str1, cnt2, Address::times_2, -16));






         // Both pointers were moved back by (8 - cnt2) elements, so adjust the
         // counters to match: cnt1 = cnt1 - cnt2 + 8, cnt2 = 8.
6696     subl(cnt1, cnt2);
6697     movl(cnt2, 8);
6698     addl(cnt1, 8);
6699     bind(CONT_SCAN_SUBSTR);



6700     movdqu(vec, Address(str2, 0));

6701     jmpb(SCAN_SUBSTR);
6702 
6703     bind(RET_FOUND_LONG);
         // str1 was used as a cursor above; reload the string address saved
         // at sp+1*wordSize for the offset computation below.
6704     movptr(str1, Address(rsp, wordSize));
6705   } // non constant
6706 
6707   bind(RET_FOUND);
6708   // Compute substr offset
6709   subptr(result, str1);

6710   shrl(result, 1); // index
6711 
6712   bind(CLEANUP);
6713   pop(rsp); // restore SP
6714 
6715 } // string_indexof
6716 
6717 // Compare strings.





















































































































6718 void MacroAssembler::string_compare(Register str1, Register str2,
6719                                     Register cnt1, Register cnt2, Register result,
6720                                     XMMRegister vec1) {
       // Emits code that compares the 16-bit elements at str1 (cnt1 elements)
       // with those at str2 (cnt2 elements).
       //   result - on exit: the difference of the first pair of elements that
       //            differ, otherwise the length difference cnt1 - cnt2.
       //   vec1   - vector scratch register for the SSE4.2 / AVX2 paths.
       // str1, str2, cnt1 and cnt2 are modified. The length difference is kept
       // on the stack while the comparison runs and is popped on every exit path.
       // The vector paths assert result == rax, cnt2 == rdx and cnt1 == rcx.
6721   ShortBranchVerifier sbv(this);
6722   Label LENGTH_DIFF_LABEL, POP_LABEL, DONE_LABEL, WHILE_HEAD_LABEL;


6723 



6724   // Compute the minimum of the string lengths and the
6725   // difference of the string lengths (stack).
6726   // Do the conditional move stuff
6727   movl(result, cnt1);
6728   subl(cnt1, cnt2);
6729   push(cnt1);
       // If cnt1 <= cnt2 (flags from the subl above), cnt2 takes the original
       // cnt1 saved in result, so cnt2 ends up holding the minimum length.
6730   cmov32(Assembler::lessEqual, cnt2, result);
6731 
6732   // Is the minimum length zero?
6733   testl(cnt2, cnt2);
6734   jcc(Assembler::zero, LENGTH_DIFF_LABEL);
6735 
6736   // Compare first characters




6737   load_unsigned_short(result, Address(str1, 0));
6738   load_unsigned_short(cnt1, Address(str2, 0));




6739   subl(result, cnt1);
6740   jcc(Assembler::notZero,  POP_LABEL);





       // Only one element to compare and it matched: the result is the length
       // difference.
6741   cmpl(cnt2, 1);
6742   jcc(Assembler::equal, LENGTH_DIFF_LABEL);
6743 
6744   // Check if the strings start at the same location.

6745   cmpptr(str1, str2);
6746   jcc(Assembler::equal, LENGTH_DIFF_LABEL);
6747 
       // Elements are addressed with a times_2 scale; stride is the number of
       // elements handled per 16-byte vector.
6748   Address::ScaleFactor scale = Address::times_2;
6749   int stride = 8;









6750 
6751   if (UseAVX >= 2 && UseSSE42Intrinsics) {
6752     Label COMPARE_WIDE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_WIDE_TAIL, COMPARE_SMALL_STR;
6753     Label COMPARE_WIDE_VECTORS_LOOP, COMPARE_16_CHARS, COMPARE_INDEX_CHAR;
6754     Label COMPARE_TAIL_LONG;
6755     int pcmpmask = 0x19;



6756 
6757     // Setup to compare 16-chars (32-bytes) vectors,
6758     // start from first character again because it has aligned address.
6759     int stride2 = 16;
6760     int adr_stride  = stride  << scale;









6761 
6762     assert(result == rax && cnt2 == rdx && cnt1 == rcx, "pcmpestri");
6763     // rax and rdx are used by pcmpestri as elements counters
         // result keeps the full minimum length; cnt2 is rounded down to a
         // multiple of stride2.
6764     movl(result, cnt2);
6765     andl(cnt2, ~(stride2-1));   // cnt2 holds the vector count
6766     jcc(Assembler::zero, COMPARE_TAIL_LONG);
6767 
6768     // fast path : compare first 2 8-char vectors.
6769     bind(COMPARE_16_CHARS);

6770     movdqu(vec1, Address(str1, 0));



6771     pcmpestri(vec1, Address(str2, 0), pcmpmask);
6772     jccb(Assembler::below, COMPARE_INDEX_CHAR);
6773 

6774     movdqu(vec1, Address(str1, adr_stride));
6775     pcmpestri(vec1, Address(str2, adr_stride), pcmpmask);




6776     jccb(Assembler::aboveEqual, COMPARE_WIDE_VECTORS);
         // Mismatch in the second vector: make the index in cnt1 relative to
         // the first vector.
6777     addl(cnt1, stride);
6778 
6779     // Compare the characters at index in cnt1
6780     bind(COMPARE_INDEX_CHAR); //cnt1 has the offset of the mismatching character
6781     load_unsigned_short(result, Address(str1, cnt1, scale));
6782     load_unsigned_short(cnt2, Address(str2, cnt1, scale));
6783     subl(result, cnt2);
6784     jmp(POP_LABEL);
6785 
6786     // Setup the registers to start vector comparison loop
6787     bind(COMPARE_WIDE_VECTORS);

         // Point both strings at their ends (start + result elements).
6788     lea(str1, Address(str1, result, scale));
6789     lea(str2, Address(str2, result, scale));




         // The first stride2 elements were compared by the fast path above.
6790     subl(result, stride2);
6791     subl(cnt2, stride2);
6792     jccb(Assembler::zero, COMPARE_WIDE_TAIL);
         // result becomes a negative element index relative to the string ends.
6793     negptr(result);
6794 
6795     //  In a loop, compare 16-chars (32-bytes) at once using (vpxor+vptest)
6796     bind(COMPARE_WIDE_VECTORS_LOOP);

6797     vmovdqu(vec1, Address(str1, result, scale));
6798     vpxor(vec1, Address(str2, result, scale));




6799     vptest(vec1, vec1);
6800     jccb(Assembler::notZero, VECTOR_NOT_EQUAL);
6801     addptr(result, stride2);
6802     subl(cnt2, stride2);
6803     jccb(Assembler::notZero, COMPARE_WIDE_VECTORS_LOOP);
6804     // clean upper bits of YMM registers
6805     vpxor(vec1, vec1);
6806 
6807     // compare wide vectors tail
6808     bind(COMPARE_WIDE_TAIL);
6809     testptr(result, result);
6810     jccb(Assembler::zero, LENGTH_DIFF_LABEL);
6811 
         // Elements remain: run the loop once more over the last stride2
         // elements (index -stride2, one iteration).
6812     movl(result, stride2);
6813     movl(cnt2, result);
6814     negptr(result);
6815     jmpb(COMPARE_WIDE_VECTORS_LOOP);
6816 
6817     // Identifies the mismatching (higher or lower)16-bytes in the 32-byte vectors.
6818     bind(VECTOR_NOT_EQUAL);
6819     // clean upper bits of YMM registers
6820     vpxor(vec1, vec1);

         // Point both strings at the differing 32-byte block and let the
         // 8-element compares above locate the mismatching element.
6821     lea(str1, Address(str1, result, scale));
6822     lea(str2, Address(str2, result, scale));




6823     jmp(COMPARE_16_CHARS);
6824 
6825     // Compare tail chars, length between 1 to 15 chars
6826     bind(COMPARE_TAIL_LONG);
6827     movl(cnt2, result);
6828     cmpl(cnt2, stride);
6829     jccb(Assembler::less, COMPARE_SMALL_STR);
6830 

6831     movdqu(vec1, Address(str1, 0));



6832     pcmpestri(vec1, Address(str2, 0), pcmpmask);
6833     jcc(Assembler::below, COMPARE_INDEX_CHAR);
6834     subptr(cnt2, stride);
6835     jccb(Assembler::zero, LENGTH_DIFF_LABEL);

6836     lea(str1, Address(str1, result, scale));
6837     lea(str2, Address(str2, result, scale));




         // cnt2 = -(elements left); finish them in the scalar loop.
6838     negptr(cnt2);
6839     jmpb(WHILE_HEAD_LABEL);
6840 
6841     bind(COMPARE_SMALL_STR);
6842   } else if (UseSSE42Intrinsics) {
6843     Label COMPARE_WIDE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_TAIL;
6844     int pcmpmask = 0x19;
6845     // Setup to compare 8-char (16-byte) vectors,
6846     // start from first character again because it has aligned address.
6847     movl(result, cnt2);
6848     andl(cnt2, ~(stride - 1));   // cnt2 holds the vector count



6849     jccb(Assembler::zero, COMPARE_TAIL);
6850 
6851     lea(str1, Address(str1, result, scale));
6852     lea(str2, Address(str2, result, scale));




6853     negptr(result);
6854 
6855     // pcmpestri
6856     //   inputs:
6857     //     vec1- substring
6858     //     rax - negative string length (elements count)
6859     //     mem - scanned string
6860     //     rdx - string length (elements count)
6861     //     pcmpmask - cmp mode: 11000 (string compare with negated result)
6862     //               + 00 (unsigned bytes) or  + 01 (unsigned shorts)
6863     //   outputs:
6864     //     rcx - first mismatched element index
6865     assert(result == rax && cnt2 == rdx && cnt1 == rcx, "pcmpestri");
6866 
6867     bind(COMPARE_WIDE_VECTORS);

6868     movdqu(vec1, Address(str1, result, scale));
6869     pcmpestri(vec1, Address(str2, result, scale), pcmpmask);




6870     // After pcmpestri cnt1(rcx) contains mismatched element index
6871 
6872     jccb(Assembler::below, VECTOR_NOT_EQUAL);  // CF==1
6873     addptr(result, stride);
6874     subptr(cnt2, stride);
6875     jccb(Assembler::notZero, COMPARE_WIDE_VECTORS);
6876 
6877     // compare wide vectors tail
6878     testptr(result, result);
6879     jccb(Assembler::zero, LENGTH_DIFF_LABEL);
6880 
         // Elements remain: compare the last 'stride' elements of both strings
         // (index -stride from the ends).
6881     movl(cnt2, stride);
6882     movl(result, stride);
6883     negptr(result);

6884     movdqu(vec1, Address(str1, result, scale));
6885     pcmpestri(vec1, Address(str2, result, scale), pcmpmask);




6886     jccb(Assembler::aboveEqual, LENGTH_DIFF_LABEL);
6887 
6888     // Mismatched characters in the vectors
6889     bind(VECTOR_NOT_EQUAL);
         // cnt1 = mismatch index within the vector + (negative) vector index.
6890     addptr(cnt1, result);
6891     load_unsigned_short(result, Address(str1, cnt1, scale));
6892     load_unsigned_short(cnt2, Address(str2, cnt1, scale));
6893     subl(result, cnt2);
6894     jmpb(POP_LABEL);
6895 
6896     bind(COMPARE_TAIL); // limit is zero
         // Reached when the vector count in cnt2 is zero; restore the minimum
         // length from result for the scalar loop.
6897     movl(cnt2, result);
6898     // Fallthru to tail compare
6899   }
6900   // Shift str2 and str1 to the end of the arrays, negate min

6901   lea(str1, Address(str1, cnt2, scale));
6902   lea(str2, Address(str2, cnt2, scale));




6903   decrementl(cnt2);  // first character was compared already
6904   negptr(cnt2);
6905 
6906   // Compare the rest of the elements
       // cnt2 is a negative element index that counts up to zero.
6907   bind(WHILE_HEAD_LABEL);
6908   load_unsigned_short(result, Address(str1, cnt2, scale, 0));
6909   load_unsigned_short(cnt1, Address(str2, cnt2, scale, 0));
6910   subl(result, cnt1);
6911   jccb(Assembler::notZero, POP_LABEL);
6912   increment(cnt2);
6913   jccb(Assembler::notZero, WHILE_HEAD_LABEL);
6914 
6915   // Strings are equal up to min length.  Return the length difference.
6916   bind(LENGTH_DIFF_LABEL);
6917   pop(result);




6918   jmpb(DONE_LABEL);
6919 
6920   // Discard the stored length difference
       // (result already holds the element difference; cnt1 is just a scratch
       // destination for the pop)
6921   bind(POP_LABEL);
6922   pop(cnt1);
6923 
6924   // That's it
6925   bind(DONE_LABEL);



6926 }
6927 
6928 // Compare char[] arrays aligned to 4 bytes or substrings.
6929 void MacroAssembler::char_arrays_equals(bool is_array_equ, Register ary1, Register ary2,
6930                                         Register limit, Register result, Register chr,

6931                                         XMMRegister vec1, XMMRegister vec2) {









































































































































6932   ShortBranchVerifier sbv(this);
6933   Label TRUE_LABEL, FALSE_LABEL, DONE, COMPARE_VECTORS, COMPARE_CHAR;
6934 
6935   int length_offset  = arrayOopDesc::length_offset_in_bytes();
6936   int base_offset    = arrayOopDesc::base_offset_in_bytes(T_CHAR);
6937 

6938   // Check the input args
6939   cmpptr(ary1, ary2);
6940   jcc(Assembler::equal, TRUE_LABEL);
6941 
6942   if (is_array_equ) {
6943     // Need additional checks for arrays_equals.
6944     testptr(ary1, ary1);
6945     jcc(Assembler::zero, FALSE_LABEL);
6946     testptr(ary2, ary2);
6947     jcc(Assembler::zero, FALSE_LABEL);
6948 
6949     // Check the lengths
6950     movl(limit, Address(ary1, length_offset));
6951     cmpl(limit, Address(ary2, length_offset));
6952     jcc(Assembler::notEqual, FALSE_LABEL);
6953   }
6954 
6955   // count == 0
6956   testl(limit, limit);
6957   jcc(Assembler::zero, TRUE_LABEL);
6958 
6959   if (is_array_equ) {
6960     // Load array address
6961     lea(ary1, Address(ary1, base_offset));
6962     lea(ary2, Address(ary2, base_offset));
6963   }
6964 


6965   shll(limit, 1);      // byte count != 0

6966   movl(result, limit); // copy
6967 
6968   if (UseAVX >= 2) {
6969     // With AVX2, use 32-byte vector compare
6970     Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;
6971 
6972     // Compare 32-byte vectors
6973     andl(result, 0x0000001e);  //   tail count (in bytes)
6974     andl(limit, 0xffffffe0);   // vector count (in bytes)
6975     jccb(Assembler::zero, COMPARE_TAIL);
6976 
6977     lea(ary1, Address(ary1, limit, Address::times_1));
6978     lea(ary2, Address(ary2, limit, Address::times_1));
6979     negptr(limit);
6980 
6981     bind(COMPARE_WIDE_VECTORS);
6982     vmovdqu(vec1, Address(ary1, limit, Address::times_1));
6983     vmovdqu(vec2, Address(ary2, limit, Address::times_1));
6984     vpxor(vec1, vec2);
6985 
6986     vptest(vec1, vec1);
6987     jccb(Assembler::notZero, FALSE_LABEL);
6988     addptr(limit, 32);
6989     jcc(Assembler::notZero, COMPARE_WIDE_VECTORS);
6990 
6991     testl(result, result);
6992     jccb(Assembler::zero, TRUE_LABEL);
6993 
6994     vmovdqu(vec1, Address(ary1, result, Address::times_1, -32));
6995     vmovdqu(vec2, Address(ary2, result, Address::times_1, -32));
6996     vpxor(vec1, vec2);
6997 
6998     vptest(vec1, vec1);
6999     jccb(Assembler::notZero, FALSE_LABEL);
7000     jmpb(TRUE_LABEL);
7001 
7002     bind(COMPARE_TAIL); // limit is zero
7003     movl(limit, result);
7004     // Fallthru to tail compare
7005   } else if (UseSSE42Intrinsics) {
7006     // With SSE4.2, use double quad vector compare
7007     Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;
7008 
7009     // Compare 16-byte vectors
7010     andl(result, 0x0000000e);  //   tail count (in bytes)
7011     andl(limit, 0xfffffff0);   // vector count (in bytes)
7012     jccb(Assembler::zero, COMPARE_TAIL);
7013 
7014     lea(ary1, Address(ary1, limit, Address::times_1));
7015     lea(ary2, Address(ary2, limit, Address::times_1));
7016     negptr(limit);
7017 
7018     bind(COMPARE_WIDE_VECTORS);
7019     movdqu(vec1, Address(ary1, limit, Address::times_1));
7020     movdqu(vec2, Address(ary2, limit, Address::times_1));
7021     pxor(vec1, vec2);
7022 
7023     ptest(vec1, vec1);
7024     jccb(Assembler::notZero, FALSE_LABEL);
7025     addptr(limit, 16);
7026     jcc(Assembler::notZero, COMPARE_WIDE_VECTORS);
7027 
7028     testl(result, result);
7029     jccb(Assembler::zero, TRUE_LABEL);
7030 


7042   }
7043 
7044   // Compare 4-byte vectors
7045   andl(limit, 0xfffffffc); // vector count (in bytes)
7046   jccb(Assembler::zero, COMPARE_CHAR);
7047 
7048   lea(ary1, Address(ary1, limit, Address::times_1));
7049   lea(ary2, Address(ary2, limit, Address::times_1));
7050   negptr(limit);
7051 
7052   bind(COMPARE_VECTORS);
7053   movl(chr, Address(ary1, limit, Address::times_1));
7054   cmpl(chr, Address(ary2, limit, Address::times_1));
7055   jccb(Assembler::notEqual, FALSE_LABEL);
7056   addptr(limit, 4);
7057   jcc(Assembler::notZero, COMPARE_VECTORS);
7058 
7059   // Compare trailing char (final 2 bytes), if any
7060   bind(COMPARE_CHAR);
7061   testl(result, 0x2);   // tail  char
7062   jccb(Assembler::zero, TRUE_LABEL);
7063   load_unsigned_short(chr, Address(ary1, 0));
7064   load_unsigned_short(limit, Address(ary2, 0));
7065   cmpl(chr, limit);
7066   jccb(Assembler::notEqual, FALSE_LABEL);
7067 














7068   bind(TRUE_LABEL);
7069   movl(result, 1);   // return true
7070   jmpb(DONE);
7071 
7072   bind(FALSE_LABEL);
7073   xorl(result, result); // return false
7074 
7075   // That's it
7076   bind(DONE);
7077   if (UseAVX >= 2) {
7078     // clean upper bits of YMM registers
7079     vpxor(vec1, vec1);
7080     vpxor(vec2, vec2);
7081   }
7082 }
7083 


7084 void MacroAssembler::generate_fill(BasicType t, bool aligned,
7085                                    Register to, Register value, Register count,
7086                                    Register rtmp, XMMRegister xtmp) {
7087   ShortBranchVerifier sbv(this);
7088   assert_different_registers(to, value, count, rtmp);
7089   Label L_exit, L_skip_align1, L_skip_align2, L_fill_byte;
7090   Label L_fill_2_bytes, L_fill_4_bytes;
7091 
7092   int shift = -1;
7093   switch (t) {
7094     case T_BYTE:
7095       shift = 2;
7096       break;
7097     case T_SHORT:
7098       shift = 1;
7099       break;
7100     case T_INT:
7101       shift = 0;
7102       break;
7103     default: ShouldNotReachHere();


9066     jmp(L_wordByWord);
9067 
9068   BIND(L_byteByByteProlog);
9069   andl(in2, 0x00000007);
9070   movl(tmp2, 1);
9071 
9072   BIND(L_byteByByte);
9073   cmpl(tmp2, in2);
9074   jccb(Assembler::greater, L_exit);
9075     movb(tmp1, Address(in1, 0));
9076     crc32(in_out, tmp1, 1);
9077     incl(in1);
9078     incl(tmp2);
9079     jmp(L_byteByByte);
9080 
9081   BIND(L_exit);
9082 }
9083 #endif // LP64
9084 #undef BIND
9085 #undef BLOCK_COMMENT













































































































































































9086 
9087 
9088 Assembler::Condition MacroAssembler::negate_condition(Assembler::Condition cond) {
9089   switch (cond) {
9090     // Note some conditions are synonyms for others
9091     case Assembler::zero:         return Assembler::notZero;
9092     case Assembler::notZero:      return Assembler::zero;
9093     case Assembler::less:         return Assembler::greaterEqual;
9094     case Assembler::lessEqual:    return Assembler::greater;
9095     case Assembler::greater:      return Assembler::lessEqual;
9096     case Assembler::greaterEqual: return Assembler::less;
9097     case Assembler::below:        return Assembler::aboveEqual;
9098     case Assembler::belowEqual:   return Assembler::above;
9099     case Assembler::above:        return Assembler::belowEqual;
9100     case Assembler::aboveEqual:   return Assembler::below;
9101     case Assembler::overflow:     return Assembler::noOverflow;
9102     case Assembler::noOverflow:   return Assembler::overflow;
9103     case Assembler::negative:     return Assembler::positive;
9104     case Assembler::positive:     return Assembler::negative;
9105     case Assembler::parity:       return Assembler::noParity;


  29 #include "gc/shared/cardTableModRefBS.hpp"
  30 #include "gc/shared/collectedHeap.inline.hpp"
  31 #include "interpreter/interpreter.hpp"
  32 #include "memory/resourceArea.hpp"
  33 #include "memory/universe.hpp"
  34 #include "oops/klass.inline.hpp"
  35 #include "prims/methodHandles.hpp"
  36 #include "runtime/biasedLocking.hpp"
  37 #include "runtime/interfaceSupport.hpp"
  38 #include "runtime/objectMonitor.hpp"
  39 #include "runtime/os.hpp"
  40 #include "runtime/sharedRuntime.hpp"
  41 #include "runtime/stubRoutines.hpp"
  42 #include "utilities/macros.hpp"
  43 #if INCLUDE_ALL_GCS
  44 #include "gc/g1/g1CollectedHeap.inline.hpp"
  45 #include "gc/g1/g1SATBCardTableModRefBS.hpp"
  46 #include "gc/g1/heapRegion.hpp"
  47 #endif // INCLUDE_ALL_GCS
  48 #include "crc32c.h"
  49 #ifdef COMPILER2
  50 #include "opto/intrinsicnode.hpp"
  51 #endif
  52 
  53 #ifdef PRODUCT
  54 #define BLOCK_COMMENT(str) /* nothing */
  55 #define STOP(error) stop(error)
  56 #else
  57 #define BLOCK_COMMENT(str) block_comment(str)
  58 #define STOP(error) block_comment(error); stop(error)
  59 #endif
  60 
  61 #define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
  62 
  63 #ifdef ASSERT
  64 bool AbstractAssembler::pd_check_instruction_mark() { return true; }
  65 #endif
  66 
  67 static Assembler::Condition reverse[] = {
  68     Assembler::noOverflow     /* overflow      = 0x0 */ ,
  69     Assembler::overflow       /* noOverflow    = 0x1 */ ,
  70     Assembler::aboveEqual     /* carrySet      = 0x2, below         = 0x2 */ ,
  71     Assembler::below          /* aboveEqual    = 0x3, carryClear    = 0x3 */ ,


6285 
6286 }
6287 
6288 void MacroAssembler::clear_mem(Register base, Register cnt, Register tmp) {
       // Emits code that zero-fills memory starting at 'base' with a rep-prefixed
       // store (rep_stosb when UseFastStosb is set, rep_stos otherwise).
       // All three registers are fixed by the asserts below.
6289   // cnt - number of qwords (8-byte words).
6290   // base - start address, qword aligned.
       // tmp - zeroed below before the store is emitted.
6291   assert(base==rdi, "base register must be edi for rep stos");
6292   assert(tmp==rax,   "tmp register must be eax for rep stos");
6293   assert(cnt==rcx,   "cnt register must be ecx for rep stos");
6294 
6295   xorptr(tmp, tmp);
6296   if (UseFastStosb) {
6297     shlptr(cnt,3); // convert to number of bytes
6298     rep_stosb();
6299   } else {
6300     NOT_LP64(shlptr(cnt,1);) // convert to number of dwords for 32-bit VM
6301     rep_stos();
6302   }
6303 }
6304 
6305 #ifdef COMPILER2
6306 
6307 // IndexOf for constant substrings with size >= 8 chars
6308 // which don't need to be loaded through stack.
     //
     // Emits code that searches the string at str1 (cnt1 elements) for the
     // constant-length substring at str2 (int_cnt2 elements). The assert below
     // requires int_cnt2 >= stride, where stride is 16 for LL and 8 otherwise.
     //   result - receives the element index of the match, or -1 if there is none
     //   vec    - holds the current 16-byte piece of the substring
     //   tmp    - receives the pcmpestri match index (must be rcx)
     //   ae     - StrIntrinsicNode encoding; LL, UU and UL are handled, LU is rejected
6309 void MacroAssembler::string_indexofC8(Register str1, Register str2,
6310                                       Register cnt1, Register cnt2,
6311                                       int int_cnt2,  Register result,
6312                                       XMMRegister vec, Register tmp,
6313                                       int ae) {
6314   ShortBranchVerifier sbv(this);
6315   assert(UseSSE42Intrinsics, "SSE4.2 is required");
6316   assert(ae != StrIntrinsicNode::LU, "Invalid encoding");
6317 
6318   // This method uses the pcmpestri instruction with bound registers
6319   //   inputs:
6320   //     xmm - substring
6321   //     rax - substring length (elements count)
6322   //     mem - scanned string
6323   //     rdx - string length (elements count)
6324   //     0xd - mode: 1100 (substring search) + 01 (unsigned shorts)
6325   //     0xc - mode: 1100 (substring search) + 00 (unsigned bytes)
6326   //   outputs:
6327   //     rcx - matched index in string
6328   assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri");
6329   int mode   = (ae == StrIntrinsicNode::LL) ? 0x0c : 0x0d; // bytes or shorts
6330   int stride = (ae == StrIntrinsicNode::LL) ? 16 : 8; //UU, UL -> 8
       // scale1 addresses elements of the scanned string, scale2 elements of the
       // substring; they differ only for UL, where the substring is read as bytes.
6331   Address::ScaleFactor scale1 = (ae == StrIntrinsicNode::LL) ? Address::times_1 : Address::times_2;
6332   Address::ScaleFactor scale2 = (ae == StrIntrinsicNode::UL) ? Address::times_1 : scale1;
6333 
6334   Label RELOAD_SUBSTR, SCAN_TO_SUBSTR, SCAN_SUBSTR,
6335         RET_FOUND, RET_NOT_FOUND, EXIT, FOUND_SUBSTR,
6336         MATCH_SUBSTR_HEAD, RELOAD_STR, FOUND_CANDIDATE;
6337 
6338   // Note, inline_string_indexOf() generates checks:
6339   // if (substr.count > string.count) return -1;
6340   // if (substr.count == 0) return 0;
6341   assert(int_cnt2 >= stride, "this code is used only for cnt2 >= 8 chars");
6342 
6343   // Load substring.
6344   if (ae == StrIntrinsicNode::UL) {
         // UL: the substring is stored as bytes, so widen it to 16-bit elements while loading.
6345     pmovzxbw(vec, Address(str2, 0));
6346   } else {
6347     movdqu(vec, Address(str2, 0));
6348   }
6349   movl(cnt2, int_cnt2);
6350   movptr(result, str1); // string addr
6351 
6352   if (int_cnt2 > stride) {
6353     jmpb(SCAN_TO_SUBSTR);
6354 
6355     // Reload substr for rescan, this code
6356     // is executed only for large substrings (> stride elements)
6357     bind(RELOAD_SUBSTR);
6358     if (ae == StrIntrinsicNode::UL) {
6359       pmovzxbw(vec, Address(str2, 0));
6360     } else {
6361       movdqu(vec, Address(str2, 0));
6362     }
6363     negptr(cnt2); // Jumped here with negative cnt2, convert to positive
6364 
6365     bind(RELOAD_STR);
6366     // We came here after the beginning of the substring was
6367     // matched but the rest of it was not so we need to search
6368     // again. Start from the next element after the previous match.
6369 
6370     // cnt2 is number of substring remaining elements and
6371     // cnt1 is number of string remaining elements when cmp failed.
6372     // Restored cnt1 = cnt1 - cnt2 + int_cnt2
6373     subl(cnt1, cnt2);
6374     addl(cnt1, int_cnt2);
6375     movl(cnt2, int_cnt2); // Now restore cnt2
6376 
6377     decrementl(cnt1);     // Shift to next element
6378     cmpl(cnt1, cnt2);
6379     jccb(Assembler::negative, RET_NOT_FOUND);  // Left less than substring
6380 
6381     addptr(result, (1<<scale1)); // advance the string pointer by one element
6382 
6383   } // (int_cnt2 > stride)
6384 
6385   // Scan string for start of substr in 16-byte vectors
6386   bind(SCAN_TO_SUBSTR);
6387   pcmpestri(vec, Address(result, 0), mode);
6388   jccb(Assembler::below, FOUND_CANDIDATE);   // CF == 1
6389   subl(cnt1, stride);
6390   jccb(Assembler::lessEqual, RET_NOT_FOUND); // Scanned full string
6391   cmpl(cnt1, cnt2);
6392   jccb(Assembler::negative, RET_NOT_FOUND);  // Left less than substring
6393   addptr(result, 16);
6394   jmpb(SCAN_TO_SUBSTR);
6395 
6396   // Found a potential substr
6397   bind(FOUND_CANDIDATE);
6398   // Matched whole vector if first element matched (tmp(rcx) == 0).
6399   if (int_cnt2 == stride) {
6400     jccb(Assembler::overflow, RET_FOUND);    // OF == 1
6401   } else { // int_cnt2 > stride
6402     jccb(Assembler::overflow, FOUND_SUBSTR);
6403   }
6404   // After pcmpestri tmp(rcx) contains matched element index
6405   // Compute start addr of substr
6406   lea(result, Address(result, tmp, scale1));
6407 
6408   // Make sure string is still long enough
6409   subl(cnt1, tmp);
6410   cmpl(cnt1, cnt2);
6411   if (int_cnt2 == stride) {
6412     jccb(Assembler::greaterEqual, SCAN_TO_SUBSTR);
6413   } else { // int_cnt2 > stride
6414     jccb(Assembler::greaterEqual, MATCH_SUBSTR_HEAD);
6415   }
6416   // Left less than substring.
6417 
6418   bind(RET_NOT_FOUND);
6419   movl(result, -1);
6420   jmpb(EXIT);
6421 
6422   if (int_cnt2 > stride) {
6423     // This code is optimized for the case when whole substring
6424     // is matched if its head is matched.
6425     bind(MATCH_SUBSTR_HEAD);
6426     pcmpestri(vec, Address(result, 0), mode);
6427     // Reload only string if does not match
6428     jccb(Assembler::noOverflow, RELOAD_STR); // OF == 0
6429 
6430     Label CONT_SCAN_SUBSTR;
6431     // Compare the rest of substring (> stride elements).
6432     bind(FOUND_SUBSTR);
6433     // First 'stride' elements are already matched.
         // From here on cnt2 is a negative offset from the end of the substring.
6434     negptr(cnt2);
6435     addptr(cnt2, stride);
6436 
6437     bind(SCAN_SUBSTR);
6438     subl(cnt1, stride);
6439     cmpl(cnt2, -stride); // Do not read beyond substring
6440     jccb(Assembler::lessEqual, CONT_SCAN_SUBSTR);
6441     // Back-up strings to avoid reading beyond substring:
6442     // cnt1 = cnt1 - cnt2 + stride
6443     addl(cnt1, cnt2); // cnt2 is negative
6444     addl(cnt1, stride);
6445     movl(cnt2, stride); negptr(cnt2);
6446     bind(CONT_SCAN_SUBSTR);
6447     if (int_cnt2 < (int)G) {
           // The scaled constant length is used directly as the address displacement.
6448       int tail_off1 = int_cnt2<<scale1;
6449       int tail_off2 = int_cnt2<<scale2;
6450       if (ae == StrIntrinsicNode::UL) {
6451         pmovzxbw(vec, Address(str2, cnt2, scale2, tail_off2));
6452       } else {
6453         movdqu(vec, Address(str2, cnt2, scale2, tail_off2));
6454       }
6455       pcmpestri(vec, Address(result, cnt2, scale1, tail_off1), mode);
6456     } else {
6457       // calculate index in register to avoid integer overflow (int_cnt2*2)
6458       movl(tmp, int_cnt2);
6459       addptr(tmp, cnt2);
6460       if (ae == StrIntrinsicNode::UL) {
6461         pmovzxbw(vec, Address(str2, tmp, scale2, 0));
6462       } else {
6463         movdqu(vec, Address(str2, tmp, scale2, 0));
6464       }
6465       pcmpestri(vec, Address(result, tmp, scale1, 0), mode);
6466     }
6467     // Need to reload strings pointers if not matched whole vector
6468     jcc(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0
6469     addptr(cnt2, stride);
6470     jcc(Assembler::negative, SCAN_SUBSTR);
6471     // Fall through if found full substring
6472 
6473   } // (int_cnt2 > stride)
6474 
6475   bind(RET_FOUND);
6476   // Found result if we matched full small substring.
6477   // Compute substr offset
6478   subptr(result, str1);
6479   if (ae == StrIntrinsicNode::UU || ae == StrIntrinsicNode::UL) {
6480     shrl(result, 1); // index
6481   }
6482   bind(EXIT);
6483 
6484 } // string_indexofC8
6485 
6486 // Small strings are loaded through stack if they cross page boundary.
     //
     // Emits code that searches the string at str1 (cnt1 elements) for the
     // substring at str2. int_cnt2 is the constant substring length, or -1 when
     // the length is only known at run time and is held in cnt2.
     //   result - receives the element index of the match, or -1 if there is none
     //   vec    - holds the current 16-byte piece of the substring
     //   tmp    - holds the saved SP during setup, then the pcmpestri match index (rcx)
     //   ae     - StrIntrinsicNode encoding; LL, UU and UL are handled, LU is rejected
     // The original SP is pushed last during setup and restored by the final
     // pop(rsp), which also discards any stack copies and saved values.
6487 void MacroAssembler::string_indexof(Register str1, Register str2,
6488                                     Register cnt1, Register cnt2,
6489                                     int int_cnt2,  Register result,
6490                                     XMMRegister vec, Register tmp,
6491                                     int ae) {
6492   ShortBranchVerifier sbv(this);
6493   assert(UseSSE42Intrinsics, "SSE4.2 is required");
6494   assert(ae != StrIntrinsicNode::LU, "Invalid encoding");
6495 
6496   //
6497   // int_cnt2 is length of small (< 8 chars) constant substring
6498   // or (-1) for non constant substring in which case its length
6499   // is in cnt2 register.
6500   //
6501   // Note, inline_string_indexOf() generates checks:
6502   // if (substr.count > string.count) return -1;
6503   // if (substr.count == 0) return 0;
6504   //
6505   int stride = (ae == StrIntrinsicNode::LL) ? 16 : 8; //UU, UL -> 8
6506   assert(int_cnt2 == -1 || (0 < int_cnt2 && int_cnt2 < stride), "should be != 0");
6507   // This method uses the pcmpestri instruction with bound registers
6508   //   inputs:
6509   //     xmm - substring
6510   //     rax - substring length (elements count)
6511   //     mem - scanned string
6512   //     rdx - string length (elements count)
6513   //     0xd - mode: 1100 (substring search) + 01 (unsigned shorts)
6514   //     0xc - mode: 1100 (substring search) + 00 (unsigned bytes)
6515   //   outputs:
6516   //     rcx - matched index in string
6517   assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri");
6518   int mode = (ae == StrIntrinsicNode::LL) ? 0x0c : 0x0d; // bytes or shorts
       // scale1 addresses elements of the scanned string, scale2 elements of the
       // substring; they differ only for UL, where the substring is read as bytes.
6519   Address::ScaleFactor scale1 = (ae == StrIntrinsicNode::LL) ? Address::times_1 : Address::times_2;
6520   Address::ScaleFactor scale2 = (ae == StrIntrinsicNode::UL) ? Address::times_1 : scale1;
6521 
6522   Label RELOAD_SUBSTR, SCAN_TO_SUBSTR, SCAN_SUBSTR, ADJUST_STR,
6523         RET_FOUND, RET_NOT_FOUND, CLEANUP, FOUND_SUBSTR,
6524         FOUND_CANDIDATE;
6525 
6526   { //========================================================
6527     // We don't know where these strings are located
6528     // and we can't read beyond them. Load them through stack.
6529     Label BIG_STRINGS, CHECK_STR, COPY_SUBSTR, COPY_STR;
6530 
6531     movptr(tmp, rsp); // save old SP
6532 
6533     if (int_cnt2 > 0) {     // small (< 8 chars) constant substring
           // Load the constant substring into vec with the narrowest load that
           // covers it; 'result' is only a scratch register here.
6534       if (int_cnt2 == (1>>scale2)) { // One byte
6535         assert((ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL), "Only possible for latin1 encoding");
6536         load_unsigned_byte(result, Address(str2, 0));
6537         movdl(vec, result); // move 32 bits
6538       } else if (ae == StrIntrinsicNode::LL && int_cnt2 == 3) {  // Three bytes
6539         // Not enough header space in 32-bit VM: 12+3 = 15.
             // Read 4 bytes starting one byte before the data and shift the extra byte out.
6540         movl(result, Address(str2, -1));
6541         shrl(result, 8);
6542         movdl(vec, result); // move 32 bits
6543       } else if (ae != StrIntrinsicNode::UL && int_cnt2 == (2>>scale2)) {  // One char
6544         load_unsigned_short(result, Address(str2, 0));
6545         movdl(vec, result); // move 32 bits
6546       } else if (ae != StrIntrinsicNode::UL && int_cnt2 == (4>>scale2)) { // Two chars
6547         movdl(vec, Address(str2, 0)); // move 32 bits
6548       } else if (ae != StrIntrinsicNode::UL && int_cnt2 == (8>>scale2)) { // Four chars
6549         movq(vec, Address(str2, 0));  // move 64 bits
6550       } else { // cnt2 = { 3, 5, 6, 7 } || (ae == StrIntrinsicNode::UL && cnt2 ={2, ..., 7})
6551         // Array header size is 12 bytes in 32-bit VM
6552         // + 6 bytes for 3 chars == 18 bytes,
6553         // enough space to load vec and shift.
6554         assert(HeapWordSize*TypeArrayKlass::header_size() >= 12,"sanity");
             // Load a full vector that ends at the end of the substring, then shift
             // right so the substring starts at the low end of vec.
6555         if (ae == StrIntrinsicNode::UL) {
6556           int tail_off = int_cnt2-8;
6557           pmovzxbw(vec, Address(str2, tail_off));
6558           psrldq(vec, -2*tail_off);
6559         }
6560         else {
6561           int tail_off = int_cnt2*(1<<scale2);
6562           movdqu(vec, Address(str2, tail_off-16));
6563           psrldq(vec, 16-tail_off);
6564         }
6565       }
6566     } else { // not constant substring
6567       cmpl(cnt2, stride);
6568       jccb(Assembler::aboveEqual, BIG_STRINGS); // Both strings are big enough
6569 
6570       // We can read beyond string if str+16 does not cross page boundary
6571       // since heaps are aligned and mapped by pages.
6572       assert(os::vm_page_size() < (int)G, "default page should be small");
6573       movl(result, str2); // We need only low 32 bits
6574       andl(result, (os::vm_page_size()-1));
6575       cmpl(result, (os::vm_page_size()-16));
6576       jccb(Assembler::belowEqual, CHECK_STR);
6577 
6578       // Move small strings to stack to allow load 16 bytes into vec.
6579       subptr(rsp, 16);
           // stk_offset skips the cnt2 word pushed below and compensates for the
           // 1-based index used by the copy loop.
6580       int stk_offset = wordSize-(1<<scale2);
6581       push(cnt2);
6582 
           // Copy the substring element by element, from the last one down to the first.
6583       bind(COPY_SUBSTR);
6584       if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL) {
6585         load_unsigned_byte(result, Address(str2, cnt2, scale2, -1));
6586         movb(Address(rsp, cnt2, scale2, stk_offset), result);
6587       } else if (ae == StrIntrinsicNode::UU) {
6588         load_unsigned_short(result, Address(str2, cnt2, scale2, -2));
6589         movw(Address(rsp, cnt2, scale2, stk_offset), result);
6590       }
6591       decrement(cnt2);
6592       jccb(Assembler::notZero, COPY_SUBSTR);
6593 
6594       pop(cnt2);
6595       movptr(str2, rsp);  // New substring address
6596     } // non constant
6597 
6598     bind(CHECK_STR);
6599     cmpl(cnt1, stride);
6600     jccb(Assembler::aboveEqual, BIG_STRINGS);
6601 
6602     // Check cross page boundary.
6603     movl(result, str1); // We need only low 32 bits
6604     andl(result, (os::vm_page_size()-1));
6605     cmpl(result, (os::vm_page_size()-16));
6606     jccb(Assembler::belowEqual, BIG_STRINGS);
6607 
6608     subptr(rsp, 16);
6609     int stk_offset = -(1<<scale1);
6610     if (int_cnt2 < 0) { // not constant
6611       push(cnt2);
6612       stk_offset += wordSize;
6613     }
         // cnt2 is reused as the loop counter for copying the string.
6614     movl(cnt2, cnt1);
6615 
6616     bind(COPY_STR);
6617     if (ae == StrIntrinsicNode::LL) {
6618       load_unsigned_byte(result, Address(str1, cnt2, scale1, -1));
6619       movb(Address(rsp, cnt2, scale1, stk_offset), result);
6620     } else {
6621       load_unsigned_short(result, Address(str1, cnt2, scale1, -2));
6622       movw(Address(rsp, cnt2, scale1, stk_offset), result);
6623     }
6624     decrement(cnt2);
6625     jccb(Assembler::notZero, COPY_STR);
6626 
6627     if (int_cnt2 < 0) { // not constant
6628       pop(cnt2);
6629     }
6630     movptr(str1, rsp);  // New string address
6631 
6632     bind(BIG_STRINGS);
6633     // Load substring.
6634     if (int_cnt2 < 0) { // -1
6635       if (ae == StrIntrinsicNode::UL) {
6636         pmovzxbw(vec, Address(str2, 0));
6637       } else {
6638         movdqu(vec, Address(str2, 0));
6639       }
6640       push(cnt2);       // substr count
6641       push(str2);       // substr addr
6642       push(str1);       // string addr
6643     } else {
6644       // Small (< 8 chars) constant substrings are loaded already.
6645       movl(cnt2, int_cnt2);
6646     }
6647     push(tmp);  // original SP
6648 
6649   } // Finished loading
6650 
6651   //========================================================
6652   // Start search
6653   //
6654 
6655   movptr(result, str1); // string addr
6656 
6657   if (int_cnt2  < 0) {  // Only for non constant substring
6658     jmpb(SCAN_TO_SUBSTR);
6659 
6660     // SP saved at sp+0
6661     // String saved at sp+1*wordSize
6662     // Substr saved at sp+2*wordSize
6663     // Substr count saved at sp+3*wordSize
6664 
6665     // Reload substr for rescan, this code
6666     // is executed only for large substrings (> 8 chars)
6667     bind(RELOAD_SUBSTR);
6668     movptr(str2, Address(rsp, 2*wordSize));
6669     movl(cnt2, Address(rsp, 3*wordSize));
6670     if (ae == StrIntrinsicNode::UL) {
6671       pmovzxbw(vec, Address(str2, 0));
6672     } else {
6673       movdqu(vec, Address(str2, 0));
6674     }
6675     // We came here after the beginning of the substring was
6676     // matched but the rest of it was not so we need to search
6677     // again. Start from the next element after the previous match.
6678     subptr(str1, result); // Restore counter
6679     if (ae == StrIntrinsicNode::UU || ae == StrIntrinsicNode::UL) {
6680       shrl(str1, 1);
6681     }
6682     addl(cnt1, str1);
6683     decrementl(cnt1);   // Shift to next element
6684     cmpl(cnt1, cnt2);
6685     jccb(Assembler::negative, RET_NOT_FOUND);  // Left less than substring
6686 
6687     addptr(result, (1<<scale1)); // advance the string pointer by one element
6688   } // non constant
6689 
6690   // Scan string for start of substr in 16-byte vectors
6691   bind(SCAN_TO_SUBSTR);
6692   assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri");
6693   pcmpestri(vec, Address(result, 0), mode);
6694   jccb(Assembler::below, FOUND_CANDIDATE);   // CF == 1
6695   subl(cnt1, stride);
6696   jccb(Assembler::lessEqual, RET_NOT_FOUND); // Scanned full string
6697   cmpl(cnt1, cnt2);
6698   jccb(Assembler::negative, RET_NOT_FOUND);  // Left less than substring
6699   addptr(result, 16);
6700 
6701   bind(ADJUST_STR);
6702   cmpl(cnt1, stride); // Do not read beyond string
6703   jccb(Assembler::greaterEqual, SCAN_TO_SUBSTR);
6704   // Back-up string to avoid reading beyond string.
6705   lea(result, Address(result, cnt1, scale1, -16));
6706   movl(cnt1, stride);
6707   jmpb(SCAN_TO_SUBSTR);
6708 
6709   // Found a potential substr
6710   bind(FOUND_CANDIDATE);
6711   // After pcmpestri tmp(rcx) contains matched element index
6712 
6713   // Make sure string is still long enough
6714   subl(cnt1, tmp);
6715   cmpl(cnt1, cnt2);
6716   jccb(Assembler::greaterEqual, FOUND_SUBSTR);
6717   // Left less than substring.
6718 
6719   bind(RET_NOT_FOUND);
6720   movl(result, -1);
6721   jmpb(CLEANUP);
6722 
6723   bind(FOUND_SUBSTR);
6724   // Compute start addr of substr
6725   lea(result, Address(result, tmp, scale1));

6726   if (int_cnt2 > 0) { // Constant substring
6727     // Repeat search for small substring (< 8 chars)
6728     // from new point without reloading substring.
6729     // Have to check that we don't read beyond string.
6730     cmpl(tmp, stride-int_cnt2);
6731     jccb(Assembler::greater, ADJUST_STR);
6732     // Fall through if matched whole substring.
6733   } else { // non constant
6734     assert(int_cnt2 == -1, "should be != 0");
6735 
6736     addl(tmp, cnt2);
6737     // Found result if we matched whole substring.
6738     cmpl(tmp, stride);
6739     jccb(Assembler::lessEqual, RET_FOUND);
6740 
6741     // Repeat search for small substring (<= 8 chars)
6742     // from new point 'str1' without reloading substring.
6743     cmpl(cnt2, stride);
6744     // Have to check that we don't read beyond string.
6745     jccb(Assembler::lessEqual, ADJUST_STR);
6746 
6747     Label CHECK_NEXT, CONT_SCAN_SUBSTR, RET_FOUND_LONG;
6748     // Compare the rest of substring (> 8 chars).
         // str1 is reused as the running string pointer; the original string
         // address is reloaded from the stack at RET_FOUND_LONG.
6749     movptr(str1, result);
6750 
6751     cmpl(tmp, cnt2);
6752     // First 8 chars are already matched.
6753     jccb(Assembler::equal, CHECK_NEXT);
6754 
6755     bind(SCAN_SUBSTR);
6756     pcmpestri(vec, Address(str1, 0), mode);
6757     // Need to reload strings pointers if not matched whole vector
6758     jcc(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0
6759 
6760     bind(CHECK_NEXT);
6761     subl(cnt2, stride);
6762     jccb(Assembler::lessEqual, RET_FOUND_LONG); // Found full substring
6763     addptr(str1, 16);
6764     if (ae == StrIntrinsicNode::UL) {
           // UL: the substring is stored as bytes, so 'stride' elements occupy 8 bytes.
6765       addptr(str2, 8);
6766     } else {
6767       addptr(str2, 16);
6768     }
6769     subl(cnt1, stride);
6770     cmpl(cnt2, stride); // Do not read beyond substring
6771     jccb(Assembler::greaterEqual, CONT_SCAN_SUBSTR);
6772     // Back-up strings to avoid reading beyond substring.
6773 
6774     if (ae == StrIntrinsicNode::UL) {
6775       lea(str2, Address(str2, cnt2, scale2, -8));
6776       lea(str1, Address(str1, cnt2, scale1, -16));
6777     } else {
6778       lea(str2, Address(str2, cnt2, scale2, -16));
6779       lea(str1, Address(str1, cnt2, scale1, -16));
6780     }
6781     subl(cnt1, cnt2);
6782     movl(cnt2, stride);
6783     addl(cnt1, stride);
6784     bind(CONT_SCAN_SUBSTR);
6785     if (ae == StrIntrinsicNode::UL) {
6786       pmovzxbw(vec, Address(str2, 0));
6787     } else {
6788       movdqu(vec, Address(str2, 0));
6789     }
6790     jmpb(SCAN_SUBSTR);
6791 
6792     bind(RET_FOUND_LONG);
         // Reload the string address saved at sp+1*wordSize.
6793     movptr(str1, Address(rsp, wordSize));
6794   } // non constant
6795 
6796   bind(RET_FOUND);
6797   // Compute substr offset
6798   subptr(result, str1);
6799   if (ae == StrIntrinsicNode::UU || ae == StrIntrinsicNode::UL) {
6800     shrl(result, 1); // index
6801   }
6802   bind(CLEANUP);
6803   pop(rsp); // restore SP
6804 
6805 } // string_indexof
6806 
6807 void MacroAssembler::string_indexof_char(Register str1, Register cnt1, Register ch, Register result,
6808                                          XMMRegister vec1, XMMRegister vec2, XMMRegister vec3, Register tmp) {
       // Emits code that searches the 16-bit elements starting at str1 for the
       // value in ch.
       //   cnt1   - number of elements to scan; modified by the emitted code
       //   ch     - element to find; overwritten on the vector-match path
       //   result - receives the element index of the first match, or -1
       //   vec1   - ch broadcast to every 16-bit lane
       //   vec2   - all-zero vector used as the ptest mask
       //   vec3   - per-iteration compare result
       //   tmp    - scratch (vector count, loaded element, match mask)
       // NOTE(review): when cnt1 < stride the code jumps straight to
       // SCAN_TO_CHAR_LOOP, which skips the cnt1 == 0 test at SCAN_TO_CHAR;
       // presumably callers guarantee cnt1 > 0 - confirm.
6809   ShortBranchVerifier sbv(this);
6810   assert(UseSSE42Intrinsics, "SSE4.2 is required");
6811 
6812   int stride = 8;
6813 
6814   Label FOUND_CHAR, SCAN_TO_CHAR, SCAN_TO_CHAR_LOOP,
6815         SCAN_TO_8_CHAR, SCAN_TO_8_CHAR_LOOP, SCAN_TO_16_CHAR_LOOP,
6816         RET_NOT_FOUND, SCAN_TO_8_CHAR_INIT,
6817         FOUND_SEQ_CHAR, DONE_LABEL;
6818 
6819   movptr(result, str1);
6820   if (UseAVX >= 2) {
         // Dispatch on length: fewer than 8 elements -> scalar loop, fewer than
         // 16 -> 8-element vector loop, otherwise the 16-element (32-byte) loop.
6821     cmpl(cnt1, stride);
6822     jccb(Assembler::less, SCAN_TO_CHAR_LOOP);
6823     cmpl(cnt1, 2*stride);
6824     jccb(Assembler::less, SCAN_TO_8_CHAR_INIT);
6825     movdl(vec1, ch);
6826     vpbroadcastw(vec1, vec1);
6827     vpxor(vec2, vec2);
6828     movl(tmp, cnt1);
6829     andl(tmp, 0xFFFFFFF0);  //vector count (in chars)
6830     andl(cnt1,0x0000000F);  //tail count (in chars)
6831 
6832     bind(SCAN_TO_16_CHAR_LOOP);
6833     vmovdqu(vec3, Address(result, 0));
6834     vpcmpeqw(vec3, vec3, vec1, true);
         // vec2 is zero, so CF is set only when vec3 is all zero; carryClear
         // therefore means at least one lane matched.
6835     vptest(vec2, vec3);
6836     jcc(Assembler::carryClear, FOUND_CHAR);
6837     addptr(result, 32);
6838     subl(tmp, 2*stride);
6839     jccb(Assembler::notZero, SCAN_TO_16_CHAR_LOOP);
6840     jmp(SCAN_TO_8_CHAR);
6841     bind(SCAN_TO_8_CHAR_INIT);
         // Broadcast ch to all eight 16-bit lanes of vec1 and clear vec2.
6842     movdl(vec1, ch);
6843     pshuflw(vec1, vec1, 0x00);
6844     pshufd(vec1, vec1, 0);
6845     pxor(vec2, vec2);
6846   }
6847   if (UseAVX >= 2 || UseSSE42Intrinsics) {
6848     bind(SCAN_TO_8_CHAR);
6849     cmpl(cnt1, stride);
6850     if (UseAVX >= 2) {
6851       jccb(Assembler::less, SCAN_TO_CHAR);
6852     }
6853     if (!(UseAVX >= 2)) {
           // Without AVX2 this is the first vector stage, so vec1/vec2 are set up here.
6854       jccb(Assembler::less, SCAN_TO_CHAR_LOOP);
6855       movdl(vec1, ch);
6856       pshuflw(vec1, vec1, 0x00);
6857       pshufd(vec1, vec1, 0);
6858       pxor(vec2, vec2);
6859     }
6860     movl(tmp, cnt1);
6861     andl(tmp, 0xFFFFFFF8);  //vector count (in chars)
6862     andl(cnt1,0x00000007);  //tail count (in chars)
6863 
6864     bind(SCAN_TO_8_CHAR_LOOP);
6865     movdqu(vec3, Address(result, 0));
6866     pcmpeqw(vec3, vec1);
6867     ptest(vec2, vec3);
6868     jcc(Assembler::carryClear, FOUND_CHAR);
6869     addptr(result, 16);
6870     subl(tmp, stride);
6871     jccb(Assembler::notZero, SCAN_TO_8_CHAR_LOOP);
6872   }
6873   bind(SCAN_TO_CHAR);
6874   testl(cnt1, cnt1);
6875   jcc(Assembler::zero, RET_NOT_FOUND);
6876 
       // Scalar tail: compare one 16-bit element per iteration.
6877   bind(SCAN_TO_CHAR_LOOP);
6878   load_unsigned_short(tmp, Address(result, 0));
6879   cmpl(ch, tmp);
6880   jccb(Assembler::equal, FOUND_SEQ_CHAR);
6881   addptr(result, 2);
6882   subl(cnt1, 1);
6883   jccb(Assembler::zero, RET_NOT_FOUND);
6884   jmp(SCAN_TO_CHAR_LOOP);
6885 
6886   bind(RET_NOT_FOUND);
6887   movl(result, -1);
6888   jmpb(DONE_LABEL);
6889 
6890   if (UseAVX >= 2 || UseSSE42Intrinsics) {
6891     bind(FOUND_CHAR);
         // Turn the compare result into a byte mask; the lowest set bit is the
         // byte offset of the first matching element within the vector.
6892     if (UseAVX >= 2) {
6893       vpmovmskb(tmp, vec3);
6894     } else {
6895       pmovmskb(tmp, vec3);
6896     }
6897     bsfl(ch, tmp);
         // NOTE(review): addl is used on 'result' here while every other update of
         // 'result' uses addptr - confirm the 32-bit add is intended.
6898     addl(result, ch);
6899   }
6900 
6901   bind(FOUND_SEQ_CHAR);
       // Convert the byte distance from the start of the string into an element index.
6902   subptr(result, str1);
6903   shrl(result, 1);
6904 
6905   bind(DONE_LABEL);
6906 } // string_indexof_char
6907 
6908 // helper function for string_compare
     //
     // Emits loads of one element from each string at position 'index':
     //   LL - a byte from str1 and a byte from str2, both addressed with 'scale'
     //   UU - a 16-bit value from str1 and from str2, both addressed with 'scale'
     //   otherwise - a byte from str1 (scale1) and a 16-bit value from str2 (scale2)
     // The loaded values are zero-extended into elem1 and elem2.
6909 void MacroAssembler::load_next_elements(Register elem1, Register elem2, Register str1, Register str2,
6910                                         Address::ScaleFactor scale, Address::ScaleFactor scale1,
6911                                         Address::ScaleFactor scale2, Register index, int ae) {
6912   if (ae == StrIntrinsicNode::LL) {
6913     load_unsigned_byte(elem1, Address(str1, index, scale, 0));
6914     load_unsigned_byte(elem2, Address(str2, index, scale, 0));
6915   } else if (ae == StrIntrinsicNode::UU) {
6916     load_unsigned_short(elem1, Address(str1, index, scale, 0));
6917     load_unsigned_short(elem2, Address(str2, index, scale, 0));
6918   } else {
         // Mixed encodings: str1 is read as bytes, str2 as 16-bit values.
6919     load_unsigned_byte(elem1, Address(str1, index, scale1, 0));
6920     load_unsigned_short(elem2, Address(str2, index, scale2, 0));
6921   }
6922 }
6923 
6924 // Compare strings, used for char[] and byte[].
6925 void MacroAssembler::string_compare(Register str1, Register str2,
6926                                     Register cnt1, Register cnt2, Register result,
6927                                     XMMRegister vec1, int ae) {
6928   ShortBranchVerifier sbv(this);
6929   Label LENGTH_DIFF_LABEL, POP_LABEL, DONE_LABEL, WHILE_HEAD_LABEL;
6930   int stride, stride2, adr_stride, adr_stride1, adr_stride2;
6931   Address::ScaleFactor scale, scale1, scale2;
6932 
6933   if (ae == StrIntrinsicNode::LU || ae == StrIntrinsicNode::UL) {
6934     shrl(cnt2, 1);
6935   }
6936   // Compute the minimum of the string lengths and the
6937   // difference of the string lengths (stack).
6938   // Do the conditional move stuff
6939   movl(result, cnt1);
6940   subl(cnt1, cnt2);
6941   push(cnt1);
6942   cmov32(Assembler::lessEqual, cnt2, result);
6943 
6944   // Is the minimum length zero?
6945   testl(cnt2, cnt2);
6946   jcc(Assembler::zero, LENGTH_DIFF_LABEL);
6947   if (ae == StrIntrinsicNode::LL) {
6948     // Load first bytes
6949     load_unsigned_byte(result, Address(str1, 0));
6950     load_unsigned_byte(cnt1, Address(str2, 0));
6951   } else if (ae == StrIntrinsicNode::UU) {
6952     // Load first characters
6953     load_unsigned_short(result, Address(str1, 0));
6954     load_unsigned_short(cnt1, Address(str2, 0));
6955   } else {
6956     load_unsigned_byte(result, Address(str1, 0));
6957     load_unsigned_short(cnt1, Address(str2, 0));
6958   }
6959   subl(result, cnt1);
6960   jcc(Assembler::notZero,  POP_LABEL);
6961 
6962   if (ae == StrIntrinsicNode::UU) {
6963     // Divide length by 2 to get number of chars
6964     shrl(cnt2, 1);
6965   }
6966   cmpl(cnt2, 1);
6967   jcc(Assembler::equal, LENGTH_DIFF_LABEL);
6968 
6969   // Check if the strings start at the same location and setup scale and stride
6970   if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
6971     cmpptr(str1, str2);
6972     jcc(Assembler::equal, LENGTH_DIFF_LABEL);
6973     if (ae == StrIntrinsicNode::LL) {
6974       scale = Address::times_1;
6975       stride = 16;
6976     } else {
6977       scale = Address::times_2;
6978       stride = 8;
6979     }
6980   } else {
6981     scale1 = Address::times_1;
6982     scale2 = Address::times_2;
6983     stride = 8;
6984   }
6985 
6986   if (UseAVX >= 2 && UseSSE42Intrinsics) {
6987     Label COMPARE_WIDE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_WIDE_TAIL, COMPARE_SMALL_STR;
6988     Label COMPARE_WIDE_VECTORS_LOOP, COMPARE_16_CHARS, COMPARE_INDEX_CHAR;
6989     Label COMPARE_TAIL_LONG;
6990     int pcmpmask = 0x19;
6991     if (ae == StrIntrinsicNode::LL) {
6992       pcmpmask &= ~0x01;
6993     }
6994 
6995     // Setup to compare 16-chars (32-bytes) vectors,
6996     // start from first character again because it has aligned address.
6997     if (ae == StrIntrinsicNode::LL) {
6998       stride2 = 32;
6999     } else {
7000       stride2 = 16;
7001     }
7002     if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
7003       adr_stride = stride << scale;
7004     } else {
7005       adr_stride1 = 8;  //stride << scale1;
7006       adr_stride2 = 16; //stride << scale2;
7007     }
7008 
7009     assert(result == rax && cnt2 == rdx && cnt1 == rcx, "pcmpestri");
7010     // rax and rdx are used by pcmpestri as elements counters
7011     movl(result, cnt2);
7012     andl(cnt2, ~(stride2-1));   // cnt2 holds the vector count
7013     jcc(Assembler::zero, COMPARE_TAIL_LONG);
7014 
7015     // fast path : compare first 2 8-char vectors.
7016     bind(COMPARE_16_CHARS);
7017     if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
7018       movdqu(vec1, Address(str1, 0));
7019     } else {
7020       pmovzxbw(vec1, Address(str1, 0));
7021     }
7022     pcmpestri(vec1, Address(str2, 0), pcmpmask);
7023     jccb(Assembler::below, COMPARE_INDEX_CHAR);
7024 
7025     if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
7026       movdqu(vec1, Address(str1, adr_stride));
7027       pcmpestri(vec1, Address(str2, adr_stride), pcmpmask);
7028     } else {
7029       pmovzxbw(vec1, Address(str1, adr_stride1));
7030       pcmpestri(vec1, Address(str2, adr_stride2), pcmpmask);
7031     }
7032     jccb(Assembler::aboveEqual, COMPARE_WIDE_VECTORS);
7033     addl(cnt1, stride);
7034 
7035     // Compare the characters at index in cnt1
7036     bind(COMPARE_INDEX_CHAR); // cnt1 has the offset of the mismatching character
7037     load_next_elements(result, cnt2, str1, str2, scale, scale1, scale2, cnt1, ae);

7038     subl(result, cnt2);
7039     jmp(POP_LABEL);
7040 
7041     // Setup the registers to start vector comparison loop
7042     bind(COMPARE_WIDE_VECTORS);
7043     if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
7044       lea(str1, Address(str1, result, scale));
7045       lea(str2, Address(str2, result, scale));
7046     } else {
7047       lea(str1, Address(str1, result, scale1));
7048       lea(str2, Address(str2, result, scale2));
7049     }
7050     subl(result, stride2);
7051     subl(cnt2, stride2);
7052     jccb(Assembler::zero, COMPARE_WIDE_TAIL);
7053     negptr(result);
7054 
7055     //  In a loop, compare 16-chars (32-bytes) at once using (vpxor+vptest)
7056     bind(COMPARE_WIDE_VECTORS_LOOP);
7057     if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
7058       vmovdqu(vec1, Address(str1, result, scale));
7059       vpxor(vec1, Address(str2, result, scale));
7060     } else {
7061       vpmovzxbw(vec1, Address(str1, result, scale1));
7062       vpxor(vec1, Address(str2, result, scale2));
7063     }
7064     vptest(vec1, vec1);
7065     jccb(Assembler::notZero, VECTOR_NOT_EQUAL);
7066     addptr(result, stride2);
7067     subl(cnt2, stride2);
7068     jccb(Assembler::notZero, COMPARE_WIDE_VECTORS_LOOP);
7069     // clean upper bits of YMM registers
7070     vpxor(vec1, vec1);
7071 
7072     // compare wide vectors tail
7073     bind(COMPARE_WIDE_TAIL);
7074     testptr(result, result);
7075     jccb(Assembler::zero, LENGTH_DIFF_LABEL);
7076 
7077     movl(result, stride2);
7078     movl(cnt2, result);
7079     negptr(result);
7080     jmpb(COMPARE_WIDE_VECTORS_LOOP);
7081 
7082     // Identifies the mismatching (higher or lower)16-bytes in the 32-byte vectors.
7083     bind(VECTOR_NOT_EQUAL);
7084     // clean upper bits of YMM registers
7085     vpxor(vec1, vec1);
7086     if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
7087       lea(str1, Address(str1, result, scale));
7088       lea(str2, Address(str2, result, scale));
7089     } else {
7090       lea(str1, Address(str1, result, scale1));
7091       lea(str2, Address(str2, result, scale2));
7092     }
7093     jmp(COMPARE_16_CHARS);
7094 
7095     // Compare tail chars, length between 1 to 15 chars
7096     bind(COMPARE_TAIL_LONG);
7097     movl(cnt2, result);
7098     cmpl(cnt2, stride);
7099     jccb(Assembler::less, COMPARE_SMALL_STR);
7100 
7101     if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
7102       movdqu(vec1, Address(str1, 0));
7103     } else {
7104       pmovzxbw(vec1, Address(str1, 0));
7105     }
7106     pcmpestri(vec1, Address(str2, 0), pcmpmask);
7107     jcc(Assembler::below, COMPARE_INDEX_CHAR);
7108     subptr(cnt2, stride);
7109     jccb(Assembler::zero, LENGTH_DIFF_LABEL);
7110     if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
7111       lea(str1, Address(str1, result, scale));
7112       lea(str2, Address(str2, result, scale));
7113     } else {
7114       lea(str1, Address(str1, result, scale1));
7115       lea(str2, Address(str2, result, scale2));
7116     }
7117     negptr(cnt2);
7118     jmpb(WHILE_HEAD_LABEL);
7119 
7120     bind(COMPARE_SMALL_STR);
7121   } else if (UseSSE42Intrinsics) {
7122     Label COMPARE_WIDE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_TAIL;
7123     int pcmpmask = 0x19;
7124     // Setup to compare 8-char (16-byte) vectors,
7125     // start from first character again because it has aligned address.
7126     movl(result, cnt2);
7127     andl(cnt2, ~(stride - 1));   // cnt2 holds the vector count
7128     if (ae == StrIntrinsicNode::LL) {
7129       pcmpmask &= ~0x01;
7130     }
7131     jccb(Assembler::zero, COMPARE_TAIL);
7132     if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
7133       lea(str1, Address(str1, result, scale));
7134       lea(str2, Address(str2, result, scale));
7135     } else {
7136       lea(str1, Address(str1, result, scale1));
7137       lea(str2, Address(str2, result, scale2));
7138     }
7139     negptr(result);
7140 
7141     // pcmpestri
7142     //   inputs:
7143     //     vec1- substring
7144     //     rax - negative string length (elements count)
7145     //     mem - scanned string
7146     //     rdx - string length (elements count)
7147     //     pcmpmask - cmp mode: 11000 (string compare with negated result)
7148     //               + 00 (unsigned bytes) or  + 01 (unsigned shorts)
7149     //   outputs:
7150     //     rcx - first mismatched element index
7151     assert(result == rax && cnt2 == rdx && cnt1 == rcx, "pcmpestri");
7152 
7153     bind(COMPARE_WIDE_VECTORS);
7154     if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
7155       movdqu(vec1, Address(str1, result, scale));
7156       pcmpestri(vec1, Address(str2, result, scale), pcmpmask);
7157     } else {
7158       pmovzxbw(vec1, Address(str1, result, scale1));
7159       pcmpestri(vec1, Address(str2, result, scale2), pcmpmask);
7160     }
7161     // After pcmpestri cnt1(rcx) contains mismatched element index
7162 
7163     jccb(Assembler::below, VECTOR_NOT_EQUAL);  // CF==1
7164     addptr(result, stride);
7165     subptr(cnt2, stride);
7166     jccb(Assembler::notZero, COMPARE_WIDE_VECTORS);
7167 
7168     // compare wide vectors tail
7169     testptr(result, result);
7170     jccb(Assembler::zero, LENGTH_DIFF_LABEL);
7171 
7172     movl(cnt2, stride);
7173     movl(result, stride);
7174     negptr(result);
7175     if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
7176       movdqu(vec1, Address(str1, result, scale));
7177       pcmpestri(vec1, Address(str2, result, scale), pcmpmask);
7178     } else {
7179       pmovzxbw(vec1, Address(str1, result, scale1));
7180       pcmpestri(vec1, Address(str2, result, scale2), pcmpmask);
7181     }
7182     jccb(Assembler::aboveEqual, LENGTH_DIFF_LABEL);
7183 
7184     // Mismatched characters in the vectors
7185     bind(VECTOR_NOT_EQUAL);
7186     addptr(cnt1, result);
7187     load_next_elements(result, cnt2, str1, str2, scale, scale1, scale2, cnt1, ae);

7188     subl(result, cnt2);
7189     jmpb(POP_LABEL);
7190 
7191     bind(COMPARE_TAIL); // limit is zero
7192     movl(cnt2, result);
7193     // Fallthru to tail compare
7194   }
7195   // Shift str2 and str1 to the end of the arrays, negate min
7196   if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
7197     lea(str1, Address(str1, cnt2, scale));
7198     lea(str2, Address(str2, cnt2, scale));
7199   } else {
7200     lea(str1, Address(str1, cnt2, scale1));
7201     lea(str2, Address(str2, cnt2, scale2));
7202   }
7203   decrementl(cnt2);  // first character was compared already
7204   negptr(cnt2);
7205 
7206   // Compare the rest of the elements
7207   bind(WHILE_HEAD_LABEL);
7208   load_next_elements(result, cnt1, str1, str2, scale, scale1, scale2, cnt2, ae);

7209   subl(result, cnt1);
7210   jccb(Assembler::notZero, POP_LABEL);
7211   increment(cnt2);
7212   jccb(Assembler::notZero, WHILE_HEAD_LABEL);
7213 
7214   // Strings are equal up to min length.  Return the length difference.
7215   bind(LENGTH_DIFF_LABEL);
7216   pop(result);
7217   if (ae == StrIntrinsicNode::UU) {
7218     // Divide diff by 2 to get number of chars
7219     sarl(result, 1);
7220   }
7221   jmpb(DONE_LABEL);
7222 
7223   // Discard the stored length difference
7224   bind(POP_LABEL);
7225   pop(cnt1);
7226 
7227   // That's it
7228   bind(DONE_LABEL);
7229   if(ae == StrIntrinsicNode::UL) {
7230     negl(result);
7231   }
7232 }
7233 
7234 // Search for Non-ASCII character (Negative byte value) in a byte array,
7235 // return true if it has any and false otherwise.
     //
     // The emitted code scans 'len' bytes starting at 'ary1' and leaves 1 in
     // 'result' if some byte has bit 0x80 set, 0 otherwise.
     //   ary1       - address of the first byte; may be advanced by the emitted code
     //   len        - number of bytes; overwritten by the emitted code
     //   result     - receives 1 (found) or 0 (not found)
     //   tmp1       - scratch general purpose register
     //   vec1, vec2 - scratch vector registers for the AVX2 and SSE4.2 paths;
     //                both are cleared with vpxor on exit when UseAVX >= 2
7236 void MacroAssembler::has_negatives(Register ary1, Register len,
7237                                    Register result, Register tmp1,
7238                                    XMMRegister vec1, XMMRegister vec2) {
7239 
7240   // rsi: byte array
7241   // rcx: len
7242   // rax: result
7243   ShortBranchVerifier sbv(this);
7244   assert_different_registers(ary1, len, result, tmp1);
7245   assert_different_registers(vec1, vec2);
7246   Label TRUE_LABEL, FALSE_LABEL, DONE, COMPARE_CHAR, COMPARE_VECTORS, COMPARE_BYTE;
7247 
7248   // len == 0: nothing to scan, the answer is false
7249   testl(len, len);
7250   jcc(Assembler::zero, FALSE_LABEL);
7251 
7252   movl(result, len); // copy; result keeps the count while len is masked below
7253 
7254   if (UseAVX >= 2) {
7255     // With AVX2, use 32-byte vector compare
7256     Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;
7257 
7258     // Compare 32-byte vectors
7259     andl(result, 0x0000001f);  //   tail count (in bytes)
7260     andl(len, 0xffffffe0);   // vector count (in bytes)
7261     jccb(Assembler::zero, COMPARE_TAIL);  // fewer than 32 bytes in total
7262 
         // Point ary1 just past the vector part and index it with a negative
         // offset that counts up to zero.
7263     lea(ary1, Address(ary1, len, Address::times_1));
7264     negptr(len);
7265 
7266     movl(tmp1, 0x80808080);   // mask: top bit of each of the four bytes
7267     movdl(vec2, tmp1);
7268     vpbroadcastd(vec2, vec2); // replicate the mask dword across vec2
7269 
7270     bind(COMPARE_WIDE_VECTORS);
7271     vmovdqu(vec1, Address(ary1, len, Address::times_1));
7272     vptest(vec1, vec2);       // ZF is cleared when (vec1 & vec2) != 0
7273     jccb(Assembler::notZero, TRUE_LABEL);
7274     addptr(len, 32);
7275     jcc(Assembler::notZero, COMPARE_WIDE_VECTORS);
7276 
7277     testl(result, result);    // any tail bytes left?
7278     jccb(Assembler::zero, FALSE_LABEL);
7279 
         // ary1 + result is the end of the array, so this re-reads the last
         // 32 bytes (overlapping bytes that were already checked) instead of
         // falling back to the narrower loops below.
7280     vmovdqu(vec1, Address(ary1, result, Address::times_1, -32));
7281     vptest(vec1, vec2);
7282     jccb(Assembler::notZero, TRUE_LABEL);
7283     jmpb(FALSE_LABEL);
7284 
7285     bind(COMPARE_TAIL); // len is zero
7286     movl(len, result);  // restore len to the tail byte count
7287     // Fallthru to tail compare
7288   } else if (UseSSE42Intrinsics) {
7289     // With SSE4.2, use double quad vector compare
7290     Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;
7291 
7292     // Compare 16-byte vectors
7293     andl(result, 0x0000000f);  //   tail count (in bytes)
7294     andl(len, 0xfffffff0);   // vector count (in bytes)
7295     jccb(Assembler::zero, COMPARE_TAIL);  // fewer than 16 bytes in total
7296 
         // Same addressing scheme as the AVX2 path: ary1 past the vector part,
         // len as a negative offset counting up to zero.
7297     lea(ary1, Address(ary1, len, Address::times_1));
7298     negptr(len);
7299 
7300     movl(tmp1, 0x80808080);   // mask: top bit of each of the four bytes
7301     movdl(vec2, tmp1);
7302     pshufd(vec2, vec2, 0);    // replicate the mask dword across vec2
7303 
7304     bind(COMPARE_WIDE_VECTORS);
7305     movdqu(vec1, Address(ary1, len, Address::times_1));
7306     ptest(vec1, vec2);        // ZF is cleared when (vec1 & vec2) != 0
7307     jccb(Assembler::notZero, TRUE_LABEL);
7308     addptr(len, 16);
7309     jcc(Assembler::notZero, COMPARE_WIDE_VECTORS);
7310 
7311     testl(result, result);    // any tail bytes left?
7312     jccb(Assembler::zero, FALSE_LABEL);
7313 
         // Re-read the last 16 bytes of the array (overlapping checked bytes).
7314     movdqu(vec1, Address(ary1, result, Address::times_1, -16));
7315     ptest(vec1, vec2);
7316     jccb(Assembler::notZero, TRUE_LABEL);
7317     jmpb(FALSE_LABEL);
7318 
7319     bind(COMPARE_TAIL); // len is zero
7320     movl(len, result);  // restore len to the tail byte count
7321     // Fallthru to tail compare
7322   }
7323 
7324   // Compare 4-byte vectors
7325   andl(len, 0xfffffffc); // vector count (in bytes)
7326   jccb(Assembler::zero, COMPARE_CHAR);
7327 
7328   lea(ary1, Address(ary1, len, Address::times_1));
7329   negptr(len);
7330 
7331   bind(COMPARE_VECTORS);
7332   movl(tmp1, Address(ary1, len, Address::times_1));
7333   andl(tmp1, 0x80808080);  // keep only the top bit of each of the 4 bytes
7334   jccb(Assembler::notZero, TRUE_LABEL);
7335   addptr(len, 4);
7336   jcc(Assembler::notZero, COMPARE_VECTORS);
7337 
7338   // Compare trailing char (final 2 bytes), if any
7339   bind(COMPARE_CHAR);
7340   testl(result, 0x2);   // tail  char: bit 1 of the remaining byte count
7341   jccb(Assembler::zero, COMPARE_BYTE);
7342   load_unsigned_short(tmp1, Address(ary1, 0));
7343   andl(tmp1, 0x00008080);
7344   jccb(Assembler::notZero, TRUE_LABEL);
7345   subptr(result, 2);
7346   lea(ary1, Address(ary1, 2));  // step over the two bytes just checked
7347 
7348   bind(COMPARE_BYTE);
7349   testl(result, 0x1);   // tail  byte: bit 0 of the remaining byte count
7350   jccb(Assembler::zero, FALSE_LABEL);
7351   load_unsigned_byte(tmp1, Address(ary1, 0));
7352   andl(tmp1, 0x00000080);
7353   jccb(Assembler::notEqual, TRUE_LABEL);  // taken when the andl result is non-zero
7354   jmpb(FALSE_LABEL);
7355 
7356   bind(TRUE_LABEL);
7357   movl(result, 1);   // return true
7358   jmpb(DONE);
7359 
7360   bind(FALSE_LABEL);
7361   xorl(result, result); // return false
7362 
7363   // That's it
7364   bind(DONE);
7365   if (UseAVX >= 2) {
7366     // clean upper bits of YMM registers
7367     vpxor(vec1, vec1);
7368     vpxor(vec2, vec2);
7369   }
7370 }
7371 
7372 // Compare char[] or byte[] arrays aligned to 4 bytes or substrings.
     //
     // The emitted code leaves 1 in 'result' when the two inputs are equal and
     // 0 otherwise.
     //   is_array_equ - true: ary1/ary2 are treated as array references; the
     //                  code checks them for identity and for null, loads the
     //                  length of ary1 into 'limit', compares it with the
     //                  length of ary2 and then advances both registers by
     //                  the array base offset.
     //                  false: none of that is emitted; ary1/ary2 are used as
     //                  data addresses and 'limit' as supplied by the caller.
     //   is_char      - selects the T_CHAR or T_BYTE base offset and, together
     //                  with is_array_equ, doubles 'limit' to a byte count.
     //   limit        - element count on entry; overwritten by the emitted code
     //                  (NOTE(review): presumably already a byte count when
     //                  is_array_equ is false - confirm against callers)
     //   chr          - scratch general purpose register
     //   vec1, vec2   - scratch vector registers; cleared with vpxor on exit
     //                  when UseAVX >= 2
7373 void MacroAssembler::arrays_equals(bool is_array_equ, Register ary1, Register ary2,
7374                                    Register limit, Register result, Register chr,
7375                                    XMMRegister vec1, XMMRegister vec2, bool is_char) {
7376   ShortBranchVerifier sbv(this);
7377   Label TRUE_LABEL, FALSE_LABEL, DONE, COMPARE_VECTORS, COMPARE_CHAR, COMPARE_BYTE;
7378 
7379   int length_offset  = arrayOopDesc::length_offset_in_bytes();
7380   int base_offset    = arrayOopDesc::base_offset_in_bytes(is_char ? T_CHAR : T_BYTE);
7381 
7382   if (is_array_equ) {
7383     // Check the input args: the same reference on both sides is trivially equal
7384     cmpptr(ary1, ary2);
7385     jcc(Assembler::equal, TRUE_LABEL);
7386 

7387     // Need additional checks for arrays_equals: a zero (null) reference on
         // either side means not equal (both being zero was handled above).
7388     testptr(ary1, ary1);
7389     jcc(Assembler::zero, FALSE_LABEL);
7390     testptr(ary2, ary2);
7391     jcc(Assembler::zero, FALSE_LABEL);
7392 
7393     // Check the lengths
7394     movl(limit, Address(ary1, length_offset));
7395     cmpl(limit, Address(ary2, length_offset));
7396     jcc(Assembler::notEqual, FALSE_LABEL);
7397   }
7398 
7399   // count == 0: nothing to compare, the inputs are equal
7400   testl(limit, limit);
7401   jcc(Assembler::zero, TRUE_LABEL);
7402 
7403   if (is_array_equ) {
7404     // Load array address
7405     lea(ary1, Address(ary1, base_offset));
7406     lea(ary2, Address(ary2, base_offset));
7407   }
7408 
7409   if (is_array_equ && is_char) {
7410     // arrays_equals when used for char[].
7411     shll(limit, 1);      // byte count != 0
7412   }
7413   movl(result, limit); // copy; result keeps the count while limit is masked below
7414 
7415   if (UseAVX >= 2) {
7416     // With AVX2, use 32-byte vector compare
7417     Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;
7418 
7419     // Compare 32-byte vectors
7420     andl(result, 0x0000001f);  //   tail count (in bytes)
7421     andl(limit, 0xffffffe0);   // vector count (in bytes)
7422     jccb(Assembler::zero, COMPARE_TAIL);  // fewer than 32 bytes in total
7423 
         // Point both registers just past the vector part and index them with
         // a negative offset that counts up to zero.
7424     lea(ary1, Address(ary1, limit, Address::times_1));
7425     lea(ary2, Address(ary2, limit, Address::times_1));
7426     negptr(limit);
7427 
7428     bind(COMPARE_WIDE_VECTORS);
7429     vmovdqu(vec1, Address(ary1, limit, Address::times_1));
7430     vmovdqu(vec2, Address(ary2, limit, Address::times_1));
7431     vpxor(vec1, vec2);         // non-zero wherever the two vectors differ
7432 
7433     vptest(vec1, vec1);
7434     jccb(Assembler::notZero, FALSE_LABEL);
7435     addptr(limit, 32);
7436     jcc(Assembler::notZero, COMPARE_WIDE_VECTORS);
7437 
7438     testl(result, result);     // any tail bytes left?
7439     jccb(Assembler::zero, TRUE_LABEL);
7440 
         // ary1/ary2 + result is the end of the data, so this re-compares the
         // last 32 bytes (overlapping bytes that were already compared).
7441     vmovdqu(vec1, Address(ary1, result, Address::times_1, -32));
7442     vmovdqu(vec2, Address(ary2, result, Address::times_1, -32));
7443     vpxor(vec1, vec2);
7444 
7445     vptest(vec1, vec1);
7446     jccb(Assembler::notZero, FALSE_LABEL);
7447     jmpb(TRUE_LABEL);
7448 
7449     bind(COMPARE_TAIL); // limit is zero
7450     movl(limit, result);  // restore limit to the tail byte count
7451     // Fallthru to tail compare
7452   } else if (UseSSE42Intrinsics) {
7453     // With SSE4.2, use double quad vector compare
7454     Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;
7455 
7456     // Compare 16-byte vectors
7457     andl(result, 0x0000000f);  //   tail count (in bytes)
7458     andl(limit, 0xfffffff0);   // vector count (in bytes)
7459     jccb(Assembler::zero, COMPARE_TAIL);  // fewer than 16 bytes in total
7460 
7461     lea(ary1, Address(ary1, limit, Address::times_1));
7462     lea(ary2, Address(ary2, limit, Address::times_1));
7463     negptr(limit);
7464 
7465     bind(COMPARE_WIDE_VECTORS);
7466     movdqu(vec1, Address(ary1, limit, Address::times_1));
7467     movdqu(vec2, Address(ary2, limit, Address::times_1));
7468     pxor(vec1, vec2);          // non-zero wherever the two vectors differ
7469 
7470     ptest(vec1, vec1);
7471     jccb(Assembler::notZero, FALSE_LABEL);
7472     addptr(limit, 16);
7473     jcc(Assembler::notZero, COMPARE_WIDE_VECTORS);
7474 
7475     testl(result, result);     // any tail bytes left?
7476     jccb(Assembler::zero, TRUE_LABEL);
7477 


7489   }
7490 
7491   // Compare 4-byte vectors
7492   andl(limit, 0xfffffffc); // vector count (in bytes)
7493   jccb(Assembler::zero, COMPARE_CHAR);
7494 
7495   lea(ary1, Address(ary1, limit, Address::times_1));
7496   lea(ary2, Address(ary2, limit, Address::times_1));
7497   negptr(limit);
7498 
7499   bind(COMPARE_VECTORS);
7500   movl(chr, Address(ary1, limit, Address::times_1));
7501   cmpl(chr, Address(ary2, limit, Address::times_1));
7502   jccb(Assembler::notEqual, FALSE_LABEL);
7503   addptr(limit, 4);
7504   jcc(Assembler::notZero, COMPARE_VECTORS);
7505 
7506   // Compare trailing char (final 2 bytes), if any
7507   bind(COMPARE_CHAR);
7508   testl(result, 0x2);   // tail  char: bit 1 of the remaining byte count
7509   jccb(Assembler::zero, COMPARE_BYTE);
7510   load_unsigned_short(chr, Address(ary1, 0));
7511   load_unsigned_short(limit, Address(ary2, 0));  // limit is free to use as scratch here
7512   cmpl(chr, limit);
7513   jccb(Assembler::notEqual, FALSE_LABEL);
7514 
7515   if (is_array_equ && is_char) {
         // The byte count was produced by shll(limit, 1) above, so bit 0 is
         // never set: no single trailing byte to compare, fall into TRUE_LABEL.
7516     bind(COMPARE_BYTE);
7517   } else {
7518     lea(ary1, Address(ary1, 2));  // step over the two bytes just compared
7519     lea(ary2, Address(ary2, 2));
7520 
7521     bind(COMPARE_BYTE);
7522     testl(result, 0x1);   // tail  byte: bit 0 of the remaining byte count
7523     jccb(Assembler::zero, TRUE_LABEL);
7524     load_unsigned_byte(chr, Address(ary1, 0));
7525     load_unsigned_byte(limit, Address(ary2, 0));
7526     cmpl(chr, limit);
7527     jccb(Assembler::notEqual, FALSE_LABEL);
7528   }
7529   bind(TRUE_LABEL);
7530   movl(result, 1);   // return true
7531   jmpb(DONE);
7532 
7533   bind(FALSE_LABEL);
7534   xorl(result, result); // return false
7535 
7536   // That's it
7537   bind(DONE);
7538   if (UseAVX >= 2) {
7539     // clean upper bits of YMM registers
7540     vpxor(vec1, vec1);
7541     vpxor(vec2, vec2);
7542   }
7543 }
7544 
7545 #endif
7546 
7547 void MacroAssembler::generate_fill(BasicType t, bool aligned,
7548                                    Register to, Register value, Register count,
7549                                    Register rtmp, XMMRegister xtmp) {
7550   ShortBranchVerifier sbv(this);
7551   assert_different_registers(to, value, count, rtmp);
7552   Label L_exit, L_skip_align1, L_skip_align2, L_fill_byte;
7553   Label L_fill_2_bytes, L_fill_4_bytes;
7554 
7555   int shift = -1;
7556   switch (t) {
7557     case T_BYTE:
7558       shift = 2;
7559       break;
7560     case T_SHORT:
7561       shift = 1;
7562       break;
7563     case T_INT:
7564       shift = 0;
7565       break;
7566     default: ShouldNotReachHere();


9529     jmp(L_wordByWord);
9530 
9531   BIND(L_byteByByteProlog);
9532   andl(in2, 0x00000007);
9533   movl(tmp2, 1);
9534 
9535   BIND(L_byteByByte);
9536   cmpl(tmp2, in2);
9537   jccb(Assembler::greater, L_exit);
9538     movb(tmp1, Address(in1, 0));
9539     crc32(in_out, tmp1, 1);
9540     incl(in1);
9541     incl(tmp2);
9542     jmp(L_byteByByte);
9543 
9544   BIND(L_exit);
9545 }
9546 #endif // LP64
9547 #undef BIND
9548 #undef BLOCK_COMMENT
9549 
9550 
9551 // Compress char[] array to byte[].
     //
     // The emitted code copies 'len' 16-bit chars from 'src' to 'dst' as single
     // bytes. It leaves the original 'len' in 'result' when every char had a
     // zero high byte, and 0 in 'result' as soon as a char with a non-zero high
     // byte is seen (bytes stored by earlier iterations stay in 'dst').
     //   src, dst, len    - inputs; all three are overwritten by the emitted code
     //   tmp1Reg..tmp4Reg - scratch vector registers (SSE4.2 path only)
     //   tmp5             - scratch general purpose register (SSE4.2 path only)
     //   result           - output; must be a different register than 'len'
     // The original length is kept on the stack for the duration of the code.
9552 void MacroAssembler::char_array_compress(Register src, Register dst, Register len,
9553                                          XMMRegister tmp1Reg, XMMRegister tmp2Reg,
9554                                          XMMRegister tmp3Reg, XMMRegister tmp4Reg,
9555                                          Register tmp5, Register result) {
9556   Label copy_chars_loop, return_length, return_zero, done;
9557 
9558   // rsi: src
9559   // rdi: dst
9560   // rdx: len
9561   // rcx: tmp5
9562   // rax: result
9563 
9564   // rsi holds start addr of source char[] to be compressed
9565   // rdi holds start addr of destination byte[]
9566   // rdx holds length
9567 
9568   assert(len != result, "");
9569 
9570   // save length for return; popped at return_length, discarded at return_zero
9571   push(len);
9572 
9573   if (UseSSE42Intrinsics) {
9574     Label copy_32_loop, copy_16, copy_tail;
9575 
9576     movl(result, len);
9577     movl(tmp5, 0xff00ff00);   // mask selecting the high byte of each of two chars
9578 
9579     // vectored compression
9580     andl(len, 0xfffffff0);    // vector count (in chars)
9581     andl(result, 0x0000000f);    // tail count (in chars)
9582     testl(len, len);
9583     jccb(Assembler::zero, copy_16);
9584 
9585     // compress 16 chars per iter
9586     movdl(tmp1Reg, tmp5);
9587     pshufd(tmp1Reg, tmp1Reg, 0);   // replicate the high-byte mask across tmp1Reg
9588     pxor(tmp4Reg, tmp4Reg);        // tmp4Reg accumulates the OR of all chars loaded
9589 
         // Point src/dst just past the vector part and index them with a
         // negative char count that counts up to zero.
9590     lea(src, Address(src, len, Address::times_2));
9591     lea(dst, Address(dst, len, Address::times_1));
9592     negptr(len);
9593 
9594     bind(copy_32_loop);  // one iteration handles 32 source bytes (16 chars)
9595     movdqu(tmp2Reg, Address(src, len, Address::times_2));     // load 1st 8 characters
9596     por(tmp4Reg, tmp2Reg);
9597     movdqu(tmp3Reg, Address(src, len, Address::times_2, 16)); // load next 8 characters
9598     por(tmp4Reg, tmp3Reg);
9599     ptest(tmp4Reg, tmp1Reg);       // any high-byte bit set in the chars seen so far?
9600     jcc(Assembler::notZero, return_zero);
9601     packuswb(tmp2Reg, tmp3Reg);    // all high bytes are zero; compress each char to 1 byte
9602     movdqu(Address(dst, len, Address::times_1), tmp2Reg);
9603     addptr(len, 16);
9604     jcc(Assembler::notZero, copy_32_loop);
9605 
9606     // compress next vector of 8 chars (if any)
9607     bind(copy_16);
9608     movl(len, result);
9609     andl(len, 0xfffffff8);    // vector count (in chars)
9610     andl(result, 0x00000007);    // tail count (in chars)
9611     testl(len, len);
9612     jccb(Assembler::zero, copy_tail);
9613 
         // The mask is rebuilt here because the 16-char loop above, which also
         // sets it up, is skipped for short inputs.
9614     movdl(tmp1Reg, tmp5);
9615     pshufd(tmp1Reg, tmp1Reg, 0);   // replicate the high-byte mask across tmp1Reg
9616     pxor(tmp3Reg, tmp3Reg);        // zero second pack operand
9617 
9618     movdqu(tmp2Reg, Address(src, 0));
9619     ptest(tmp2Reg, tmp1Reg);       // any high-byte bit set in these 8 chars?
9620     jccb(Assembler::notZero, return_zero);
9621     packuswb(tmp2Reg, tmp3Reg);    // only LATIN1 chars; compress each to 1 byte
9622     movq(Address(dst, 0), tmp2Reg);  // store the 8 compressed bytes
9623     addptr(src, 16);
9624     addptr(dst, 8);
9625 
9626     bind(copy_tail);
9627     movl(len, result);  // remaining chars for the scalar loop below
9628   }
9629   // compress 1 char per iter
9630   testl(len, len);
9631   jccb(Assembler::zero, return_length);
9632   lea(src, Address(src, len, Address::times_2));
9633   lea(dst, Address(dst, len, Address::times_1));
9634   negptr(len);
9635 
9636   bind(copy_chars_loop);
9637   load_unsigned_short(result, Address(src, len, Address::times_2));
9638   testl(result, 0xff00);      // check if the char has a non-zero high byte
9639   jccb(Assembler::notZero, return_zero);
9640   movb(Address(dst, len, Address::times_1), result);  // high byte is zero; compress to 1 byte
9641   increment(len);
9642   jcc(Assembler::notZero, copy_chars_loop);
9643 
9644   // if compression succeeded, return length
9645   bind(return_length);
9646   pop(result);
9647   jmpb(done);
9648 
9649   // if compression failed, return 0
9650   bind(return_zero);
9651   xorl(result, result);
9652   addptr(rsp, wordSize);  // drop the length saved by push(len) above
9653 
9654   bind(done);
9655 }
9656 
9657 // Inflate byte[] array to char[].
     //
     // The emitted code copies 'len' bytes from 'src' to 'dst', zero-extending
     // each byte to a 16-bit char.
     //   src, dst, len - inputs; all three are overwritten by the emitted code
     //   tmp1          - scratch vector register (SSE4.2 path only)
     //   tmp2          - scratch general purpose register
9658 void MacroAssembler::byte_array_inflate(Register src, Register dst, Register len,
9659                                         XMMRegister tmp1, Register tmp2) {
9660   Label copy_chars_loop, done;
9661 
9662   // rsi: src
9663   // rdi: dst
9664   // rdx: len
9665   // rcx: tmp2
9666 
9667   // rsi holds start addr of source byte[] to be inflated
9668   // rdi holds start addr of destination char[]
9669   // rdx holds length
9670   assert_different_registers(src, dst, len, tmp2);
9671 
9672   if (UseSSE42Intrinsics) {
9673     Label copy_8_loop, copy_bytes, copy_tail;
9674 
9675     movl(tmp2, len);
9676     andl(tmp2, 0x00000007);   // tail count (in chars)
9677     andl(len, 0xfffffff8);    // vector count (in chars)
9678     jccb(Assembler::zero, copy_tail);  // fewer than 8 chars in total
9679 
9680     // vectored inflation
         // Point src/dst just past the vector part and index them with a
         // negative count that counts up to zero.
9681     lea(src, Address(src, len, Address::times_1));
9682     lea(dst, Address(dst, len, Address::times_2));
9683     negptr(len);
9684 
9685     // inflate 8 chars per iter
9686     bind(copy_8_loop);
9687     pmovzxbw(tmp1, Address(src, len, Address::times_1));  // unpack to 8 words
9688     movdqu(Address(dst, len, Address::times_2), tmp1);
9689     addptr(len, 8);
9690     jcc(Assembler::notZero, copy_8_loop);
9691 
9692     bind(copy_tail);
9693     movl(len, tmp2);  // remaining count (0..7)
9694 
9695     cmpl(len, 4);
9696     jccb(Assembler::less, copy_bytes);
9697 
         // At least 4 left: inflate 4 bytes in one step
9698     movdl(tmp1, Address(src, 0));  // load 4 byte chars
9699     pmovzxbw(tmp1, tmp1);
9700     movq(Address(dst, 0), tmp1);   // store the 4 resulting chars (8 bytes)
9701     subptr(len, 4);
9702     addptr(src, 4);
9703     addptr(dst, 8);
9704 
9705     bind(copy_bytes);
9706   }
9707   testl(len, len);
9708   jccb(Assembler::zero, done);
9709   lea(src, Address(src, len, Address::times_1));
9710   lea(dst, Address(dst, len, Address::times_2));
9711   negptr(len);
9712 
9713   // inflate 1 char per iter
9714   bind(copy_chars_loop);
9715   load_unsigned_byte(tmp2, Address(src, len, Address::times_1));  // load byte char
9716   movw(Address(dst, len, Address::times_2), tmp2);  // inflate byte char to word
9717   increment(len);
9718   jcc(Assembler::notZero, copy_chars_loop);
9719 
9720   bind(done);
9721 }
9722 
9723 
9724 Assembler::Condition MacroAssembler::negate_condition(Assembler::Condition cond) {
9725   switch (cond) {
9726     // Note some conditions are synonyms for others
9727     case Assembler::zero:         return Assembler::notZero;
9728     case Assembler::notZero:      return Assembler::zero;
9729     case Assembler::less:         return Assembler::greaterEqual;
9730     case Assembler::lessEqual:    return Assembler::greater;
9731     case Assembler::greater:      return Assembler::lessEqual;
9732     case Assembler::greaterEqual: return Assembler::less;
9733     case Assembler::below:        return Assembler::aboveEqual;
9734     case Assembler::belowEqual:   return Assembler::above;
9735     case Assembler::above:        return Assembler::belowEqual;
9736     case Assembler::aboveEqual:   return Assembler::below;
9737     case Assembler::overflow:     return Assembler::noOverflow;
9738     case Assembler::noOverflow:   return Assembler::overflow;
9739     case Assembler::negative:     return Assembler::positive;
9740     case Assembler::positive:     return Assembler::negative;
9741     case Assembler::parity:       return Assembler::noParity;
< prev index next >