29 #include "gc/shared/cardTableModRefBS.hpp"
30 #include "gc/shared/collectedHeap.inline.hpp"
31 #include "interpreter/interpreter.hpp"
32 #include "memory/resourceArea.hpp"
33 #include "memory/universe.hpp"
34 #include "oops/klass.inline.hpp"
35 #include "prims/methodHandles.hpp"
36 #include "runtime/biasedLocking.hpp"
37 #include "runtime/interfaceSupport.hpp"
38 #include "runtime/objectMonitor.hpp"
39 #include "runtime/os.hpp"
40 #include "runtime/sharedRuntime.hpp"
41 #include "runtime/stubRoutines.hpp"
42 #include "utilities/macros.hpp"
43 #if INCLUDE_ALL_GCS
44 #include "gc/g1/g1CollectedHeap.inline.hpp"
45 #include "gc/g1/g1SATBCardTableModRefBS.hpp"
46 #include "gc/g1/heapRegion.hpp"
47 #endif // INCLUDE_ALL_GCS
48 #include "crc32c.h"
49
// Helpers for annotating the generated code.
//   BLOCK_COMMENT(str) - forwards to block_comment(str) in non-PRODUCT builds,
//                        expands to nothing in PRODUCT builds.
//   STOP(error)        - calls stop(error); non-PRODUCT builds first record the
//                        message with block_comment(error).
//   BIND(label)        - binds the label and records "<label>:" as a block comment.
// The multi-statement macros are wrapped in do { ... } while (0) so that each
// one expands to exactly one statement; otherwise `if (cond) STOP("...");`
// would guard only the first call and always execute the second.
#ifdef PRODUCT
#define BLOCK_COMMENT(str) /* nothing */
#define STOP(error) stop(error)
#else
#define BLOCK_COMMENT(str) block_comment(str)
#define STOP(error) do { block_comment(error); stop(error); } while (0)
#endif

#define BIND(label) do { bind(label); BLOCK_COMMENT(#label ":"); } while (0)
59
#ifdef ASSERT
// Platform-dependent instruction-mark check (ASSERT builds only).
// This implementation performs no validation and always reports success.
bool AbstractAssembler::pd_check_instruction_mark() {
  return true;
}
#endif
63
64 static Assembler::Condition reverse[] = {
65 Assembler::noOverflow /* overflow = 0x0 */ ,
66 Assembler::overflow /* noOverflow = 0x1 */ ,
67 Assembler::aboveEqual /* carrySet = 0x2, below = 0x2 */ ,
68 Assembler::below /* aboveEqual = 0x3, carryClear = 0x3 */ ,
6282
6283 }
6284
6285 void MacroAssembler::clear_mem(Register base, Register cnt, Register tmp) {
6286 // cnt - number of qwords (8-byte words).
6287 // base - start address, qword aligned.
6288 assert(base==rdi, "base register must be edi for rep stos");
6289 assert(tmp==rax, "tmp register must be eax for rep stos");
6290 assert(cnt==rcx, "cnt register must be ecx for rep stos");
6291
6292 xorptr(tmp, tmp);
6293 if (UseFastStosb) {
6294 shlptr(cnt,3); // convert to number of bytes
6295 rep_stosb();
6296 } else {
6297 NOT_LP64(shlptr(cnt,1);) // convert to number of dwords for 32-bit VM
6298 rep_stos();
6299 }
6300 }
6301
6302 // IndexOf for constant substrings with size >= 8 chars
6303 // which don't need to be loaded through stack.
//
// Emits an SSE4.2 (pcmpestri) search for the substring at str2 inside the
// string at str1.
//   str1, cnt1 - string address and its length in elements (cnt1 must be rdx)
//   str2, cnt2 - substring address and length register (cnt2 must be rax);
//                cnt2 is loaded from int_cnt2 by the generated code
//   int_cnt2   - compile-time substring length, asserted to be >= 8
//   result     - on exit: (match address - str1) >> 1, or -1 if no match
//   vec        - XMM register holding the 8 substring elements being matched
//   tmp        - must be rcx; receives the pcmpestri match index
6304 void MacroAssembler::string_indexofC8(Register str1, Register str2,
6305 Register cnt1, Register cnt2,
6306 int int_cnt2, Register result,
6307 XMMRegister vec, Register tmp) {
6308 ShortBranchVerifier sbv(this);
6309 assert(UseSSE42Intrinsics, "SSE4.2 is required");
6310
6311 // This method uses pcmpestri instruction with bound registers
6312 // inputs:
6313 // xmm - substring
6314 // rax - substring length (elements count)
6315 // mem - scanned string
6316 // rdx - string length (elements count)
6317 // 0xd - mode: 1100 (substring search) + 01 (unsigned shorts)
6318 // outputs:
6319 // rcx - matched index in string
6320 assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri");
6321
6322 Label RELOAD_SUBSTR, SCAN_TO_SUBSTR, SCAN_SUBSTR,
6323 RET_FOUND, RET_NOT_FOUND, EXIT, FOUND_SUBSTR,
6324 MATCH_SUBSTR_HEAD, RELOAD_STR, FOUND_CANDIDATE;
6325
6326 // Note, inline_string_indexOf() generates checks:
6327 // if (substr.count > string.count) return -1;
6328 // if (substr.count == 0) return 0;
6329 assert(int_cnt2 >= 8, "this code isused only for cnt2 >= 8 chars");
6330
6331 // Load substring.
6332 movdqu(vec, Address(str2, 0));
6333 movl(cnt2, int_cnt2);
6334 movptr(result, str1); // string addr
6335
6336 if (int_cnt2 > 8) {
6337 jmpb(SCAN_TO_SUBSTR);
6338
6339 // Reload substr for rescan, this code
6340 // is executed only for large substrings (> 8 chars)
6341 bind(RELOAD_SUBSTR);
6342 movdqu(vec, Address(str2, 0));
6343 negptr(cnt2); // Jumped here with negative cnt2, convert to positive
6344
6345 bind(RELOAD_STR);
6346 // We came here after the beginning of the substring was
6347 // matched but the rest of it was not so we need to search
6348 // again. Start from the next element after the previous match.
6349
6350 // cnt2 is the number of remaining substring elements and
6351 // cnt1 is the number of remaining string elements when cmp failed.
6352 // Restored cnt1 = cnt1 - cnt2 + int_cnt2
6353 subl(cnt1, cnt2);
6354 addl(cnt1, int_cnt2);
6355 movl(cnt2, int_cnt2); // Now restore cnt2
6356
6357 decrementl(cnt1); // Shift to next element
6358 cmpl(cnt1, cnt2);
6359 jccb(Assembler::negative, RET_NOT_FOUND); // Left less than substring
6360
6361 addptr(result, 2);
6362
6363 } // (int_cnt2 > 8)
6364
6365 // Scan string for start of substr in 16-byte vectors
6366 bind(SCAN_TO_SUBSTR);
6367 pcmpestri(vec, Address(result, 0), 0x0d);
6368 jccb(Assembler::below, FOUND_CANDIDATE); // CF == 1
6369 subl(cnt1, 8); // No candidate in this vector: 8 elements consumed
6370 jccb(Assembler::lessEqual, RET_NOT_FOUND); // Scanned full string
6371 cmpl(cnt1, cnt2);
6372 jccb(Assembler::negative, RET_NOT_FOUND); // Left less than substring
6373 addptr(result, 16);
6374 jmpb(SCAN_TO_SUBSTR);
6375
6376 // Found a potential substr
6377 bind(FOUND_CANDIDATE);
6378 // Matched whole vector if first element matched (tmp(rcx) == 0).
6379 if (int_cnt2 == 8) {
6380 jccb(Assembler::overflow, RET_FOUND); // OF == 1
6381 } else { // int_cnt2 > 8
6382 jccb(Assembler::overflow, FOUND_SUBSTR);
6383 }
6384 // After pcmpestri tmp(rcx) contains matched element index
6385 // Compute start addr of substr
6386 lea(result, Address(result, tmp, Address::times_2));
6387
6388 // Make sure string is still long enough
6389 subl(cnt1, tmp);
6390 cmpl(cnt1, cnt2);
6391 if (int_cnt2 == 8) {
6392 jccb(Assembler::greaterEqual, SCAN_TO_SUBSTR);
6393 } else { // int_cnt2 > 8
6394 jccb(Assembler::greaterEqual, MATCH_SUBSTR_HEAD);
6395 }
6396 // Left less than substring.
6397
6398 bind(RET_NOT_FOUND);
6399 movl(result, -1);
6400 jmpb(EXIT);
6401
6402 if (int_cnt2 > 8) {
6403 // This code is optimized for the case when whole substring
6404 // is matched if its head is matched.
6405 bind(MATCH_SUBSTR_HEAD);
6406 pcmpestri(vec, Address(result, 0), 0x0d);
6407 // Reload only string if does not match
6408 jccb(Assembler::noOverflow, RELOAD_STR); // OF == 0
6409
6410 Label CONT_SCAN_SUBSTR;
6411 // Compare the rest of substring (> 8 chars).
6412 bind(FOUND_SUBSTR);
6413 // First 8 chars are already matched.
// From here on cnt2 is kept negative: cnt2 = 8 - cnt2.
6414 negptr(cnt2);
6415 addptr(cnt2, 8);
6416
6417 bind(SCAN_SUBSTR);
6418 subl(cnt1, 8);
6419 cmpl(cnt2, -8); // Do not read beyond substring
6420 jccb(Assembler::lessEqual, CONT_SCAN_SUBSTR);
6421 // Back-up strings to avoid reading beyond substring:
6422 // cnt1 = cnt1 - cnt2 + 8
6423 addl(cnt1, cnt2); // cnt2 is negative
6424 addl(cnt1, 8);
6425 movl(cnt2, 8); negptr(cnt2); // cnt2 = -8
6426 bind(CONT_SCAN_SUBSTR);
6427 if (int_cnt2 < (int)G) {
// int_cnt2*2 is used directly as the address displacement here.
6428 movdqu(vec, Address(str2, cnt2, Address::times_2, int_cnt2*2));
6429 pcmpestri(vec, Address(result, cnt2, Address::times_2, int_cnt2*2), 0x0d);
6430 } else {
6431 // calculate index in register to avoid integer overflow (int_cnt2*2)
6432 movl(tmp, int_cnt2);
6433 addptr(tmp, cnt2);
6434 movdqu(vec, Address(str2, tmp, Address::times_2, 0));
6435 pcmpestri(vec, Address(result, tmp, Address::times_2, 0), 0x0d);
6436 }
6437 // Need to reload strings pointers if not matched whole vector
6438 jcc(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0
6439 addptr(cnt2, 8);
6440 jcc(Assembler::negative, SCAN_SUBSTR);
6441 // Fall through if found full substring
6442
6443 } // (int_cnt2 > 8)
6444
6445 bind(RET_FOUND);
6446 // Found result if we matched full small substring.
6447 // Compute substr offset
6448 subptr(result, str1);
6449 shrl(result, 1); // index
6450 bind(EXIT);
6451
6452 } // string_indexofC8
6453
6454 // Small strings are loaded through stack if they cross page boundary.
//
// Emits an SSE4.2 (pcmpestri) search for the substring at str2 inside the
// string at str1.
//   str1, cnt1 - string address and length in elements (cnt1 must be rdx)
//   str2, cnt2 - substring address and length in elements (cnt2 must be rax)
//   int_cnt2   - length of a small (1..7) constant substring, or -1 when the
//                length is only available at run time in cnt2
//   result     - on exit: (match address - str1) >> 1, or -1 if no match
//   vec        - XMM register holding the substring elements being matched
//   tmp        - must be rcx; first carries the saved SP during the loading
//                phase, afterwards the pcmpestri match index
// The generated code pushes the incoming SP and pops it back into rsp at
// CLEANUP, which also releases any temporary stack copies of the strings.
6455 void MacroAssembler::string_indexof(Register str1, Register str2,
6456 Register cnt1, Register cnt2,
6457 int int_cnt2, Register result,
6458 XMMRegister vec, Register tmp) {
6459 ShortBranchVerifier sbv(this);
6460 assert(UseSSE42Intrinsics, "SSE4.2 is required");
6461 //
6462 // int_cnt2 is length of small (< 8 chars) constant substring
6463 // or (-1) for non constant substring in which case its length
6464 // is in cnt2 register.
6465 //
6466 // Note, inline_string_indexOf() generates checks:
6467 // if (substr.count > string.count) return -1;
6468 // if (substr.count == 0) return 0;
6469 //
6470 assert(int_cnt2 == -1 || (0 < int_cnt2 && int_cnt2 < 8), "should be != 0");
6471
6472 // This method uses pcmpestri instruction with bound registers
6473 // inputs:
6474 // xmm - substring
6475 // rax - substring length (elements count)
6476 // mem - scanned string
6477 // rdx - string length (elements count)
6478 // 0xd - mode: 1100 (substring search) + 01 (unsigned shorts)
6479 // outputs:
6480 // rcx - matched index in string
6481 assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri");
6482
6483 Label RELOAD_SUBSTR, SCAN_TO_SUBSTR, SCAN_SUBSTR, ADJUST_STR,
6484 RET_FOUND, RET_NOT_FOUND, CLEANUP, FOUND_SUBSTR,
6485 FOUND_CANDIDATE;
6486
6487 { //========================================================
6488 // We don't know where these strings are located
6489 // and we can't read beyond them. Load them through stack.
6490 Label BIG_STRINGS, CHECK_STR, COPY_SUBSTR, COPY_STR;
6491
6492 movptr(tmp, rsp); // save old SP
6493
6494 if (int_cnt2 > 0) { // small (< 8 chars) constant substring
6495 if (int_cnt2 == 1) { // One char
6496 load_unsigned_short(result, Address(str2, 0));
6497 movdl(vec, result); // move 32 bits
6498 } else if (int_cnt2 == 2) { // Two chars
6499 movdl(vec, Address(str2, 0)); // move 32 bits
6500 } else if (int_cnt2 == 4) { // Four chars
6501 movq(vec, Address(str2, 0)); // move 64 bits
6502 } else { // cnt2 = { 3, 5, 6, 7 }
6503 // Array header size is 12 bytes in 32-bit VM
6504 // + 6 bytes for 3 chars == 18 bytes,
6505 // enough space to load vec and shift.
6506 assert(HeapWordSize*TypeArrayKlass::header_size() >= 12,"sanity");
// Load the 16 bytes that end at the substring's last element, then
// shift right so the substring starts at byte 0 of vec.
6507 movdqu(vec, Address(str2, (int_cnt2*2)-16));
6508 psrldq(vec, 16-(int_cnt2*2));
6509 }
6510 } else { // not constant substring
6511 cmpl(cnt2, 8);
6512 jccb(Assembler::aboveEqual, BIG_STRINGS); // Both strings are big enough
6513
6514 // We can read beyond string if str+16 does not cross page boundary
6515 // since heaps are aligned and mapped by pages.
6516 assert(os::vm_page_size() < (int)G, "default page should be small");
6517 movl(result, str2); // We need only low 32 bits
6518 andl(result, (os::vm_page_size()-1));
6519 cmpl(result, (os::vm_page_size()-16));
6520 jccb(Assembler::belowEqual, CHECK_STR);
6521
6522 // Move small strings to stack to allow load 16 bytes into vec.
6523 subptr(rsp, 16);
// The pushed cnt2 occupies one word below the buffer; the extra -2
// compensates for the 1-based index used by the copy loop.
6524 int stk_offset = wordSize-2;
6525 push(cnt2);
6526
6527 bind(COPY_SUBSTR);
6528 load_unsigned_short(result, Address(str2, cnt2, Address::times_2, -2));
6529 movw(Address(rsp, cnt2, Address::times_2, stk_offset), result);
6530 decrement(cnt2);
6531 jccb(Assembler::notZero, COPY_SUBSTR);
6532
6533 pop(cnt2);
6534 movptr(str2, rsp); // New substring address
6535 } // non constant
6536
6537 bind(CHECK_STR);
6538 cmpl(cnt1, 8);
6539 jccb(Assembler::aboveEqual, BIG_STRINGS);
6540
6541 // Check cross page boundary.
6542 movl(result, str1); // We need only low 32 bits
6543 andl(result, (os::vm_page_size()-1));
6544 cmpl(result, (os::vm_page_size()-16));
6545 jccb(Assembler::belowEqual, BIG_STRINGS);
6546
6547 subptr(rsp, 16);
6548 int stk_offset = -2;
6549 if (int_cnt2 < 0) { // not constant
6550 push(cnt2);
6551 stk_offset += wordSize;
6552 }
6553 movl(cnt2, cnt1); // cnt2 is the loop counter for the copy below
6554
6555 bind(COPY_STR);
6556 load_unsigned_short(result, Address(str1, cnt2, Address::times_2, -2));
6557 movw(Address(rsp, cnt2, Address::times_2, stk_offset), result);
6558 decrement(cnt2);
6559 jccb(Assembler::notZero, COPY_STR);
6560
6561 if (int_cnt2 < 0) { // not constant
6562 pop(cnt2);
6563 }
6564 movptr(str1, rsp); // New string address
6565
6566 bind(BIG_STRINGS);
6567 // Load substring.
6568 if (int_cnt2 < 0) { // -1
6569 movdqu(vec, Address(str2, 0));
6570 push(cnt2); // substr count
6571 push(str2); // substr addr
6572 push(str1); // string addr
6573 } else {
6574 // Small (< 8 chars) constant substrings are loaded already.
6575 movl(cnt2, int_cnt2);
6576 }
6577 push(tmp); // original SP
6578
6579 } // Finished loading
6580
6581 //========================================================
6582 // Start search
6583 //
6584
6585 movptr(result, str1); // string addr
6586
6587 if (int_cnt2 < 0) { // Only for non constant substring
6588 jmpb(SCAN_TO_SUBSTR);
6589
6590 // SP saved at sp+0
6591 // String saved at sp+1*wordSize
6592 // Substr saved at sp+2*wordSize
6593 // Substr count saved at sp+3*wordSize
6594
6595 // Reload substr for rescan, this code
6596 // is executed only for large substrings (> 8 chars)
6597 bind(RELOAD_SUBSTR);
6598 movptr(str2, Address(rsp, 2*wordSize));
6599 movl(cnt2, Address(rsp, 3*wordSize));
6600 movdqu(vec, Address(str2, 0));
6601 // We came here after the beginning of the substring was
6602 // matched but the rest of it was not so we need to search
6603 // again. Start from the next element after the previous match.
6604 subptr(str1, result); // Restore counter
6605 shrl(str1, 1);
6606 addl(cnt1, str1);
6607 decrementl(cnt1); // Shift to next element
6608 cmpl(cnt1, cnt2);
6609 jccb(Assembler::negative, RET_NOT_FOUND); // Left less than substring
6610
6611 addptr(result, 2);
6612 } // non constant
6613
6614 // Scan string for start of substr in 16-byte vectors
6615 bind(SCAN_TO_SUBSTR);
6616 assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri");
6617 pcmpestri(vec, Address(result, 0), 0x0d);
6618 jccb(Assembler::below, FOUND_CANDIDATE); // CF == 1
6619 subl(cnt1, 8);
6620 jccb(Assembler::lessEqual, RET_NOT_FOUND); // Scanned full string
6621 cmpl(cnt1, cnt2);
6622 jccb(Assembler::negative, RET_NOT_FOUND); // Left less than substring
6623 addptr(result, 16);
6624
6625 bind(ADJUST_STR);
6626 cmpl(cnt1, 8); // Do not read beyond string
6627 jccb(Assembler::greaterEqual, SCAN_TO_SUBSTR);
6628 // Back-up string to avoid reading beyond string.
6629 lea(result, Address(result, cnt1, Address::times_2, -16));
6630 movl(cnt1, 8);
6631 jmpb(SCAN_TO_SUBSTR);
6632
6633 // Found a potential substr
6634 bind(FOUND_CANDIDATE);
6635 // After pcmpestri tmp(rcx) contains matched element index
6636
6637 // Make sure string is still long enough
6638 subl(cnt1, tmp);
6639 cmpl(cnt1, cnt2);
6640 jccb(Assembler::greaterEqual, FOUND_SUBSTR);
6641 // Left less than substring.
6642
6643 bind(RET_NOT_FOUND);
6644 movl(result, -1);
6645 jmpb(CLEANUP);
6646
6647 bind(FOUND_SUBSTR);
6648 // Compute start addr of substr
6649 lea(result, Address(result, tmp, Address::times_2));
6650
6651 if (int_cnt2 > 0) { // Constant substring
6652 // Repeat search for small substring (< 8 chars)
6653 // from new point without reloading substring.
6654 // Have to check that we don't read beyond string.
6655 cmpl(tmp, 8-int_cnt2);
6656 jccb(Assembler::greater, ADJUST_STR);
6657 // Fall through if matched whole substring.
6658 } else { // non constant
6659 assert(int_cnt2 == -1, "should be != 0");
6660
6661 addl(tmp, cnt2);
6662 // Found result if we matched whole substring.
6663 cmpl(tmp, 8);
6664 jccb(Assembler::lessEqual, RET_FOUND);
6665
6666 // Repeat search for small substring (<= 8 chars)
6667 // from new point 'str1' without reloading substring.
6668 cmpl(cnt2, 8);
6669 // Have to check that we don't read beyond string.
6670 jccb(Assembler::lessEqual, ADJUST_STR);
6671
6672 Label CHECK_NEXT, CONT_SCAN_SUBSTR, RET_FOUND_LONG;
6673 // Compare the rest of substring (> 8 chars).
6674 movptr(str1, result);
6675
6676 cmpl(tmp, cnt2);
6677 // First 8 chars are already matched.
6678 jccb(Assembler::equal, CHECK_NEXT);
6679
6680 bind(SCAN_SUBSTR);
6681 pcmpestri(vec, Address(str1, 0), 0x0d);
6682 // Need to reload strings pointers if not matched whole vector
6683 jcc(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0
6684
6685 bind(CHECK_NEXT);
6686 subl(cnt2, 8);
6687 jccb(Assembler::lessEqual, RET_FOUND_LONG); // Found full substring
6688 addptr(str1, 16);
6689 addptr(str2, 16);
6690 subl(cnt1, 8);
6691 cmpl(cnt2, 8); // Do not read beyond substring
6692 jccb(Assembler::greaterEqual, CONT_SCAN_SUBSTR);
6693 // Back-up strings to avoid reading beyond substring.
6694 lea(str2, Address(str2, cnt2, Address::times_2, -16));
6695 lea(str1, Address(str1, cnt2, Address::times_2, -16));
6696 subl(cnt1, cnt2);
6697 movl(cnt2, 8);
6698 addl(cnt1, 8);
6699 bind(CONT_SCAN_SUBSTR);
6700 movdqu(vec, Address(str2, 0));
6701 jmpb(SCAN_SUBSTR);
6702
6703 bind(RET_FOUND_LONG);
// str1 was advanced during the scan; reload the string address saved at sp+1*wordSize.
6704 movptr(str1, Address(rsp, wordSize));
6705 } // non constant
6706
6707 bind(RET_FOUND);
6708 // Compute substr offset
6709 subptr(result, str1);
6710 shrl(result, 1); // index
6711
6712 bind(CLEANUP);
// The top of stack holds the SP value pushed at the end of the loading phase.
6713 pop(rsp); // restore SP
6714
6715 } // string_indexof
6716
6717 // Compare strings.
//
// Emits a comparison of two sequences of 16-bit elements.
//   str1, cnt1 - first string address and length in elements
//   str2, cnt2 - second string address and length in elements
//   result     - on exit: difference of the first mismatching elements
//                (str1 element - str2 element), or cnt1 - cnt2 when the
//                strings are equal up to the shorter length
//   vec1       - vector scratch register for the SSE4.2 / AVX2 paths
// The vector paths assert result == rax, cnt2 == rdx and cnt1 == rcx because
// pcmpestri uses those registers. str1, str2, cnt1 and cnt2 may be
// overwritten by the generated code.
6718 void MacroAssembler::string_compare(Register str1, Register str2,
6719 Register cnt1, Register cnt2, Register result,
6720 XMMRegister vec1) {
6721 ShortBranchVerifier sbv(this);
6722 Label LENGTH_DIFF_LABEL, POP_LABEL, DONE_LABEL, WHILE_HEAD_LABEL;
6723
6724 // Compute the minimum of the string lengths and the
6725 // difference of the string lengths (stack).
6726 // Do the conditional move stuff
6727 movl(result, cnt1);
6728 subl(cnt1, cnt2);
6729 push(cnt1); // length difference, popped at LENGTH_DIFF_LABEL or POP_LABEL
6730 cmov32(Assembler::lessEqual, cnt2, result); // cnt2 = min(cnt1, cnt2)
6731
6732 // Is the minimum length zero?
6733 testl(cnt2, cnt2);
6734 jcc(Assembler::zero, LENGTH_DIFF_LABEL);
6735
6736 // Compare first characters
6737 load_unsigned_short(result, Address(str1, 0));
6738 load_unsigned_short(cnt1, Address(str2, 0));
6739 subl(result, cnt1);
6740 jcc(Assembler::notZero, POP_LABEL);
6741 cmpl(cnt2, 1);
6742 jcc(Assembler::equal, LENGTH_DIFF_LABEL);
6743
6744 // Check if the strings start at the same location.
6745 cmpptr(str1, str2);
6746 jcc(Assembler::equal, LENGTH_DIFF_LABEL);
6747
6748 Address::ScaleFactor scale = Address::times_2;
6749 int stride = 8;
6750
6751 if (UseAVX >= 2 && UseSSE42Intrinsics) {
6752 Label COMPARE_WIDE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_WIDE_TAIL, COMPARE_SMALL_STR;
6753 Label COMPARE_WIDE_VECTORS_LOOP, COMPARE_16_CHARS, COMPARE_INDEX_CHAR;
6754 Label COMPARE_TAIL_LONG;
6755 int pcmpmask = 0x19;
6756
6757 // Setup to compare 16-chars (32-bytes) vectors,
6758 // start from first character again because it has aligned address.
6759 int stride2 = 16;
6760 int adr_stride = stride << scale;
6761
6762 assert(result == rax && cnt2 == rdx && cnt1 == rcx, "pcmpestri");
6763 // rax and rdx are used by pcmpestri as elements counters
6764 movl(result, cnt2);
6765 andl(cnt2, ~(stride2-1)); // cnt2 holds the vector count
6766 jcc(Assembler::zero, COMPARE_TAIL_LONG);
6767
6768 // fast path : compare first 2 8-char vectors.
6769 bind(COMPARE_16_CHARS);
6770 movdqu(vec1, Address(str1, 0));
6771 pcmpestri(vec1, Address(str2, 0), pcmpmask);
6772 jccb(Assembler::below, COMPARE_INDEX_CHAR);
6773
6774 movdqu(vec1, Address(str1, adr_stride));
6775 pcmpestri(vec1, Address(str2, adr_stride), pcmpmask);
6776 jccb(Assembler::aboveEqual, COMPARE_WIDE_VECTORS);
6777 addl(cnt1, stride); // mismatch is in the second vector: bias the index by 8
6778
6779 // Compare the characters at index in cnt1
6780 bind(COMPARE_INDEX_CHAR); //cnt1 has the offset of the mismatching character
6781 load_unsigned_short(result, Address(str1, cnt1, scale));
6782 load_unsigned_short(cnt2, Address(str2, cnt1, scale));
6783 subl(result, cnt2);
6784 jmp(POP_LABEL);
6785
6786 // Setup the registers to start vector comparison loop
6787 bind(COMPARE_WIDE_VECTORS);
6788 lea(str1, Address(str1, result, scale));
6789 lea(str2, Address(str2, result, scale));
6790 subl(result, stride2);
6791 subl(cnt2, stride2);
6792 jccb(Assembler::zero, COMPARE_WIDE_TAIL);
6793 negptr(result);
6794
6795 // In a loop, compare 16-chars (32-bytes) at once using (vpxor+vptest)
6796 bind(COMPARE_WIDE_VECTORS_LOOP);
6797 vmovdqu(vec1, Address(str1, result, scale));
6798 vpxor(vec1, Address(str2, result, scale));
6799 vptest(vec1, vec1);
6800 jccb(Assembler::notZero, VECTOR_NOT_EQUAL);
6801 addptr(result, stride2);
6802 subl(cnt2, stride2);
6803 jccb(Assembler::notZero, COMPARE_WIDE_VECTORS_LOOP);
6804 // clean upper bits of YMM registers
6805 vpxor(vec1, vec1);
6806
6807 // compare wide vectors tail
6808 bind(COMPARE_WIDE_TAIL);
6809 testptr(result, result);
6810 jccb(Assembler::zero, LENGTH_DIFF_LABEL);
6811
// Run the loop body once more over the last 16 chars before the end pointers.
6812 movl(result, stride2);
6813 movl(cnt2, result);
6814 negptr(result);
6815 jmpb(COMPARE_WIDE_VECTORS_LOOP);
6816
6817 // Identifies the mismatching (higher or lower) 16-bytes in the 32-byte vectors.
6818 bind(VECTOR_NOT_EQUAL);
6819 // clean upper bits of YMM registers
6820 vpxor(vec1, vec1);
6821 lea(str1, Address(str1, result, scale));
6822 lea(str2, Address(str2, result, scale));
6823 jmp(COMPARE_16_CHARS);
6824
6825 // Compare tail chars, length between 1 to 15 chars
6826 bind(COMPARE_TAIL_LONG);
6827 movl(cnt2, result);
6828 cmpl(cnt2, stride);
6829 jccb(Assembler::less, COMPARE_SMALL_STR);
6830
6831 movdqu(vec1, Address(str1, 0));
6832 pcmpestri(vec1, Address(str2, 0), pcmpmask);
6833 jcc(Assembler::below, COMPARE_INDEX_CHAR);
6834 subptr(cnt2, stride);
6835 jccb(Assembler::zero, LENGTH_DIFF_LABEL);
6836 lea(str1, Address(str1, result, scale));
6837 lea(str2, Address(str2, result, scale));
6838 negptr(cnt2);
6839 jmpb(WHILE_HEAD_LABEL);
6840
6841 bind(COMPARE_SMALL_STR);
6842 } else if (UseSSE42Intrinsics) {
6843 Label COMPARE_WIDE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_TAIL;
6844 int pcmpmask = 0x19;
6845 // Setup to compare 8-char (16-byte) vectors,
6846 // start from first character again because it has aligned address.
6847 movl(result, cnt2);
6848 andl(cnt2, ~(stride - 1)); // cnt2 holds the vector count
6849 jccb(Assembler::zero, COMPARE_TAIL);
6850
6851 lea(str1, Address(str1, result, scale));
6852 lea(str2, Address(str2, result, scale));
6853 negptr(result);
6854
6855 // pcmpestri
6856 // inputs:
6857 // vec1- substring
6858 // rax - negative string length (elements count)
6859 // mem - scanned string
6860 // rdx - string length (elements count)
6861 // pcmpmask - cmp mode: 11000 (string compare with negated result)
6862 // + 00 (unsigned bytes) or + 01 (unsigned shorts)
6863 // outputs:
6864 // rcx - first mismatched element index
6865 assert(result == rax && cnt2 == rdx && cnt1 == rcx, "pcmpestri");
6866
6867 bind(COMPARE_WIDE_VECTORS);
6868 movdqu(vec1, Address(str1, result, scale));
6869 pcmpestri(vec1, Address(str2, result, scale), pcmpmask);
6870 // After pcmpestri cnt1(rcx) contains mismatched element index
6871
6872 jccb(Assembler::below, VECTOR_NOT_EQUAL); // CF==1
6873 addptr(result, stride);
6874 subptr(cnt2, stride);
6875 jccb(Assembler::notZero, COMPARE_WIDE_VECTORS);
6876
6877 // compare wide vectors tail
6878 testptr(result, result);
6879 jccb(Assembler::zero, LENGTH_DIFF_LABEL);
6880
// Compare the last 8 chars before the end pointers.
6881 movl(cnt2, stride);
6882 movl(result, stride);
6883 negptr(result);
6884 movdqu(vec1, Address(str1, result, scale));
6885 pcmpestri(vec1, Address(str2, result, scale), pcmpmask);
6886 jccb(Assembler::aboveEqual, LENGTH_DIFF_LABEL);
6887
6888 // Mismatched characters in the vectors
6889 bind(VECTOR_NOT_EQUAL);
6890 addptr(cnt1, result);
6891 load_unsigned_short(result, Address(str1, cnt1, scale));
6892 load_unsigned_short(cnt2, Address(str2, cnt1, scale));
6893 subl(result, cnt2);
6894 jmpb(POP_LABEL);
6895
6896 bind(COMPARE_TAIL); // limit is zero
6897 movl(cnt2, result);
6898 // Fallthru to tail compare
6899 }
6900 // Shift str2 and str1 to the end of the arrays, negate min
6901 lea(str1, Address(str1, cnt2, scale));
6902 lea(str2, Address(str2, cnt2, scale));
6903 decrementl(cnt2); // first character was compared already
6904 negptr(cnt2);
6905
6906 // Compare the rest of the elements
6907 bind(WHILE_HEAD_LABEL);
6908 load_unsigned_short(result, Address(str1, cnt2, scale, 0));
6909 load_unsigned_short(cnt1, Address(str2, cnt2, scale, 0));
6910 subl(result, cnt1);
6911 jccb(Assembler::notZero, POP_LABEL);
6912 increment(cnt2);
6913 jccb(Assembler::notZero, WHILE_HEAD_LABEL);
6914
6915 // Strings are equal up to min length. Return the length difference.
6916 bind(LENGTH_DIFF_LABEL);
6917 pop(result);
6918 jmpb(DONE_LABEL);
6919
6920 // Discard the stored length difference
6921 bind(POP_LABEL);
6922 pop(cnt1);
6923
6924 // That's it
6925 bind(DONE_LABEL);
6926 }
6927
6928 // Compare char[] arrays aligned to 4 bytes or substrings.
//
// Emits an equality test; result is set to 1 when equal and 0 otherwise.
//   is_array_equ - true: null checks and a length comparison are emitted,
//                  limit is loaded from ary1's length field and both
//                  addresses are advanced by the T_CHAR base offset.
//                  false: ary1/ary2 are used directly as data addresses and
//                  limit supplies the element count.
//   limit        - element count; converted to a byte count by the generated code
//   chr          - scratch register for the scalar compares
//   vec1, vec2   - vector scratch registers for the SSE4.2 / AVX2 paths
6929 void MacroAssembler::char_arrays_equals(bool is_array_equ, Register ary1, Register ary2,
6930 Register limit, Register result, Register chr,
6931 XMMRegister vec1, XMMRegister vec2) {
6932 ShortBranchVerifier sbv(this);
6933 Label TRUE_LABEL, FALSE_LABEL, DONE, COMPARE_VECTORS, COMPARE_CHAR;
6934
6935 int length_offset = arrayOopDesc::length_offset_in_bytes();
6936 int base_offset = arrayOopDesc::base_offset_in_bytes(T_CHAR);
6937
6938 // Check the input args
6939 cmpptr(ary1, ary2);
6940 jcc(Assembler::equal, TRUE_LABEL);
6941
6942 if (is_array_equ) {
6943 // Need additional checks for arrays_equals.
6944 testptr(ary1, ary1);
6945 jcc(Assembler::zero, FALSE_LABEL);
6946 testptr(ary2, ary2);
6947 jcc(Assembler::zero, FALSE_LABEL);
6948
6949 // Check the lengths
6950 movl(limit, Address(ary1, length_offset));
6951 cmpl(limit, Address(ary2, length_offset));
6952 jcc(Assembler::notEqual, FALSE_LABEL);
6953 }
6954
6955 // count == 0
6956 testl(limit, limit);
6957 jcc(Assembler::zero, TRUE_LABEL);
6958
6959 if (is_array_equ) {
6960 // Load array address
6961 lea(ary1, Address(ary1, base_offset));
6962 lea(ary2, Address(ary2, base_offset));
6963 }
6964
6965 shll(limit, 1); // byte count != 0
6966 movl(result, limit); // copy
6967
6968 if (UseAVX >= 2) {
6969 // With AVX2, use 32-byte vector compare
6970 Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;
6971
6972 // Compare 32-byte vectors
6973 andl(result, 0x0000001e); // tail count (in bytes)
6974 andl(limit, 0xffffffe0); // vector count (in bytes)
6975 jccb(Assembler::zero, COMPARE_TAIL);
6976
// Point both arrays past the vector part and count a negative offset up to zero.
6977 lea(ary1, Address(ary1, limit, Address::times_1));
6978 lea(ary2, Address(ary2, limit, Address::times_1));
6979 negptr(limit);
6980
6981 bind(COMPARE_WIDE_VECTORS);
6982 vmovdqu(vec1, Address(ary1, limit, Address::times_1));
6983 vmovdqu(vec2, Address(ary2, limit, Address::times_1));
6984 vpxor(vec1, vec2);
6985
6986 vptest(vec1, vec1);
6987 jccb(Assembler::notZero, FALSE_LABEL);
6988 addptr(limit, 32);
6989 jcc(Assembler::notZero, COMPARE_WIDE_VECTORS);
6990
6991 testl(result, result);
6992 jccb(Assembler::zero, TRUE_LABEL);
6993
// Compare the 32 bytes that end at the last tail byte.
6994 vmovdqu(vec1, Address(ary1, result, Address::times_1, -32));
6995 vmovdqu(vec2, Address(ary2, result, Address::times_1, -32));
6996 vpxor(vec1, vec2);
6997
6998 vptest(vec1, vec1);
6999 jccb(Assembler::notZero, FALSE_LABEL);
7000 jmpb(TRUE_LABEL);
7001
7002 bind(COMPARE_TAIL); // limit is zero
7003 movl(limit, result);
7004 // Fallthru to tail compare
7005 } else if (UseSSE42Intrinsics) {
7006 // With SSE4.2, use double quad vector compare
7007 Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;
7008
7009 // Compare 16-byte vectors
7010 andl(result, 0x0000000e); // tail count (in bytes)
7011 andl(limit, 0xfffffff0); // vector count (in bytes)
7012 jccb(Assembler::zero, COMPARE_TAIL);
7013
7014 lea(ary1, Address(ary1, limit, Address::times_1));
7015 lea(ary2, Address(ary2, limit, Address::times_1));
7016 negptr(limit);
7017
7018 bind(COMPARE_WIDE_VECTORS);
7019 movdqu(vec1, Address(ary1, limit, Address::times_1));
7020 movdqu(vec2, Address(ary2, limit, Address::times_1));
7021 pxor(vec1, vec2);
7022
7023 ptest(vec1, vec1);
7024 jccb(Assembler::notZero, FALSE_LABEL);
7025 addptr(limit, 16);
7026 jcc(Assembler::notZero, COMPARE_WIDE_VECTORS);
7027
7028 testl(result, result);
7029 jccb(Assembler::zero, TRUE_LABEL);
7030
7042 }
7043
7044 // Compare 4-byte vectors
7045 andl(limit, 0xfffffffc); // vector count (in bytes)
7046 jccb(Assembler::zero, COMPARE_CHAR);
7047
7048 lea(ary1, Address(ary1, limit, Address::times_1));
7049 lea(ary2, Address(ary2, limit, Address::times_1));
7050 negptr(limit);
7051
7052 bind(COMPARE_VECTORS);
7053 movl(chr, Address(ary1, limit, Address::times_1));
7054 cmpl(chr, Address(ary2, limit, Address::times_1));
7055 jccb(Assembler::notEqual, FALSE_LABEL);
7056 addptr(limit, 4);
7057 jcc(Assembler::notZero, COMPARE_VECTORS);
7058
7059 // Compare trailing char (final 2 bytes), if any
7060 bind(COMPARE_CHAR);
7061 testl(result, 0x2); // tail char
7062 jccb(Assembler::zero, TRUE_LABEL);
7063 load_unsigned_short(chr, Address(ary1, 0));
7064 load_unsigned_short(limit, Address(ary2, 0));
7065 cmpl(chr, limit);
7066 jccb(Assembler::notEqual, FALSE_LABEL);
7067
7068 bind(TRUE_LABEL);
7069 movl(result, 1); // return true
7070 jmpb(DONE);
7071
7072 bind(FALSE_LABEL);
7073 xorl(result, result); // return false
7074
7075 // That's it
7076 bind(DONE);
7077 if (UseAVX >= 2) {
7078 // clean upper bits of YMM registers
7079 vpxor(vec1, vec1);
7080 vpxor(vec2, vec2);
7081 }
7082 }
7083
7084 void MacroAssembler::generate_fill(BasicType t, bool aligned,
7085 Register to, Register value, Register count,
7086 Register rtmp, XMMRegister xtmp) {
7087 ShortBranchVerifier sbv(this);
7088 assert_different_registers(to, value, count, rtmp);
7089 Label L_exit, L_skip_align1, L_skip_align2, L_fill_byte;
7090 Label L_fill_2_bytes, L_fill_4_bytes;
7091
7092 int shift = -1;
7093 switch (t) {
7094 case T_BYTE:
7095 shift = 2;
7096 break;
7097 case T_SHORT:
7098 shift = 1;
7099 break;
7100 case T_INT:
7101 shift = 0;
7102 break;
7103 default: ShouldNotReachHere();
9066 jmp(L_wordByWord);
9067
9068 BIND(L_byteByByteProlog);
9069 andl(in2, 0x00000007);
9070 movl(tmp2, 1);
9071
9072 BIND(L_byteByByte);
9073 cmpl(tmp2, in2);
9074 jccb(Assembler::greater, L_exit);
9075 movb(tmp1, Address(in1, 0));
9076 crc32(in_out, tmp1, 1);
9077 incl(in1);
9078 incl(tmp2);
9079 jmp(L_byteByByte);
9080
9081 BIND(L_exit);
9082 }
9083 #endif // LP64
9084 #undef BIND
9085 #undef BLOCK_COMMENT
9086
9087
9088 Assembler::Condition MacroAssembler::negate_condition(Assembler::Condition cond) {
9089 switch (cond) {
9090 // Note some conditions are synonyms for others
9091 case Assembler::zero: return Assembler::notZero;
9092 case Assembler::notZero: return Assembler::zero;
9093 case Assembler::less: return Assembler::greaterEqual;
9094 case Assembler::lessEqual: return Assembler::greater;
9095 case Assembler::greater: return Assembler::lessEqual;
9096 case Assembler::greaterEqual: return Assembler::less;
9097 case Assembler::below: return Assembler::aboveEqual;
9098 case Assembler::belowEqual: return Assembler::above;
9099 case Assembler::above: return Assembler::belowEqual;
9100 case Assembler::aboveEqual: return Assembler::below;
9101 case Assembler::overflow: return Assembler::noOverflow;
9102 case Assembler::noOverflow: return Assembler::overflow;
9103 case Assembler::negative: return Assembler::positive;
9104 case Assembler::positive: return Assembler::negative;
9105 case Assembler::parity: return Assembler::noParity;
|
29 #include "gc/shared/cardTableModRefBS.hpp"
30 #include "gc/shared/collectedHeap.inline.hpp"
31 #include "interpreter/interpreter.hpp"
32 #include "memory/resourceArea.hpp"
33 #include "memory/universe.hpp"
34 #include "oops/klass.inline.hpp"
35 #include "prims/methodHandles.hpp"
36 #include "runtime/biasedLocking.hpp"
37 #include "runtime/interfaceSupport.hpp"
38 #include "runtime/objectMonitor.hpp"
39 #include "runtime/os.hpp"
40 #include "runtime/sharedRuntime.hpp"
41 #include "runtime/stubRoutines.hpp"
42 #include "utilities/macros.hpp"
43 #if INCLUDE_ALL_GCS
44 #include "gc/g1/g1CollectedHeap.inline.hpp"
45 #include "gc/g1/g1SATBCardTableModRefBS.hpp"
46 #include "gc/g1/heapRegion.hpp"
47 #endif // INCLUDE_ALL_GCS
48 #include "crc32c.h"
49 #ifdef COMPILER2
50 #include "opto/intrinsicnode.hpp"
51 #endif
52
// Helpers for annotating the generated code.
//   BLOCK_COMMENT(str) - forwards to block_comment(str) in non-PRODUCT builds,
//                        expands to nothing in PRODUCT builds.
//   STOP(error)        - calls stop(error); non-PRODUCT builds first record the
//                        message with block_comment(error).
//   BIND(label)        - binds the label and records "<label>:" as a block comment.
// The multi-statement macros are wrapped in do { ... } while (0) so that each
// one expands to exactly one statement; otherwise `if (cond) STOP("...");`
// would guard only the first call and always execute the second.
#ifdef PRODUCT
#define BLOCK_COMMENT(str) /* nothing */
#define STOP(error) stop(error)
#else
#define BLOCK_COMMENT(str) block_comment(str)
#define STOP(error) do { block_comment(error); stop(error); } while (0)
#endif

#define BIND(label) do { bind(label); BLOCK_COMMENT(#label ":"); } while (0)
62
#ifdef ASSERT
// Platform-dependent instruction-mark check (ASSERT builds only).
// This implementation performs no validation and always reports success.
bool AbstractAssembler::pd_check_instruction_mark() {
  return true;
}
#endif
66
67 static Assembler::Condition reverse[] = {
68 Assembler::noOverflow /* overflow = 0x0 */ ,
69 Assembler::overflow /* noOverflow = 0x1 */ ,
70 Assembler::aboveEqual /* carrySet = 0x2, below = 0x2 */ ,
71 Assembler::below /* aboveEqual = 0x3, carryClear = 0x3 */ ,
6285
6286 }
6287
// Emits code that zero-fills memory starting at 'base' with a rep-stos store.
// 'tmp' is cleared first and provides the stored value.
// The asserts pin the operands to fixed registers: base == rdi, cnt == rcx, tmp == rax.
6288 void MacroAssembler::clear_mem(Register base, Register cnt, Register tmp) {
6289 // cnt - number of qwords (8-byte words).
6290 // base - start address, qword aligned.
6291 assert(base==rdi, "base register must be edi for rep stos");
6292 assert(tmp==rax, "tmp register must be eax for rep stos");
6293 assert(cnt==rcx, "cnt register must be ecx for rep stos");
6294
6295 xorptr(tmp, tmp); // tmp = 0, the fill value
6296 if (UseFastStosb) {
// Byte-granular variant: the count is scaled from qwords to bytes first.
6297 shlptr(cnt,3); // convert to number of bytes
6298 rep_stosb();
6299 } else {
6300 NOT_LP64(shlptr(cnt,1);) // convert to number of dwords for 32-bit VM
6301 rep_stos();
6302 }
6303 }
6304
6305 #ifdef COMPILER2
6306
6307 // IndexOf for constant substrings with size >= 8 chars
6308 // which don't need to be loaded through stack.
//
// Emits an SSE4.2 (pcmpestri) substring search.
//   str1     - address of the string that is scanned
//   str2     - address of the substring
//   cnt1     - element count of the string; must be rdx
//   cnt2     - receives the substring element count; must be rax
//   int_cnt2 - compile-time substring length in elements; asserted to be >= stride
//   result   - on exit: element index of the match, or -1 if there is none
//   vec      - XMM register that holds the current substring vector
//   tmp      - must be rcx; pcmpestri writes the matched element index there
//   ae       - StrIntrinsicNode encoding (LL, UU or UL); LU is rejected by assert
6309 void MacroAssembler::string_indexofC8(Register str1, Register str2,
6310 Register cnt1, Register cnt2,
6311 int int_cnt2, Register result,
6312 XMMRegister vec, Register tmp,
6313 int ae) {
6314 ShortBranchVerifier sbv(this);
6315 assert(UseSSE42Intrinsics, "SSE4.2 is required");
6316 assert(ae != StrIntrinsicNode::LU, "Invalid encoding");
6317
6318 // This method uses the pcmpestri instruction with bound registers
6319 // inputs:
6320 // xmm - substring
6321 // rax - substring length (elements count)
6322 // mem - scanned string
6323 // rdx - string length (elements count)
6324 // 0xd - mode: 1100 (substring search) + 01 (unsigned shorts)
6325 // 0xc - mode: 1100 (substring search) + 00 (unsigned bytes)
6326 // outputs:
6327 // rcx - matched index in string
6328 assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri");
6329 int mode = (ae == StrIntrinsicNode::LL) ? 0x0c : 0x0d; // bytes or shorts
6330 int stride = (ae == StrIntrinsicNode::LL) ? 16 : 8; //UU, UL -> 8
// scale1 addresses the scanned string, scale2 the substring (byte-sized elements for UL).
6331 Address::ScaleFactor scale1 = (ae == StrIntrinsicNode::LL) ? Address::times_1 : Address::times_2;
6332 Address::ScaleFactor scale2 = (ae == StrIntrinsicNode::UL) ? Address::times_1 : scale1;
6333
6334 Label RELOAD_SUBSTR, SCAN_TO_SUBSTR, SCAN_SUBSTR,
6335 RET_FOUND, RET_NOT_FOUND, EXIT, FOUND_SUBSTR,
6336 MATCH_SUBSTR_HEAD, RELOAD_STR, FOUND_CANDIDATE;
6337
6338 // Note, inline_string_indexOf() generates checks:
6339 // if (substr.count > string.count) return -1;
6340 // if (substr.count == 0) return 0;
6341 assert(int_cnt2 >= stride, "this code is used only for cnt2 >= 8 chars");
6342
6343 // Load substring.
6344 if (ae == StrIntrinsicNode::UL) {
6345 pmovzxbw(vec, Address(str2, 0));
6346 } else {
6347 movdqu(vec, Address(str2, 0));
6348 }
6349 movl(cnt2, int_cnt2);
6350 movptr(result, str1); // string addr
6351
6352 if (int_cnt2 > stride) {
6353 jmpb(SCAN_TO_SUBSTR);
6354
6355 // Reload substr for rescan, this code
6356 // is executed only for large substrings (> 8 chars)
6357 bind(RELOAD_SUBSTR);
6358 if (ae == StrIntrinsicNode::UL) {
6359 pmovzxbw(vec, Address(str2, 0));
6360 } else {
6361 movdqu(vec, Address(str2, 0));
6362 }
6363 negptr(cnt2); // Jumped here with negative cnt2, convert to positive
6364
6365 bind(RELOAD_STR);
6366 // We came here after the beginning of the substring was
6367 // matched but the rest of it was not so we need to search
6368 // again. Start from the next element after the previous match.
6369
6370 // cnt2 is number of substring remaining elements and
6371 // cnt1 is number of string remaining elements when cmp failed.
6372 // Restored cnt1 = cnt1 - cnt2 + int_cnt2
6373 subl(cnt1, cnt2);
6374 addl(cnt1, int_cnt2);
6375 movl(cnt2, int_cnt2); // Now restore cnt2
6376
6377 decrementl(cnt1); // Shift to next element
6378 cmpl(cnt1, cnt2);
6379 jccb(Assembler::negative, RET_NOT_FOUND); // Left less than substring
6380
6381 addptr(result, (1<<scale1));
6382
6383 } // (int_cnt2 > 8)
6384
6385 // Scan string for start of substr in 16-byte vectors
6386 bind(SCAN_TO_SUBSTR);
6387 pcmpestri(vec, Address(result, 0), mode);
6388 jccb(Assembler::below, FOUND_CANDIDATE); // CF == 1
6389 subl(cnt1, stride);
6390 jccb(Assembler::lessEqual, RET_NOT_FOUND); // Scanned full string
6391 cmpl(cnt1, cnt2);
6392 jccb(Assembler::negative, RET_NOT_FOUND); // Left less than substring
6393 addptr(result, 16);
6394 jmpb(SCAN_TO_SUBSTR);
6395
6396 // Found a potential substr
6397 bind(FOUND_CANDIDATE);
6398 // Matched whole vector if first element matched (tmp(rcx) == 0).
6399 if (int_cnt2 == stride) {
6400 jccb(Assembler::overflow, RET_FOUND); // OF == 1
6401 } else { // int_cnt2 > 8
6402 jccb(Assembler::overflow, FOUND_SUBSTR);
6403 }
6404 // After pcmpestri tmp(rcx) contains matched element index
6405 // Compute start addr of substr
6406 lea(result, Address(result, tmp, scale1));
6407
6408 // Make sure string is still long enough
6409 subl(cnt1, tmp);
6410 cmpl(cnt1, cnt2);
6411 if (int_cnt2 == stride) {
6412 jccb(Assembler::greaterEqual, SCAN_TO_SUBSTR);
6413 } else { // int_cnt2 > 8
6414 jccb(Assembler::greaterEqual, MATCH_SUBSTR_HEAD);
6415 }
6416 // Left less than substring.
6417
6418 bind(RET_NOT_FOUND);
6419 movl(result, -1);
6420 jmpb(EXIT);
6421
6422 if (int_cnt2 > stride) {
6423 // This code is optimized for the case when whole substring
6424 // is matched if its head is matched.
6425 bind(MATCH_SUBSTR_HEAD);
6426 pcmpestri(vec, Address(result, 0), mode);
6427 // Reload only string if does not match
6428 jccb(Assembler::noOverflow, RELOAD_STR); // OF == 0
6429
6430 Label CONT_SCAN_SUBSTR;
6431 // Compare the rest of substring (> 8 chars).
6432 bind(FOUND_SUBSTR);
6433 // First 8 chars are already matched.
// From here on cnt2 runs negative and is stepped towards zero by 'stride' per iteration.
6434 negptr(cnt2);
6435 addptr(cnt2, stride);
6436
6437 bind(SCAN_SUBSTR);
6438 subl(cnt1, stride);
6439 cmpl(cnt2, -stride); // Do not read beyond substring
6440 jccb(Assembler::lessEqual, CONT_SCAN_SUBSTR);
6441 // Back-up strings to avoid reading beyond substring:
6442 // cnt1 = cnt1 - cnt2 + 8
6443 addl(cnt1, cnt2); // cnt2 is negative
6444 addl(cnt1, stride);
6445 movl(cnt2, stride); negptr(cnt2);
6446 bind(CONT_SCAN_SUBSTR);
6447 if (int_cnt2 < (int)G) {
// Small enough constant: fold the tail offset into the address displacement.
6448 int tail_off1 = int_cnt2<<scale1;
6449 int tail_off2 = int_cnt2<<scale2;
6450 if (ae == StrIntrinsicNode::UL) {
6451 pmovzxbw(vec, Address(str2, cnt2, scale2, tail_off2));
6452 } else {
6453 movdqu(vec, Address(str2, cnt2, scale2, tail_off2));
6454 }
6455 pcmpestri(vec, Address(result, cnt2, scale1, tail_off1), mode);
6456 } else {
6457 // calculate index in register to avoid integer overflow (int_cnt2*2)
6458 movl(tmp, int_cnt2);
6459 addptr(tmp, cnt2);
6460 if (ae == StrIntrinsicNode::UL) {
6461 pmovzxbw(vec, Address(str2, tmp, scale2, 0));
6462 } else {
6463 movdqu(vec, Address(str2, tmp, scale2, 0));
6464 }
6465 pcmpestri(vec, Address(result, tmp, scale1, 0), mode);
6466 }
6467 // Need to reload strings pointers if not matched whole vector
6468 jcc(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0
6469 addptr(cnt2, stride);
6470 jcc(Assembler::negative, SCAN_SUBSTR);
6471 // Fall through if found full substring
6472
6473 } // (int_cnt2 > 8)
6474
6475 bind(RET_FOUND);
6476 // Found result if we matched full small substring.
6477 // Compute substr offset
6478 subptr(result, str1);
6479 if (ae == StrIntrinsicNode::UU || ae == StrIntrinsicNode::UL) {
6480 shrl(result, 1); // index
6481 }
6482 bind(EXIT);
6483
6484 } // string_indexofC8
6485
6486 // Small strings are loaded through stack if they cross page boundary.
//
// Emits an SSE4.2 (pcmpestri) substring search for short constant substrings
// and for substrings whose length is only known at run time.
//   str1     - address of the string that is scanned
//   str2     - address of the substring
//   cnt1     - element count of the string; must be rdx
//   cnt2     - element count of the substring; must be rax
//   int_cnt2 - constant substring length (0 < int_cnt2 < stride), or -1 when the
//              length is taken from cnt2
//   result   - on exit: element index of the match, or -1 if there is none
//   vec      - XMM register that holds the current substring vector
//   tmp      - must be rcx; first used to hold the entry rsp, later the pcmpestri index
//   ae       - StrIntrinsicNode encoding (LL, UU or UL); LU is rejected by assert
// The entry stack pointer is pushed once loading is done and restored by pop(rsp) at CLEANUP.
6487 void MacroAssembler::string_indexof(Register str1, Register str2,
6488 Register cnt1, Register cnt2,
6489 int int_cnt2, Register result,
6490 XMMRegister vec, Register tmp,
6491 int ae) {
6492 ShortBranchVerifier sbv(this);
6493 assert(UseSSE42Intrinsics, "SSE4.2 is required");
6494 assert(ae != StrIntrinsicNode::LU, "Invalid encoding");
6495
6496 //
6497 // int_cnt2 is length of small (< 8 chars) constant substring
6498 // or (-1) for non constant substring in which case its length
6499 // is in cnt2 register.
6500 //
6501 // Note, inline_string_indexOf() generates checks:
6502 // if (substr.count > string.count) return -1;
6503 // if (substr.count == 0) return 0;
6504 //
6505 int stride = (ae == StrIntrinsicNode::LL) ? 16 : 8; //UU, UL -> 8
6506 assert(int_cnt2 == -1 || (0 < int_cnt2 && int_cnt2 < stride), "should be != 0");
6507 // This method uses the pcmpestri instruction with bound registers
6508 // inputs:
6509 // xmm - substring
6510 // rax - substring length (elements count)
6511 // mem - scanned string
6512 // rdx - string length (elements count)
6513 // 0xd - mode: 1100 (substring search) + 01 (unsigned shorts)
6514 // 0xc - mode: 1100 (substring search) + 00 (unsigned bytes)
6515 // outputs:
6516 // rcx - matched index in string
6517 assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri");
6518 int mode = (ae == StrIntrinsicNode::LL) ? 0x0c : 0x0d; // bytes or shorts
// scale1 addresses the scanned string, scale2 the substring (byte-sized elements for UL).
6519 Address::ScaleFactor scale1 = (ae == StrIntrinsicNode::LL) ? Address::times_1 : Address::times_2;
6520 Address::ScaleFactor scale2 = (ae == StrIntrinsicNode::UL) ? Address::times_1 : scale1;
6521
6522 Label RELOAD_SUBSTR, SCAN_TO_SUBSTR, SCAN_SUBSTR, ADJUST_STR,
6523 RET_FOUND, RET_NOT_FOUND, CLEANUP, FOUND_SUBSTR,
6524 FOUND_CANDIDATE;
6525
6526 { //========================================================
6527 // We don't know where these strings are located
6528 // and we can't read beyond them. Load them through stack.
6529 Label BIG_STRINGS, CHECK_STR, COPY_SUBSTR, COPY_STR;
6530
6531 movptr(tmp, rsp); // save old SP
6532
6533 if (int_cnt2 > 0) { // small (< 8 chars) constant substring
// (N>>scale2) below converts a size of N bytes into a substring element count.
6534 if (int_cnt2 == (1>>scale2)) { // One byte
6535 assert((ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL), "Only possible for latin1 encoding");
6536 load_unsigned_byte(result, Address(str2, 0));
6537 movdl(vec, result); // move 32 bits
6538 } else if (ae == StrIntrinsicNode::LL && int_cnt2 == 3) { // Three bytes
6539 // Not enough header space in 32-bit VM: 12+3 = 15.
// Load 4 bytes starting one byte before the data and shift the extra low byte out.
6540 movl(result, Address(str2, -1));
6541 shrl(result, 8);
6542 movdl(vec, result); // move 32 bits
6543 } else if (ae != StrIntrinsicNode::UL && int_cnt2 == (2>>scale2)) { // One char
6544 load_unsigned_short(result, Address(str2, 0));
6545 movdl(vec, result); // move 32 bits
6546 } else if (ae != StrIntrinsicNode::UL && int_cnt2 == (4>>scale2)) { // Two chars
6547 movdl(vec, Address(str2, 0)); // move 32 bits
6548 } else if (ae != StrIntrinsicNode::UL && int_cnt2 == (8>>scale2)) { // Four chars
6549 movq(vec, Address(str2, 0)); // move 64 bits
6550 } else { // cnt2 = { 3, 5, 6, 7 } || (ae == StrIntrinsicNode::UL && cnt2 ={2, ..., 7})
6551 // Array header size is 12 bytes in 32-bit VM
6552 // + 6 bytes for 3 chars == 18 bytes,
6553 // enough space to load vec and shift.
6554 assert(HeapWordSize*TypeArrayKlass::header_size() >= 12,"sanity");
// Load a full vector that ends at the end of the substring, then shift the
// substring down to the low end of vec.
6555 if (ae == StrIntrinsicNode::UL) {
6556 int tail_off = int_cnt2-8;
6557 pmovzxbw(vec, Address(str2, tail_off));
6558 psrldq(vec, -2*tail_off);
6559 }
6560 else {
6561 int tail_off = int_cnt2*(1<<scale2);
6562 movdqu(vec, Address(str2, tail_off-16));
6563 psrldq(vec, 16-tail_off);
6564 }
6565 }
6566 } else { // not constant substring
6567 cmpl(cnt2, stride);
6568 jccb(Assembler::aboveEqual, BIG_STRINGS); // Both strings are big enough
6569
6570 // We can read beyond string if str+16 does not cross page boundary
6571 // since heaps are aligned and mapped by pages.
6572 assert(os::vm_page_size() < (int)G, "default page should be small");
6573 movl(result, str2); // We need only low 32 bits
6574 andl(result, (os::vm_page_size()-1));
6575 cmpl(result, (os::vm_page_size()-16));
6576 jccb(Assembler::belowEqual, CHECK_STR);
6577
6578 // Move small strings to stack to allow load 16 bytes into vec.
6579 subptr(rsp, 16);
// The displacement skips the word pushed for cnt2 and converts the 1-based index to 0-based.
6580 int stk_offset = wordSize-(1<<scale2);
6581 push(cnt2);
6582
// Copies elements from the last one down to the first; cnt2 is the 1-based index.
6583 bind(COPY_SUBSTR);
6584 if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL) {
6585 load_unsigned_byte(result, Address(str2, cnt2, scale2, -1));
6586 movb(Address(rsp, cnt2, scale2, stk_offset), result);
6587 } else if (ae == StrIntrinsicNode::UU) {
6588 load_unsigned_short(result, Address(str2, cnt2, scale2, -2));
6589 movw(Address(rsp, cnt2, scale2, stk_offset), result);
6590 }
6591 decrement(cnt2);
6592 jccb(Assembler::notZero, COPY_SUBSTR);
6593
6594 pop(cnt2);
6595 movptr(str2, rsp); // New substring address
6596 } // non constant
6597
6598 bind(CHECK_STR);
6599 cmpl(cnt1, stride);
6600 jccb(Assembler::aboveEqual, BIG_STRINGS);
6601
6602 // Check cross page boundary.
6603 movl(result, str1); // We need only low 32 bits
6604 andl(result, (os::vm_page_size()-1));
6605 cmpl(result, (os::vm_page_size()-16));
6606 jccb(Assembler::belowEqual, BIG_STRINGS);
6607
6608 subptr(rsp, 16);
6609 int stk_offset = -(1<<scale1);
6610 if (int_cnt2 < 0) { // not constant
6611 push(cnt2);
6612 stk_offset += wordSize;
6613 }
// cnt2 is borrowed as the copy counter; it is restored below for the non-constant case.
6614 movl(cnt2, cnt1);
6615
6616 bind(COPY_STR);
6617 if (ae == StrIntrinsicNode::LL) {
6618 load_unsigned_byte(result, Address(str1, cnt2, scale1, -1));
6619 movb(Address(rsp, cnt2, scale1, stk_offset), result);
6620 } else {
6621 load_unsigned_short(result, Address(str1, cnt2, scale1, -2));
6622 movw(Address(rsp, cnt2, scale1, stk_offset), result);
6623 }
6624 decrement(cnt2);
6625 jccb(Assembler::notZero, COPY_STR);
6626
6627 if (int_cnt2 < 0) { // not constant
6628 pop(cnt2);
6629 }
6630 movptr(str1, rsp); // New string address
6631
6632 bind(BIG_STRINGS);
6633 // Load substring.
6634 if (int_cnt2 < 0) { // -1
6635 if (ae == StrIntrinsicNode::UL) {
6636 pmovzxbw(vec, Address(str2, 0));
6637 } else {
6638 movdqu(vec, Address(str2, 0));
6639 }
6640 push(cnt2); // substr count
6641 push(str2); // substr addr
6642 push(str1); // string addr
6643 } else {
6644 // Small (< 8 chars) constant substrings are loaded already.
6645 movl(cnt2, int_cnt2);
6646 }
6647 push(tmp); // original SP
6648
6649 } // Finished loading
6650
6651 //========================================================
6652 // Start search
6653 //
6654
6655 movptr(result, str1); // string addr
6656
6657 if (int_cnt2 < 0) { // Only for non constant substring
6658 jmpb(SCAN_TO_SUBSTR);
6659
6660 // SP saved at sp+0
6661 // String saved at sp+1*wordSize
6662 // Substr saved at sp+2*wordSize
6663 // Substr count saved at sp+3*wordSize
6664
6665 // Reload substr for rescan, this code
6666 // is executed only for large substrings (> 8 chars)
6667 bind(RELOAD_SUBSTR);
6668 movptr(str2, Address(rsp, 2*wordSize));
6669 movl(cnt2, Address(rsp, 3*wordSize));
6670 if (ae == StrIntrinsicNode::UL) {
6671 pmovzxbw(vec, Address(str2, 0));
6672 } else {
6673 movdqu(vec, Address(str2, 0));
6674 }
6675 // We came here after the beginning of the substring was
6676 // matched but the rest of it was not so we need to search
6677 // again. Start from the next element after the previous match.
6678 subptr(str1, result); // Restore counter
6679 if (ae == StrIntrinsicNode::UU || ae == StrIntrinsicNode::UL) {
6680 shrl(str1, 1);
6681 }
6682 addl(cnt1, str1);
6683 decrementl(cnt1); // Shift to next element
6684 cmpl(cnt1, cnt2);
6685 jccb(Assembler::negative, RET_NOT_FOUND); // Left less than substring
6686
6687 addptr(result, (1<<scale1));
6688 } // non constant
6689
6690 // Scan string for start of substr in 16-byte vectors
6691 bind(SCAN_TO_SUBSTR);
6692 assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri");
6693 pcmpestri(vec, Address(result, 0), mode);
6694 jccb(Assembler::below, FOUND_CANDIDATE); // CF == 1
6695 subl(cnt1, stride);
6696 jccb(Assembler::lessEqual, RET_NOT_FOUND); // Scanned full string
6697 cmpl(cnt1, cnt2);
6698 jccb(Assembler::negative, RET_NOT_FOUND); // Left less than substring
6699 addptr(result, 16);
6700
6701 bind(ADJUST_STR);
6702 cmpl(cnt1, stride); // Do not read beyond string
6703 jccb(Assembler::greaterEqual, SCAN_TO_SUBSTR);
6704 // Back-up string to avoid reading beyond string.
6705 lea(result, Address(result, cnt1, scale1, -16));
6706 movl(cnt1, stride);
6707 jmpb(SCAN_TO_SUBSTR);
6708
6709 // Found a potential substr
6710 bind(FOUND_CANDIDATE);
6711 // After pcmpestri tmp(rcx) contains matched element index
6712
6713 // Make sure string is still long enough
6714 subl(cnt1, tmp);
6715 cmpl(cnt1, cnt2);
6716 jccb(Assembler::greaterEqual, FOUND_SUBSTR);
6717 // Left less than substring.
6718
6719 bind(RET_NOT_FOUND);
6720 movl(result, -1);
6721 jmpb(CLEANUP);
6722
6723 bind(FOUND_SUBSTR);
6724 // Compute start addr of substr
6725 lea(result, Address(result, tmp, scale1));
6726 if (int_cnt2 > 0) { // Constant substring
6727 // Repeat search for small substring (< 8 chars)
6728 // from new point without reloading substring.
6729 // Have to check that we don't read beyond string.
6730 cmpl(tmp, stride-int_cnt2);
6731 jccb(Assembler::greater, ADJUST_STR);
6732 // Fall through if matched whole substring.
6733 } else { // non constant
6734 assert(int_cnt2 == -1, "should be != 0");
6735
6736 addl(tmp, cnt2);
6737 // Found result if we matched whole substring.
6738 cmpl(tmp, stride);
6739 jccb(Assembler::lessEqual, RET_FOUND);
6740
6741 // Repeat search for small substring (<= 8 chars)
6742 // from new point 'str1' without reloading substring.
6743 cmpl(cnt2, stride);
6744 // Have to check that we don't read beyond string.
6745 jccb(Assembler::lessEqual, ADJUST_STR);
6746
6747 Label CHECK_NEXT, CONT_SCAN_SUBSTR, RET_FOUND_LONG;
6748 // Compare the rest of substring (> 8 chars).
// str1 is reused as the scan cursor; the original string address stays saved on the stack.
6749 movptr(str1, result);
6750
6751 cmpl(tmp, cnt2);
6752 // First 8 chars are already matched.
6753 jccb(Assembler::equal, CHECK_NEXT);
6754
6755 bind(SCAN_SUBSTR);
6756 pcmpestri(vec, Address(str1, 0), mode);
6757 // Need to reload strings pointers if not matched whole vector
6758 jcc(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0
6759
6760 bind(CHECK_NEXT);
6761 subl(cnt2, stride);
6762 jccb(Assembler::lessEqual, RET_FOUND_LONG); // Found full substring
6763 addptr(str1, 16);
// The substring advances by 'stride' elements: 8 bytes when its elements are bytes (UL), else 16.
6764 if (ae == StrIntrinsicNode::UL) {
6765 addptr(str2, 8);
6766 } else {
6767 addptr(str2, 16);
6768 }
6769 subl(cnt1, stride);
6770 cmpl(cnt2, stride); // Do not read beyond substring
6771 jccb(Assembler::greaterEqual, CONT_SCAN_SUBSTR);
6772 // Back-up strings to avoid reading beyond substring.
6773
6774 if (ae == StrIntrinsicNode::UL) {
6775 lea(str2, Address(str2, cnt2, scale2, -8));
6776 lea(str1, Address(str1, cnt2, scale1, -16));
6777 } else {
6778 lea(str2, Address(str2, cnt2, scale2, -16));
6779 lea(str1, Address(str1, cnt2, scale1, -16));
6780 }
6781 subl(cnt1, cnt2);
6782 movl(cnt2, stride);
6783 addl(cnt1, stride);
6784 bind(CONT_SCAN_SUBSTR);
6785 if (ae == StrIntrinsicNode::UL) {
6786 pmovzxbw(vec, Address(str2, 0));
6787 } else {
6788 movdqu(vec, Address(str2, 0));
6789 }
6790 jmpb(SCAN_SUBSTR);
6791
6792 bind(RET_FOUND_LONG);
// Reload the string address saved at sp+1*wordSize before computing the index.
6793 movptr(str1, Address(rsp, wordSize));
6794 } // non constant
6795
6796 bind(RET_FOUND);
6797 // Compute substr offset
6798 subptr(result, str1);
6799 if (ae == StrIntrinsicNode::UU || ae == StrIntrinsicNode::UL) {
6800 shrl(result, 1); // index
6801 }
6802 bind(CLEANUP);
6803 pop(rsp); // restore SP
6804
6805 } // string_indexof
6806
// Emits a search for the 16-bit value 'ch' in the char sequence at 'str1'.
//   str1   - address of the first char; used again at the end to compute the index
//   cnt1   - number of chars to scan; modified by the generated code
//   ch     - the value searched for; overwritten on the vector-hit path (bsfl)
//   result - on exit: char index of the first hit, or -1 if there is none
//   vec1   - holds 'ch' replicated into every 16-bit lane
//   vec2   - zeroed; first operand of the (v)ptest
//   vec3   - holds the per-iteration compare result
//   tmp    - scratch: vector part of the count, loaded char, compare mask
// Scans 16 chars per step when UseAVX >= 2, then 8 chars per step, then one char at a time.
6807 void MacroAssembler::string_indexof_char(Register str1, Register cnt1, Register ch, Register result,
6808 XMMRegister vec1, XMMRegister vec2, XMMRegister vec3, Register tmp) {
6809 ShortBranchVerifier sbv(this);
6810 assert(UseSSE42Intrinsics, "SSE4.2 is required");
6811
6812 int stride = 8;
6813
6814 Label FOUND_CHAR, SCAN_TO_CHAR, SCAN_TO_CHAR_LOOP,
6815 SCAN_TO_8_CHAR, SCAN_TO_8_CHAR_LOOP, SCAN_TO_16_CHAR_LOOP,
6816 RET_NOT_FOUND, SCAN_TO_8_CHAR_INIT,
6817 FOUND_SEQ_CHAR, DONE_LABEL;
6818
6819 movptr(result, str1);
6820 if (UseAVX >= 2) {
// Dispatch on length: < 8 chars -> scalar loop, < 16 chars -> 8-char loop, else 16-char loop.
6821 cmpl(cnt1, stride);
6822 jccb(Assembler::less, SCAN_TO_CHAR_LOOP);
6823 cmpl(cnt1, 2*stride);
6824 jccb(Assembler::less, SCAN_TO_8_CHAR_INIT);
6825 movdl(vec1, ch);
6826 vpbroadcastw(vec1, vec1);
6827 vpxor(vec2, vec2);
6828 movl(tmp, cnt1);
6829 andl(tmp, 0xFFFFFFF0); //vector count (in chars)
6830 andl(cnt1,0x0000000F); //tail count (in chars)
6831
6832 bind(SCAN_TO_16_CHAR_LOOP);
6833 vmovdqu(vec3, Address(result, 0));
6834 vpcmpeqw(vec3, vec3, vec1, true);
6835 vptest(vec2, vec3);
6836 jcc(Assembler::carryClear, FOUND_CHAR);
6837 addptr(result, 32);
6838 subl(tmp, 2*stride);
6839 jccb(Assembler::notZero, SCAN_TO_16_CHAR_LOOP);
6840 jmp(SCAN_TO_8_CHAR);
6841 bind(SCAN_TO_8_CHAR_INIT);
// Replicate ch into all eight 16-bit lanes of vec1 for the 8-char loop.
6842 movdl(vec1, ch);
6843 pshuflw(vec1, vec1, 0x00);
6844 pshufd(vec1, vec1, 0);
6845 pxor(vec2, vec2);
6846 }
6847 if (UseAVX >= 2 || UseSSE42Intrinsics) {
6848 bind(SCAN_TO_8_CHAR);
6849 cmpl(cnt1, stride);
6850 if (UseAVX >= 2) {
6851 jccb(Assembler::less, SCAN_TO_CHAR);
6852 }
6853 if (!(UseAVX >= 2)) {
// Without AVX2 this is the entry path, so vec1/vec2 are set up here.
6854 jccb(Assembler::less, SCAN_TO_CHAR_LOOP);
6855 movdl(vec1, ch);
6856 pshuflw(vec1, vec1, 0x00);
6857 pshufd(vec1, vec1, 0);
6858 pxor(vec2, vec2);
6859 }
6860 movl(tmp, cnt1);
6861 andl(tmp, 0xFFFFFFF8); //vector count (in chars)
6862 andl(cnt1,0x00000007); //tail count (in chars)
6863
6864 bind(SCAN_TO_8_CHAR_LOOP);
6865 movdqu(vec3, Address(result, 0));
6866 pcmpeqw(vec3, vec1);
6867 ptest(vec2, vec3);
6868 jcc(Assembler::carryClear, FOUND_CHAR);
6869 addptr(result, 16);
6870 subl(tmp, stride);
6871 jccb(Assembler::notZero, SCAN_TO_8_CHAR_LOOP);
6872 }
6873 bind(SCAN_TO_CHAR);
6874 testl(cnt1, cnt1);
6875 jcc(Assembler::zero, RET_NOT_FOUND);
6876
// Scalar tail: compare one char per iteration.
6877 bind(SCAN_TO_CHAR_LOOP);
6878 load_unsigned_short(tmp, Address(result, 0));
6879 cmpl(ch, tmp);
6880 jccb(Assembler::equal, FOUND_SEQ_CHAR);
6881 addptr(result, 2);
6882 subl(cnt1, 1);
6883 jccb(Assembler::zero, RET_NOT_FOUND);
6884 jmp(SCAN_TO_CHAR_LOOP);
6885
6886 bind(RET_NOT_FOUND);
6887 movl(result, -1);
6888 jmpb(DONE_LABEL);
6889
6890 if (UseAVX >= 2 || UseSSE42Intrinsics) {
6891 bind(FOUND_CHAR);
// Turn the compare result into a byte mask; the lowest set bit is the byte offset of the hit.
6892 if (UseAVX >= 2) {
6893 vpmovmskb(tmp, vec3);
6894 } else {
6895 pmovmskb(tmp, vec3);
6896 }
6897 bsfl(ch, tmp);
// NOTE(review): 32-bit add on an address register; presumably relies on only the low
// 32 bits of the final offset being consumed below - confirm.
6898 addl(result, ch);
6899 }
6900
6901 bind(FOUND_SEQ_CHAR);
// Convert the byte offset from str1 into a char index.
6902 subptr(result, str1);
6903 shrl(result, 1);
6904
6905 bind(DONE_LABEL);
6906 } // string_indexof_char
6907
6908 // helper function for string_compare
// Emits zero-extending loads of one element from each string at 'index':
//   LL        - a byte from each string, both addressed with 'scale'
//   UU        - a 16-bit value from each string, both addressed with 'scale'
//   otherwise - a byte from str1 (scale1) and a 16-bit value from str2 (scale2)
// elem1 receives the str1 element and elem2 the str2 element.
6909 void MacroAssembler::load_next_elements(Register elem1, Register elem2, Register str1, Register str2,
6910 Address::ScaleFactor scale, Address::ScaleFactor scale1,
6911 Address::ScaleFactor scale2, Register index, int ae) {
6912 if (ae == StrIntrinsicNode::LL) {
6913 load_unsigned_byte(elem1, Address(str1, index, scale, 0));
6914 load_unsigned_byte(elem2, Address(str2, index, scale, 0));
6915 } else if (ae == StrIntrinsicNode::UU) {
6916 load_unsigned_short(elem1, Address(str1, index, scale, 0));
6917 load_unsigned_short(elem2, Address(str2, index, scale, 0));
6918 } else {
// Mixed encodings: 'scale' is not used on this path.
6919 load_unsigned_byte(elem1, Address(str1, index, scale1, 0));
6920 load_unsigned_short(elem2, Address(str2, index, scale2, 0));
6921 }
6922 }
6923
6924 // Compare strings, used for char[] and byte[].
//
// Emits a comparison of two strings.
//   str1, str2 - addresses of the first elements; both are advanced by the generated code
//   cnt1, cnt2 - lengths; cnt2 is halved up front for LU/UL and later for UU.
//                The vector paths require cnt1 == rcx and cnt2 == rdx.
//   result     - on exit: difference of the first pair of differing elements, or,
//                if the strings agree over the shorter length, cnt1 - cnt2
//                (halved for UU). Negated for UL. Must be rax on the vector paths.
//   vec1       - vector scratch register
//   ae         - StrIntrinsicNode encoding (LL, UU, LU or UL)
// The length difference is kept on the stack while comparing and is either
// popped into result (LENGTH_DIFF_LABEL) or discarded (POP_LABEL).
6925 void MacroAssembler::string_compare(Register str1, Register str2,
6926 Register cnt1, Register cnt2, Register result,
6927 XMMRegister vec1, int ae) {
6928 ShortBranchVerifier sbv(this);
6929 Label LENGTH_DIFF_LABEL, POP_LABEL, DONE_LABEL, WHILE_HEAD_LABEL;
6930 int stride, stride2, adr_stride, adr_stride1, adr_stride2;
// All three scale factors are handed to load_next_elements() on every path, but
// only 'scale' (LL/UU) or 'scale1'/'scale2' (LU/UL) is assigned below. Give each a
// defined value so that no indeterminate value is ever read.
6931 Address::ScaleFactor scale = Address::no_scale, scale1 = Address::no_scale, scale2 = Address::no_scale;
6932
6933 if (ae == StrIntrinsicNode::LU || ae == StrIntrinsicNode::UL) {
6934 shrl(cnt2, 1);
6935 }
6936 // Compute the minimum of the string lengths and the
6937 // difference of the string lengths (stack).
6938 // Do the conditional move stuff
6939 movl(result, cnt1);
6940 subl(cnt1, cnt2);
6941 push(cnt1);
6942 cmov32(Assembler::lessEqual, cnt2, result);
6943
6944 // Is the minimum length zero?
6945 testl(cnt2, cnt2);
6946 jcc(Assembler::zero, LENGTH_DIFF_LABEL);
6947 if (ae == StrIntrinsicNode::LL) {
6948 // Load first bytes
6949 load_unsigned_byte(result, Address(str1, 0));
6950 load_unsigned_byte(cnt1, Address(str2, 0));
6951 } else if (ae == StrIntrinsicNode::UU) {
6952 // Load first characters
6953 load_unsigned_short(result, Address(str1, 0));
6954 load_unsigned_short(cnt1, Address(str2, 0));
6955 } else {
// Mixed encodings: a byte from str1 against a 16-bit char from str2.
6956 load_unsigned_byte(result, Address(str1, 0));
6957 load_unsigned_short(cnt1, Address(str2, 0));
6958 }
6959 subl(result, cnt1);
6960 jcc(Assembler::notZero, POP_LABEL);
6961
6962 if (ae == StrIntrinsicNode::UU) {
6963 // Divide length by 2 to get number of chars
6964 shrl(cnt2, 1);
6965 }
6966 cmpl(cnt2, 1);
6967 jcc(Assembler::equal, LENGTH_DIFF_LABEL);
6968
6969 // Check if the strings start at the same location and setup scale and stride
6970 if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
6971 cmpptr(str1, str2);
6972 jcc(Assembler::equal, LENGTH_DIFF_LABEL);
6973 if (ae == StrIntrinsicNode::LL) {
6974 scale = Address::times_1;
6975 stride = 16;
6976 } else {
6977 scale = Address::times_2;
6978 stride = 8;
6979 }
6980 } else {
6981 scale1 = Address::times_1;
6982 scale2 = Address::times_2;
6983 stride = 8;
6984 }
6985
6986 if (UseAVX >= 2 && UseSSE42Intrinsics) {
6987 Label COMPARE_WIDE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_WIDE_TAIL, COMPARE_SMALL_STR;
6988 Label COMPARE_WIDE_VECTORS_LOOP, COMPARE_16_CHARS, COMPARE_INDEX_CHAR;
6989 Label COMPARE_TAIL_LONG;
6990 int pcmpmask = 0x19;
6991 if (ae == StrIntrinsicNode::LL) {
// Clear the low mode bit to select unsigned bytes instead of unsigned shorts.
6992 pcmpmask &= ~0x01;
6993 }
6994
6995 // Setup to compare 16-chars (32-bytes) vectors,
6996 // start from first character again because it has aligned address.
6997 if (ae == StrIntrinsicNode::LL) {
6998 stride2 = 32;
6999 } else {
7000 stride2 = 16;
7001 }
7002 if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
7003 adr_stride = stride << scale;
7004 } else {
7005 adr_stride1 = 8; //stride << scale1;
7006 adr_stride2 = 16; //stride << scale2;
7007 }
7008
7009 assert(result == rax && cnt2 == rdx && cnt1 == rcx, "pcmpestri");
7010 // rax and rdx are used by pcmpestri as elements counters
7011 movl(result, cnt2);
7012 andl(cnt2, ~(stride2-1)); // cnt2 holds the vector count
7013 jcc(Assembler::zero, COMPARE_TAIL_LONG);
7014
7015 // fast path : compare first 2 8-char vectors.
7016 bind(COMPARE_16_CHARS);
7017 if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
7018 movdqu(vec1, Address(str1, 0));
7019 } else {
7020 pmovzxbw(vec1, Address(str1, 0));
7021 }
7022 pcmpestri(vec1, Address(str2, 0), pcmpmask);
7023 jccb(Assembler::below, COMPARE_INDEX_CHAR);
7024
7025 if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
7026 movdqu(vec1, Address(str1, adr_stride));
7027 pcmpestri(vec1, Address(str2, adr_stride), pcmpmask);
7028 } else {
7029 pmovzxbw(vec1, Address(str1, adr_stride1));
7030 pcmpestri(vec1, Address(str2, adr_stride2), pcmpmask);
7031 }
7032 jccb(Assembler::aboveEqual, COMPARE_WIDE_VECTORS);
// Mismatch was in the second vector: rebase its index to the start of the first.
7033 addl(cnt1, stride);
7034
7035 // Compare the characters at index in cnt1
7036 bind(COMPARE_INDEX_CHAR); // cnt1 has the offset of the mismatching character
7037 load_next_elements(result, cnt2, str1, str2, scale, scale1, scale2, cnt1, ae);
7038 subl(result, cnt2);
7039 jmp(POP_LABEL);
7040
7041 // Setup the registers to start vector comparison loop
7042 bind(COMPARE_WIDE_VECTORS);
7043 if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
7044 lea(str1, Address(str1, result, scale));
7045 lea(str2, Address(str2, result, scale));
7046 } else {
7047 lea(str1, Address(str1, result, scale1));
7048 lea(str2, Address(str2, result, scale2));
7049 }
7050 subl(result, stride2);
7051 subl(cnt2, stride2);
7052 jccb(Assembler::zero, COMPARE_WIDE_TAIL);
7053 negptr(result);
7054
7055 // In a loop, compare 16-chars (32-bytes) at once using (vpxor+vptest)
7056 bind(COMPARE_WIDE_VECTORS_LOOP);
7057 if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
7058 vmovdqu(vec1, Address(str1, result, scale));
7059 vpxor(vec1, Address(str2, result, scale));
7060 } else {
7061 vpmovzxbw(vec1, Address(str1, result, scale1));
7062 vpxor(vec1, Address(str2, result, scale2));
7063 }
7064 vptest(vec1, vec1);
7065 jccb(Assembler::notZero, VECTOR_NOT_EQUAL);
7066 addptr(result, stride2);
7067 subl(cnt2, stride2);
7068 jccb(Assembler::notZero, COMPARE_WIDE_VECTORS_LOOP);
7069 // clean upper bits of YMM registers
7070 vpxor(vec1, vec1);
7071
7072 // compare wide vectors tail
7073 bind(COMPARE_WIDE_TAIL);
7074 testptr(result, result);
7075 jccb(Assembler::zero, LENGTH_DIFF_LABEL);
7076
// Run one more loop iteration over the last stride2 elements.
7077 movl(result, stride2);
7078 movl(cnt2, result);
7079 negptr(result);
7080 jmpb(COMPARE_WIDE_VECTORS_LOOP);
7081
7082 // Identifies the mismatching (higher or lower) 16-bytes in the 32-byte vectors.
7083 bind(VECTOR_NOT_EQUAL);
7084 // clean upper bits of YMM registers
7085 vpxor(vec1, vec1);
7086 if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
7087 lea(str1, Address(str1, result, scale));
7088 lea(str2, Address(str2, result, scale));
7089 } else {
7090 lea(str1, Address(str1, result, scale1));
7091 lea(str2, Address(str2, result, scale2));
7092 }
7093 jmp(COMPARE_16_CHARS);
7094
7095 // Compare tail chars, length between 1 to 15 chars
7096 bind(COMPARE_TAIL_LONG);
7097 movl(cnt2, result);
7098 cmpl(cnt2, stride);
7099 jccb(Assembler::less, COMPARE_SMALL_STR);
7100
7101 if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
7102 movdqu(vec1, Address(str1, 0));
7103 } else {
7104 pmovzxbw(vec1, Address(str1, 0));
7105 }
7106 pcmpestri(vec1, Address(str2, 0), pcmpmask);
7107 jcc(Assembler::below, COMPARE_INDEX_CHAR);
7108 subptr(cnt2, stride);
7109 jccb(Assembler::zero, LENGTH_DIFF_LABEL);
7110 if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
7111 lea(str1, Address(str1, result, scale));
7112 lea(str2, Address(str2, result, scale));
7113 } else {
7114 lea(str1, Address(str1, result, scale1));
7115 lea(str2, Address(str2, result, scale2));
7116 }
7117 negptr(cnt2);
7118 jmpb(WHILE_HEAD_LABEL);
7119
7120 bind(COMPARE_SMALL_STR);
7121 } else if (UseSSE42Intrinsics) {
7122 Label COMPARE_WIDE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_TAIL;
7123 int pcmpmask = 0x19;
7124 // Setup to compare 8-char (16-byte) vectors,
7125 // start from first character again because it has aligned address.
7126 movl(result, cnt2);
7127 andl(cnt2, ~(stride - 1)); // cnt2 holds the vector count
// Adjusting pcmpmask here emits no instruction, so the flags set by andl still feed the jccb below.
7128 if (ae == StrIntrinsicNode::LL) {
7129 pcmpmask &= ~0x01;
7130 }
7131 jccb(Assembler::zero, COMPARE_TAIL);
7132 if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
7133 lea(str1, Address(str1, result, scale));
7134 lea(str2, Address(str2, result, scale));
7135 } else {
7136 lea(str1, Address(str1, result, scale1));
7137 lea(str2, Address(str2, result, scale2));
7138 }
7139 negptr(result);
7140
7141 // pcmpestri
7142 // inputs:
7143 // vec1- substring
7144 // rax - negative string length (elements count)
7145 // mem - scanned string
7146 // rdx - string length (elements count)
7147 // pcmpmask - cmp mode: 11000 (string compare with negated result)
7148 // + 00 (unsigned bytes) or + 01 (unsigned shorts)
7149 // outputs:
7150 // rcx - first mismatched element index
7151 assert(result == rax && cnt2 == rdx && cnt1 == rcx, "pcmpestri");
7152
7153 bind(COMPARE_WIDE_VECTORS);
7154 if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
7155 movdqu(vec1, Address(str1, result, scale));
7156 pcmpestri(vec1, Address(str2, result, scale), pcmpmask);
7157 } else {
7158 pmovzxbw(vec1, Address(str1, result, scale1));
7159 pcmpestri(vec1, Address(str2, result, scale2), pcmpmask);
7160 }
7161 // After pcmpestri cnt1(rcx) contains mismatched element index
7162
7163 jccb(Assembler::below, VECTOR_NOT_EQUAL); // CF==1
7164 addptr(result, stride);
7165 subptr(cnt2, stride);
7166 jccb(Assembler::notZero, COMPARE_WIDE_VECTORS);
7167
7168 // compare wide vectors tail
7169 testptr(result, result);
7170 jccb(Assembler::zero, LENGTH_DIFF_LABEL);
7171
// Compare the last 'stride' elements once more to cover the tail.
7172 movl(cnt2, stride);
7173 movl(result, stride);
7174 negptr(result);
7175 if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
7176 movdqu(vec1, Address(str1, result, scale));
7177 pcmpestri(vec1, Address(str2, result, scale), pcmpmask);
7178 } else {
7179 pmovzxbw(vec1, Address(str1, result, scale1));
7180 pcmpestri(vec1, Address(str2, result, scale2), pcmpmask);
7181 }
7182 jccb(Assembler::aboveEqual, LENGTH_DIFF_LABEL);
7183
7184 // Mismatched characters in the vectors
7185 bind(VECTOR_NOT_EQUAL);
7186 addptr(cnt1, result);
7187 load_next_elements(result, cnt2, str1, str2, scale, scale1, scale2, cnt1, ae);
7188 subl(result, cnt2);
7189 jmpb(POP_LABEL);
7190
7191 bind(COMPARE_TAIL); // limit is zero
7192 movl(cnt2, result);
7193 // Fallthru to tail compare
7194 }
7195 // Shift str2 and str1 to the end of the arrays, negate min
7196 if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
7197 lea(str1, Address(str1, cnt2, scale));
7198 lea(str2, Address(str2, cnt2, scale));
7199 } else {
7200 lea(str1, Address(str1, cnt2, scale1));
7201 lea(str2, Address(str2, cnt2, scale2));
7202 }
7203 decrementl(cnt2); // first character was compared already
7204 negptr(cnt2);
7205
7206 // Compare the rest of the elements
7207 bind(WHILE_HEAD_LABEL);
7208 load_next_elements(result, cnt1, str1, str2, scale, scale1, scale2, cnt2, ae);
7209 subl(result, cnt1);
7210 jccb(Assembler::notZero, POP_LABEL);
7211 increment(cnt2);
7212 jccb(Assembler::notZero, WHILE_HEAD_LABEL);
7213
7214 // Strings are equal up to min length. Return the length difference.
7215 bind(LENGTH_DIFF_LABEL);
7216 pop(result);
7217 if (ae == StrIntrinsicNode::UU) {
7218 // Divide diff by 2 to get number of chars
7219 sarl(result, 1);
7220 }
7221 jmpb(DONE_LABEL);
7222
7223 // Discard the stored length difference
7224 bind(POP_LABEL);
7225 pop(cnt1);
7226
7227 // That's it
7228 bind(DONE_LABEL);
7229 if(ae == StrIntrinsicNode::UL) {
7230 negl(result);
7231 }
7232 }
7233
7234 // Search for Non-ASCII character (Negative byte value) in a byte array,
7235 // return true if it has any and false otherwise.
7236 void MacroAssembler::has_negatives(Register ary1, Register len,
7237 Register result, Register tmp1,
7238 XMMRegister vec1, XMMRegister vec2) {
7239
7240 // rsi: byte array
7241 // rcx: len
7242 // rax: result
7243 ShortBranchVerifier sbv(this);
7244 assert_different_registers(ary1, len, result, tmp1);
7245 assert_different_registers(vec1, vec2);
7246 Label TRUE_LABEL, FALSE_LABEL, DONE, COMPARE_CHAR, COMPARE_VECTORS, COMPARE_BYTE;
7247
7248 // len == 0
7249 testl(len, len);
7250 jcc(Assembler::zero, FALSE_LABEL);
7251
7252 movl(result, len); // copy
7253
7254 if (UseAVX >= 2) {
7255 // With AVX2, use 32-byte vector compare
7256 Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;
7257
7258 // Compare 32-byte vectors
7259 andl(result, 0x0000001f); // tail count (in bytes)
7260 andl(len, 0xffffffe0); // vector count (in bytes)
7261 jccb(Assembler::zero, COMPARE_TAIL);
7262
7263 lea(ary1, Address(ary1, len, Address::times_1));
7264 negptr(len);
7265
7266 movl(tmp1, 0x80808080); // create mask to test for Unicode chars in vector
7267 movdl(vec2, tmp1);
7268 vpbroadcastd(vec2, vec2);
7269
7270 bind(COMPARE_WIDE_VECTORS);
7271 vmovdqu(vec1, Address(ary1, len, Address::times_1));
7272 vptest(vec1, vec2);
7273 jccb(Assembler::notZero, TRUE_LABEL);
7274 addptr(len, 32);
7275 jcc(Assembler::notZero, COMPARE_WIDE_VECTORS);
7276
7277 testl(result, result);
7278 jccb(Assembler::zero, FALSE_LABEL);
7279
7280 vmovdqu(vec1, Address(ary1, result, Address::times_1, -32));
7281 vptest(vec1, vec2);
7282 jccb(Assembler::notZero, TRUE_LABEL);
7283 jmpb(FALSE_LABEL);
7284
7285 bind(COMPARE_TAIL); // len is zero
7286 movl(len, result);
7287 // Fallthru to tail compare
7288 } else if (UseSSE42Intrinsics) {
7289 // With SSE4.2, use double quad vector compare
7290 Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;
7291
7292 // Compare 16-byte vectors
7293 andl(result, 0x0000000f); // tail count (in bytes)
7294 andl(len, 0xfffffff0); // vector count (in bytes)
7295 jccb(Assembler::zero, COMPARE_TAIL);
7296
7297 lea(ary1, Address(ary1, len, Address::times_1));
7298 negptr(len);
7299
7300 movl(tmp1, 0x80808080);
7301 movdl(vec2, tmp1);
7302 pshufd(vec2, vec2, 0);
7303
7304 bind(COMPARE_WIDE_VECTORS);
7305 movdqu(vec1, Address(ary1, len, Address::times_1));
7306 ptest(vec1, vec2);
7307 jccb(Assembler::notZero, TRUE_LABEL);
7308 addptr(len, 16);
7309 jcc(Assembler::notZero, COMPARE_WIDE_VECTORS);
7310
7311 testl(result, result);
7312 jccb(Assembler::zero, FALSE_LABEL);
7313
7314 movdqu(vec1, Address(ary1, result, Address::times_1, -16));
7315 ptest(vec1, vec2);
7316 jccb(Assembler::notZero, TRUE_LABEL);
7317 jmpb(FALSE_LABEL);
7318
7319 bind(COMPARE_TAIL); // len is zero
7320 movl(len, result);
7321 // Fallthru to tail compare
7322 }
7323
7324 // Compare 4-byte vectors
7325 andl(len, 0xfffffffc); // vector count (in bytes)
7326 jccb(Assembler::zero, COMPARE_CHAR);
7327
7328 lea(ary1, Address(ary1, len, Address::times_1));
7329 negptr(len);
7330
7331 bind(COMPARE_VECTORS);
7332 movl(tmp1, Address(ary1, len, Address::times_1));
7333 andl(tmp1, 0x80808080);
7334 jccb(Assembler::notZero, TRUE_LABEL);
7335 addptr(len, 4);
7336 jcc(Assembler::notZero, COMPARE_VECTORS);
7337
7338 // Compare trailing char (final 2 bytes), if any
7339 bind(COMPARE_CHAR);
7340 testl(result, 0x2); // tail char
7341 jccb(Assembler::zero, COMPARE_BYTE);
7342 load_unsigned_short(tmp1, Address(ary1, 0));
7343 andl(tmp1, 0x00008080);
7344 jccb(Assembler::notZero, TRUE_LABEL);
7345 subptr(result, 2);
7346 lea(ary1, Address(ary1, 2));
7347
7348 bind(COMPARE_BYTE);
7349 testl(result, 0x1); // tail byte
7350 jccb(Assembler::zero, FALSE_LABEL);
7351 load_unsigned_byte(tmp1, Address(ary1, 0));
7352 andl(tmp1, 0x00000080);
7353 jccb(Assembler::notEqual, TRUE_LABEL);
7354 jmpb(FALSE_LABEL);
7355
7356 bind(TRUE_LABEL);
7357 movl(result, 1); // return true
7358 jmpb(DONE);
7359
7360 bind(FALSE_LABEL);
7361 xorl(result, result); // return false
7362
7363 // That's it
7364 bind(DONE);
7365 if (UseAVX >= 2) {
7366 // clean upper bits of YMM registers
7367 vpxor(vec1, vec1);
7368 vpxor(vec2, vec2);
7369 }
7370 }
7371
7372 // Compare char[] or byte[] arrays aligned to 4 bytes or substrings.
7373 void MacroAssembler::arrays_equals(bool is_array_equ, Register ary1, Register ary2,
7374 Register limit, Register result, Register chr,
7375 XMMRegister vec1, XMMRegister vec2, bool is_char) {
7376 ShortBranchVerifier sbv(this);
7377 Label TRUE_LABEL, FALSE_LABEL, DONE, COMPARE_VECTORS, COMPARE_CHAR, COMPARE_BYTE;
7378
7379 int length_offset = arrayOopDesc::length_offset_in_bytes();
7380 int base_offset = arrayOopDesc::base_offset_in_bytes(is_char ? T_CHAR : T_BYTE);
7381
7382 if (is_array_equ) {
7383 // Check the input args
7384 cmpptr(ary1, ary2);
7385 jcc(Assembler::equal, TRUE_LABEL);
7386
7387 // Need additional checks for arrays_equals.
7388 testptr(ary1, ary1);
7389 jcc(Assembler::zero, FALSE_LABEL);
7390 testptr(ary2, ary2);
7391 jcc(Assembler::zero, FALSE_LABEL);
7392
7393 // Check the lengths
7394 movl(limit, Address(ary1, length_offset));
7395 cmpl(limit, Address(ary2, length_offset));
7396 jcc(Assembler::notEqual, FALSE_LABEL);
7397 }
7398
7399 // count == 0
7400 testl(limit, limit);
7401 jcc(Assembler::zero, TRUE_LABEL);
7402
7403 if (is_array_equ) {
7404 // Load array address
7405 lea(ary1, Address(ary1, base_offset));
7406 lea(ary2, Address(ary2, base_offset));
7407 }
7408
7409 if (is_array_equ && is_char) {
7410 // arrays_equals when used for char[].
7411 shll(limit, 1); // byte count != 0
7412 }
7413 movl(result, limit); // copy
7414
7415 if (UseAVX >= 2) {
7416 // With AVX2, use 32-byte vector compare
7417 Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;
7418
7419 // Compare 32-byte vectors
7420 andl(result, 0x0000001f); // tail count (in bytes)
7421 andl(limit, 0xffffffe0); // vector count (in bytes)
7422 jccb(Assembler::zero, COMPARE_TAIL);
7423
7424 lea(ary1, Address(ary1, limit, Address::times_1));
7425 lea(ary2, Address(ary2, limit, Address::times_1));
7426 negptr(limit);
7427
7428 bind(COMPARE_WIDE_VECTORS);
7429 vmovdqu(vec1, Address(ary1, limit, Address::times_1));
7430 vmovdqu(vec2, Address(ary2, limit, Address::times_1));
7431 vpxor(vec1, vec2);
7432
7433 vptest(vec1, vec1);
7434 jccb(Assembler::notZero, FALSE_LABEL);
7435 addptr(limit, 32);
7436 jcc(Assembler::notZero, COMPARE_WIDE_VECTORS);
7437
7438 testl(result, result);
7439 jccb(Assembler::zero, TRUE_LABEL);
7440
7441 vmovdqu(vec1, Address(ary1, result, Address::times_1, -32));
7442 vmovdqu(vec2, Address(ary2, result, Address::times_1, -32));
7443 vpxor(vec1, vec2);
7444
7445 vptest(vec1, vec1);
7446 jccb(Assembler::notZero, FALSE_LABEL);
7447 jmpb(TRUE_LABEL);
7448
7449 bind(COMPARE_TAIL); // limit is zero
7450 movl(limit, result);
7451 // Fallthru to tail compare
7452 } else if (UseSSE42Intrinsics) {
7453 // With SSE4.2, use double quad vector compare
7454 Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;
7455
7456 // Compare 16-byte vectors
7457 andl(result, 0x0000000f); // tail count (in bytes)
7458 andl(limit, 0xfffffff0); // vector count (in bytes)
7459 jccb(Assembler::zero, COMPARE_TAIL);
7460
7461 lea(ary1, Address(ary1, limit, Address::times_1));
7462 lea(ary2, Address(ary2, limit, Address::times_1));
7463 negptr(limit);
7464
7465 bind(COMPARE_WIDE_VECTORS);
7466 movdqu(vec1, Address(ary1, limit, Address::times_1));
7467 movdqu(vec2, Address(ary2, limit, Address::times_1));
7468 pxor(vec1, vec2);
7469
7470 ptest(vec1, vec1);
7471 jccb(Assembler::notZero, FALSE_LABEL);
7472 addptr(limit, 16);
7473 jcc(Assembler::notZero, COMPARE_WIDE_VECTORS);
7474
7475 testl(result, result);
7476 jccb(Assembler::zero, TRUE_LABEL);
7477
7489 }
7490
7491 // Compare 4-byte vectors
7492 andl(limit, 0xfffffffc); // vector count (in bytes)
7493 jccb(Assembler::zero, COMPARE_CHAR);
7494
7495 lea(ary1, Address(ary1, limit, Address::times_1));
7496 lea(ary2, Address(ary2, limit, Address::times_1));
7497 negptr(limit);
7498
7499 bind(COMPARE_VECTORS);
7500 movl(chr, Address(ary1, limit, Address::times_1));
7501 cmpl(chr, Address(ary2, limit, Address::times_1));
7502 jccb(Assembler::notEqual, FALSE_LABEL);
7503 addptr(limit, 4);
7504 jcc(Assembler::notZero, COMPARE_VECTORS);
7505
7506 // Compare trailing char (final 2 bytes), if any
7507 bind(COMPARE_CHAR);
7508 testl(result, 0x2); // tail char
7509 jccb(Assembler::zero, COMPARE_BYTE);
7510 load_unsigned_short(chr, Address(ary1, 0));
7511 load_unsigned_short(limit, Address(ary2, 0));
7512 cmpl(chr, limit);
7513 jccb(Assembler::notEqual, FALSE_LABEL);
7514
7515 if (is_array_equ && is_char) {
7516 bind(COMPARE_BYTE);
7517 } else {
7518 lea(ary1, Address(ary1, 2));
7519 lea(ary2, Address(ary2, 2));
7520
7521 bind(COMPARE_BYTE);
7522 testl(result, 0x1); // tail byte
7523 jccb(Assembler::zero, TRUE_LABEL);
7524 load_unsigned_byte(chr, Address(ary1, 0));
7525 load_unsigned_byte(limit, Address(ary2, 0));
7526 cmpl(chr, limit);
7527 jccb(Assembler::notEqual, FALSE_LABEL);
7528 }
7529 bind(TRUE_LABEL);
7530 movl(result, 1); // return true
7531 jmpb(DONE);
7532
7533 bind(FALSE_LABEL);
7534 xorl(result, result); // return false
7535
7536 // That's it
7537 bind(DONE);
7538 if (UseAVX >= 2) {
7539 // clean upper bits of YMM registers
7540 vpxor(vec1, vec1);
7541 vpxor(vec2, vec2);
7542 }
7543 }
7544
7545 #endif
7546
7547 void MacroAssembler::generate_fill(BasicType t, bool aligned,
7548 Register to, Register value, Register count,
7549 Register rtmp, XMMRegister xtmp) {
7550 ShortBranchVerifier sbv(this);
7551 assert_different_registers(to, value, count, rtmp);
7552 Label L_exit, L_skip_align1, L_skip_align2, L_fill_byte;
7553 Label L_fill_2_bytes, L_fill_4_bytes;
7554
7555 int shift = -1;
7556 switch (t) {
7557 case T_BYTE:
7558 shift = 2;
7559 break;
7560 case T_SHORT:
7561 shift = 1;
7562 break;
7563 case T_INT:
7564 shift = 0;
7565 break;
7566 default: ShouldNotReachHere();
9529 jmp(L_wordByWord);
9530
9531 BIND(L_byteByByteProlog);
9532 andl(in2, 0x00000007);
9533 movl(tmp2, 1);
9534
9535 BIND(L_byteByByte);
9536 cmpl(tmp2, in2);
9537 jccb(Assembler::greater, L_exit);
9538 movb(tmp1, Address(in1, 0));
9539 crc32(in_out, tmp1, 1);
9540 incl(in1);
9541 incl(tmp2);
9542 jmp(L_byteByByte);
9543
9544 BIND(L_exit);
9545 }
9546 #endif // LP64
9547 #undef BIND
9548 #undef BLOCK_COMMENT
9549
9550
9551 // Compress char[] array to byte[].
9552 void MacroAssembler::char_array_compress(Register src, Register dst, Register len,
9553 XMMRegister tmp1Reg, XMMRegister tmp2Reg,
9554 XMMRegister tmp3Reg, XMMRegister tmp4Reg,
9555 Register tmp5, Register result) {
9556 Label copy_chars_loop, return_length, return_zero, done;
9557
9558 // rsi: src
9559 // rdi: dst
9560 // rdx: len
9561 // rcx: tmp5
9562 // rax: result
9563
9564 // rsi holds start addr of source char[] to be compressed
9565 // rdi holds start addr of destination byte[]
9566 // rdx holds length
9567
9568 assert(len != result, "");
9569
9570 // save length for return
9571 push(len);
9572
9573 if (UseSSE42Intrinsics) {
9574 Label copy_32_loop, copy_16, copy_tail;
9575
9576 movl(result, len);
9577 movl(tmp5, 0xff00ff00); // create mask to test for Unicode chars in vectors
9578
9579 // vectored compression
9580 andl(len, 0xfffffff0); // vector count (in chars)
9581 andl(result, 0x0000000f); // tail count (in chars)
9582 testl(len, len);
9583 jccb(Assembler::zero, copy_16);
9584
9585 // compress 16 chars per iter
9586 movdl(tmp1Reg, tmp5);
9587 pshufd(tmp1Reg, tmp1Reg, 0); // store Unicode mask in tmp1Reg
9588 pxor(tmp4Reg, tmp4Reg);
9589
9590 lea(src, Address(src, len, Address::times_2));
9591 lea(dst, Address(dst, len, Address::times_1));
9592 negptr(len);
9593
9594 bind(copy_32_loop);
9595 movdqu(tmp2Reg, Address(src, len, Address::times_2)); // load 1st 8 characters
9596 por(tmp4Reg, tmp2Reg);
9597 movdqu(tmp3Reg, Address(src, len, Address::times_2, 16)); // load next 8 characters
9598 por(tmp4Reg, tmp3Reg);
9599 ptest(tmp4Reg, tmp1Reg); // check for Unicode chars in next vector
9600 jcc(Assembler::notZero, return_zero);
9601 packuswb(tmp2Reg, tmp3Reg); // only ASCII chars; compress each to 1 byte
9602 movdqu(Address(dst, len, Address::times_1), tmp2Reg);
9603 addptr(len, 16);
9604 jcc(Assembler::notZero, copy_32_loop);
9605
9606 // compress next vector of 8 chars (if any)
9607 bind(copy_16);
9608 movl(len, result);
9609 andl(len, 0xfffffff8); // vector count (in chars)
9610 andl(result, 0x00000007); // tail count (in chars)
9611 testl(len, len);
9612 jccb(Assembler::zero, copy_tail);
9613
9614 movdl(tmp1Reg, tmp5);
9615 pshufd(tmp1Reg, tmp1Reg, 0); // store Unicode mask in tmp1Reg
9616 pxor(tmp3Reg, tmp3Reg);
9617
9618 movdqu(tmp2Reg, Address(src, 0));
9619 ptest(tmp2Reg, tmp1Reg); // check for Unicode chars in vector
9620 jccb(Assembler::notZero, return_zero);
9621 packuswb(tmp2Reg, tmp3Reg); // only LATIN1 chars; compress each to 1 byte
9622 movq(Address(dst, 0), tmp2Reg);
9623 addptr(src, 16);
9624 addptr(dst, 8);
9625
9626 bind(copy_tail);
9627 movl(len, result);
9628 }
9629 // compress 1 char per iter
9630 testl(len, len);
9631 jccb(Assembler::zero, return_length);
9632 lea(src, Address(src, len, Address::times_2));
9633 lea(dst, Address(dst, len, Address::times_1));
9634 negptr(len);
9635
9636 bind(copy_chars_loop);
9637 load_unsigned_short(result, Address(src, len, Address::times_2));
9638 testl(result, 0xff00); // check if Unicode char
9639 jccb(Assembler::notZero, return_zero);
9640 movb(Address(dst, len, Address::times_1), result); // ASCII char; compress to 1 byte
9641 increment(len);
9642 jcc(Assembler::notZero, copy_chars_loop);
9643
9644 // if compression succeeded, return length
9645 bind(return_length);
9646 pop(result);
9647 jmpb(done);
9648
9649 // if compression failed, return 0
9650 bind(return_zero);
9651 xorl(result, result);
9652 addptr(rsp, wordSize);
9653
9654 bind(done);
9655 }
9656
9657 // Inflate byte[] array to char[].
9658 void MacroAssembler::byte_array_inflate(Register src, Register dst, Register len,
9659 XMMRegister tmp1, Register tmp2) {
9660 Label copy_chars_loop, done;
9661
9662 // rsi: src
9663 // rdi: dst
9664 // rdx: len
9665 // rcx: tmp2
9666
9667 // rsi holds start addr of source byte[] to be inflated
9668 // rdi holds start addr of destination char[]
9669 // rdx holds length
9670 assert_different_registers(src, dst, len, tmp2);
9671
9672 if (UseSSE42Intrinsics) {
9673 Label copy_8_loop, copy_bytes, copy_tail;
9674
9675 movl(tmp2, len);
9676 andl(tmp2, 0x00000007); // tail count (in chars)
9677 andl(len, 0xfffffff8); // vector count (in chars)
9678 jccb(Assembler::zero, copy_tail);
9679
9680 // vectored inflation
9681 lea(src, Address(src, len, Address::times_1));
9682 lea(dst, Address(dst, len, Address::times_2));
9683 negptr(len);
9684
9685 // inflate 8 chars per iter
9686 bind(copy_8_loop);
9687 pmovzxbw(tmp1, Address(src, len, Address::times_1)); // unpack to 8 words
9688 movdqu(Address(dst, len, Address::times_2), tmp1);
9689 addptr(len, 8);
9690 jcc(Assembler::notZero, copy_8_loop);
9691
9692 bind(copy_tail);
9693 movl(len, tmp2);
9694
9695 cmpl(len, 4);
9696 jccb(Assembler::less, copy_bytes);
9697
9698 movdl(tmp1, Address(src, 0)); // load 4 byte chars
9699 pmovzxbw(tmp1, tmp1);
9700 movq(Address(dst, 0), tmp1);
9701 subptr(len, 4);
9702 addptr(src, 4);
9703 addptr(dst, 8);
9704
9705 bind(copy_bytes);
9706 }
9707 testl(len, len);
9708 jccb(Assembler::zero, done);
9709 lea(src, Address(src, len, Address::times_1));
9710 lea(dst, Address(dst, len, Address::times_2));
9711 negptr(len);
9712
9713 // inflate 1 char per iter
9714 bind(copy_chars_loop);
9715 load_unsigned_byte(tmp2, Address(src, len, Address::times_1)); // load byte char
9716 movw(Address(dst, len, Address::times_2), tmp2); // inflate byte char to word
9717 increment(len);
9718 jcc(Assembler::notZero, copy_chars_loop);
9719
9720 bind(done);
9721 }
9722
9723
9724 Assembler::Condition MacroAssembler::negate_condition(Assembler::Condition cond) {
9725 switch (cond) {
9726 // Note some conditions are synonyms for others
9727 case Assembler::zero: return Assembler::notZero;
9728 case Assembler::notZero: return Assembler::zero;
9729 case Assembler::less: return Assembler::greaterEqual;
9730 case Assembler::lessEqual: return Assembler::greater;
9731 case Assembler::greater: return Assembler::lessEqual;
9732 case Assembler::greaterEqual: return Assembler::less;
9733 case Assembler::below: return Assembler::aboveEqual;
9734 case Assembler::belowEqual: return Assembler::above;
9735 case Assembler::above: return Assembler::belowEqual;
9736 case Assembler::aboveEqual: return Assembler::below;
9737 case Assembler::overflow: return Assembler::noOverflow;
9738 case Assembler::noOverflow: return Assembler::overflow;
9739 case Assembler::negative: return Assembler::positive;
9740 case Assembler::positive: return Assembler::negative;
9741 case Assembler::parity: return Assembler::noParity;
|