28 #include "compiler/disassembler.hpp"
29 #include "gc/shared/cardTableModRefBS.hpp"
30 #include "gc/shared/collectedHeap.inline.hpp"
31 #include "interpreter/interpreter.hpp"
32 #include "memory/resourceArea.hpp"
33 #include "memory/universe.hpp"
34 #include "oops/klass.inline.hpp"
35 #include "prims/methodHandles.hpp"
36 #include "runtime/biasedLocking.hpp"
37 #include "runtime/interfaceSupport.hpp"
38 #include "runtime/objectMonitor.hpp"
39 #include "runtime/os.hpp"
40 #include "runtime/sharedRuntime.hpp"
41 #include "runtime/stubRoutines.hpp"
42 #include "utilities/macros.hpp"
43 #if INCLUDE_ALL_GCS
44 #include "gc/g1/g1CollectedHeap.inline.hpp"
45 #include "gc/g1/g1SATBCardTableModRefBS.hpp"
46 #include "gc/g1/heapRegion.hpp"
47 #endif // INCLUDE_ALL_GCS
48
// Listing helpers for generated code.
//   BLOCK_COMMENT(str) - records str via block_comment(); compiled out when
//                        PRODUCT is defined.
//   STOP(error)        - calls stop(error); when PRODUCT is not defined the
//                        message is also recorded via block_comment() first.
//   BIND(label)        - binds the label and records "<label>:" as a comment.
// The multi-statement macros are wrapped in do { } while (0) so that they
// expand to exactly one statement; otherwise only the first call would be
// guarded when the macro is used under an unbraced if/else.
#ifdef PRODUCT
#define BLOCK_COMMENT(str) /* nothing */
#define STOP(error) stop(error)
#else
#define BLOCK_COMMENT(str) block_comment(str)
#define STOP(error) do { block_comment(error); stop(error); } while (0)
#endif

#define BIND(label) do { bind(label); BLOCK_COMMENT(#label ":"); } while (0)
58
59 PRAGMA_FORMAT_MUTE_WARNINGS_FOR_GCC
60
61 #ifdef ASSERT
62 bool AbstractAssembler::pd_check_instruction_mark() { return true; }
63 #endif
64
65 static Assembler::Condition reverse[] = {
66 Assembler::noOverflow /* overflow = 0x0 */ ,
67 Assembler::overflow /* noOverflow = 0x1 */ ,
8501
8502 BIND(L_tail_restore);
8503 movl(len, tmp); // restore
8504 BIND(L_tail);
8505 andl(len, 0xf);
8506 jccb(Assembler::zero, L_exit);
8507
8508 // Fold the rest of bytes
8509 align(4);
8510 BIND(L_tail_loop);
8511 movsbl(rax, Address(buf, 0)); // load byte with sign extension
8512 update_byte_crc32(crc, rax, table);
8513 increment(buf);
8514 decrementl(len);
8515 jccb(Assembler::greater, L_tail_loop);
8516
8517 BIND(L_exit);
8518 notl(crc); // ~c
8519 }
8520
8521 #undef BIND
8522 #undef BLOCK_COMMENT
8523
8524
8525 Assembler::Condition MacroAssembler::negate_condition(Assembler::Condition cond) {
8526 switch (cond) {
8527 // Note some conditions are synonyms for others
8528 case Assembler::zero: return Assembler::notZero;
8529 case Assembler::notZero: return Assembler::zero;
8530 case Assembler::less: return Assembler::greaterEqual;
8531 case Assembler::lessEqual: return Assembler::greater;
8532 case Assembler::greater: return Assembler::lessEqual;
8533 case Assembler::greaterEqual: return Assembler::less;
8534 case Assembler::below: return Assembler::aboveEqual;
8535 case Assembler::belowEqual: return Assembler::above;
8536 case Assembler::above: return Assembler::belowEqual;
8537 case Assembler::aboveEqual: return Assembler::below;
8538 case Assembler::overflow: return Assembler::noOverflow;
8539 case Assembler::noOverflow: return Assembler::overflow;
8540 case Assembler::negative: return Assembler::positive;
|
28 #include "compiler/disassembler.hpp"
29 #include "gc/shared/cardTableModRefBS.hpp"
30 #include "gc/shared/collectedHeap.inline.hpp"
31 #include "interpreter/interpreter.hpp"
32 #include "memory/resourceArea.hpp"
33 #include "memory/universe.hpp"
34 #include "oops/klass.inline.hpp"
35 #include "prims/methodHandles.hpp"
36 #include "runtime/biasedLocking.hpp"
37 #include "runtime/interfaceSupport.hpp"
38 #include "runtime/objectMonitor.hpp"
39 #include "runtime/os.hpp"
40 #include "runtime/sharedRuntime.hpp"
41 #include "runtime/stubRoutines.hpp"
42 #include "utilities/macros.hpp"
43 #if INCLUDE_ALL_GCS
44 #include "gc/g1/g1CollectedHeap.inline.hpp"
45 #include "gc/g1/g1SATBCardTableModRefBS.hpp"
46 #include "gc/g1/heapRegion.hpp"
47 #endif // INCLUDE_ALL_GCS
48 #include "crc32c.h"
49
// Listing helpers for generated code.
//   BLOCK_COMMENT(str) - records str via block_comment(); compiled out when
//                        PRODUCT is defined.
//   STOP(error)        - calls stop(error); when PRODUCT is not defined the
//                        message is also recorded via block_comment() first.
//   BIND(label)        - binds the label and records "<label>:" as a comment.
// The multi-statement macros are wrapped in do { } while (0) so that they
// expand to exactly one statement; otherwise only the first call would be
// guarded when the macro is used under an unbraced if/else.
#ifdef PRODUCT
#define BLOCK_COMMENT(str) /* nothing */
#define STOP(error) stop(error)
#else
#define BLOCK_COMMENT(str) block_comment(str)
#define STOP(error) do { block_comment(error); stop(error); } while (0)
#endif

#define BIND(label) do { bind(label); BLOCK_COMMENT(#label ":"); } while (0)
59
60 PRAGMA_FORMAT_MUTE_WARNINGS_FOR_GCC
61
62 #ifdef ASSERT
63 bool AbstractAssembler::pd_check_instruction_mark() { return true; }
64 #endif
65
66 static Assembler::Condition reverse[] = {
67 Assembler::noOverflow /* overflow = 0x0 */ ,
68 Assembler::overflow /* noOverflow = 0x1 */ ,
8502
8503 BIND(L_tail_restore);
8504 movl(len, tmp); // restore
8505 BIND(L_tail);
8506 andl(len, 0xf);
8507 jccb(Assembler::zero, L_exit);
8508
8509 // Fold the rest of bytes
8510 align(4);
8511 BIND(L_tail_loop);
8512 movsbl(rax, Address(buf, 0)); // load byte with sign extension
8513 update_byte_crc32(crc, rax, table);
8514 increment(buf);
8515 decrementl(len);
8516 jccb(Assembler::greater, L_tail_loop);
8517
8518 BIND(L_exit);
8519 notl(crc); // ~c
8520 }
8521
8522 #ifdef _LP64
8523 // S. Gueron / Information Processing Letters 112 (2012) 184
8524 // Algorithm 4: Computing carry-less multiplication using a precomputed lookup table.
8525 // Input: A 32 bit value B = [byte3, byte2, byte1, byte0].
8526 // Output: the 64-bit carry-less product of B * CONST
//
// Register usage in the code emitted below (64-bit variant):
//   in               - holds B on entry (only the low 32 bits are read) and is
//                      overwritten with the 64-bit result.
//   n                - selects which table to use; table n starts
//                      n * 256 * 8 bytes past StubRoutines::crc32c_table_addr().
//   tmp1, tmp2, tmp3 - scratch registers; all three are overwritten.
// Each lookup turns one byte of B into a byte offset (index << 3, i.e. 8-byte
// entries) and loads a 64-bit entry from the selected table.
8527 void MacroAssembler::crc32c_ipl_alg4(Register in, uint32_t n,
8528 Register tmp1, Register tmp2, Register tmp3) {
// tmp3 = base address of table n
8529 lea(tmp3, ExternalAddress(StubRoutines::crc32c_table_addr()));
8530 if (n > 0) {
8531 addq(tmp3, n * 256 * 8);
8532 }
8533 // Q1 = TABLEExt[n][B & 0xFF];
8534 movl(tmp1, in);
8535 andl(tmp1, 0x000000FF);
8536 shll(tmp1, 3);
8537 addq(tmp1, tmp3);
8538 movq(tmp1, Address(tmp1, 0));
8539
8540 // Q2 = TABLEExt[n][B >> 8 & 0xFF];
8541 movl(tmp2, in);
8542 shrl(tmp2, 8);
8543 andl(tmp2, 0x000000FF);
8544 shll(tmp2, 3);
8545 addq(tmp2, tmp3);
8546 movq(tmp2, Address(tmp2, 0));
8547
// tmp1 = Q1 ^ (Q2 << 8)
8548 shlq(tmp2, 8);
8549 xorq(tmp1, tmp2);
8550
8551 // Q3 = TABLEExt[n][B >> 16 & 0xFF];
8552 movl(tmp2, in);
8553 shrl(tmp2, 16);
8554 andl(tmp2, 0x000000FF);
8555 shll(tmp2, 3);
8556 addq(tmp2, tmp3);
8557 movq(tmp2, Address(tmp2, 0));
8558
// tmp1 ^= Q3 << 16
8559 shlq(tmp2, 16);
8560 xorq(tmp1, tmp2);
8561
8562 // Q4 = TABLEExt[n][B >> 24 & 0xFF];
// The last lookup is done in place: 'in' is no longer needed as B after this.
8563 shrl(in, 24);
8564 andl(in, 0x000000FF);
8565 shll(in, 3);
8566 addq(in, tmp3);
8567 movq(in, Address(in, 0));
8568
// in = (Q4 << 24) ^ tmp1, which is the final result
8569 shlq(in, 24);
8570 xorq(in, tmp1);
8571 // return Q1 ^ Q2 << 8 ^ Q3 << 16 ^ Q4 << 24;
8572 }
8573
8574 void MacroAssembler::crc32c_pclmulqdq(XMMRegister w_xtmp1,
8575 Register in_out,
8576 uint32_t const_or_pre_comp_const_index, bool is_pclmulqdq_supported,
8577 XMMRegister w_xtmp2,
8578 Register tmp1,
8579 Register n_tmp2, Register n_tmp3) {
8580 if (is_pclmulqdq_supported) {
8581 movdl(w_xtmp1, in_out); // modified blindly
8582
8583 movl(tmp1, const_or_pre_comp_const_index);
8584 movdl(w_xtmp2, tmp1);
8585 pclmulqdq(w_xtmp1, w_xtmp2, 0);
8586
8587 movdq(in_out, w_xtmp1);
8588 } else {
8589 crc32c_ipl_alg4(in_out, const_or_pre_comp_const_index, tmp1, n_tmp2, n_tmp3);
8590 }
8591 }
8592
8593 // Recombination Alternative 2: No bit-reflections
8594 // T1 = (CRC_A * U1) << 1
8595 // T2 = (CRC_B * U2) << 1
8596 // C1 = T1 >> 32
8597 // C2 = T2 >> 32
8598 // T1 = T1 & 0xFFFFFFFF
8599 // T2 = T2 & 0xFFFFFFFF
8600 // T1 = CRC32(0, T1)
8601 // T2 = CRC32(0, T2)
8602 // C1 = C1 ^ T1
8603 // C2 = C2 ^ T2
8604 // CRC = C1 ^ C2 ^ CRC_C
//
// Register usage in the code emitted below (64-bit variant):
//   in_out - CRC_A on entry, the recombined CRC on exit.
//   in1    - CRC_B on entry; overwritten.
//   in2    - CRC_C; only read.
//   w_xtmp1, w_xtmp2, w_xtmp3, tmp1, tmp2 - scratch.
//   n_tmp3 - only forwarded to crc32c_pclmulqdq().
// The two const_or_pre_comp_const_index_* values and is_pclmulqdq_supported
// are passed through unchanged to crc32c_pclmulqdq().
8605 void MacroAssembler::crc32c_rec_alt2(uint32_t const_or_pre_comp_const_index_u1, uint32_t const_or_pre_comp_const_index_u2, bool is_pclmulqdq_supported, Register in_out, Register in1, Register in2,
8606 XMMRegister w_xtmp1, XMMRegister w_xtmp2, XMMRegister w_xtmp3,
8607 Register tmp1, Register tmp2,
8608 Register n_tmp3) {
// in_out = CRC_A * U1, in1 = CRC_B * U2 (64-bit products)
8609 crc32c_pclmulqdq(w_xtmp1, in_out, const_or_pre_comp_const_index_u1, is_pclmulqdq_supported, w_xtmp3, tmp1, tmp2, n_tmp3);
8610 crc32c_pclmulqdq(w_xtmp2, in1, const_or_pre_comp_const_index_u2, is_pclmulqdq_supported, w_xtmp3, tmp1, tmp2, n_tmp3);
// T1 path: tmp1 = low 32 bits of T1, in_out = T1 >> 32 (C1)
8611 shlq(in_out, 1);
8612 movl(tmp1, in_out);
8613 shrq(in_out, 32);
// tmp2 = CRC32(0, low 32 bits of T1)
8614 xorl(tmp2, tmp2);
8615 crc32(tmp2, tmp1, 4);
8616 xorl(in_out, tmp2); // we don't care about upper 32 bit contents here
// T2 path: same steps applied to in1
8617 shlq(in1, 1);
8618 movl(tmp1, in1);
8619 shrq(in1, 32);
8620 xorl(tmp2, tmp2);
8621 crc32(tmp2, tmp1, 4);
8622 xorl(in1, tmp2);
// CRC = C1 ^ C2 ^ CRC_C
8623 xorl(in_out, in1);
8624 xorl(in_out, in2);
8625 }
8626
8627 // Set N to predefined value
8628 // Subtract from a lenght of a buffer
8629 // execute in a loop:
8630 // CRC_A = 0xFFFFFFFF, CRC_B = 0, CRC_C = 0
8631 // for i = 1 to N do
8632 // CRC_A = CRC32(CRC_A, A[i])
8633 // CRC_B = CRC32(CRC_B, B[i])
8634 // CRC_C = CRC32(CRC_C, C[i])
8635 // end for
8636 // Recombine
8637 void MacroAssembler::crc32c_proc_chunk(uint32_t size, uint32_t const_or_pre_comp_const_index_u1, uint32_t const_or_pre_comp_const_index_u2, bool is_pclmulqdq_supported,
8638 Register in_out1, Register in_out2, Register in_out3,
8639 Register tmp1, Register tmp2, Register tmp3,
8640 XMMRegister w_xtmp1, XMMRegister w_xtmp2, XMMRegister w_xtmp3,
8641 Register tmp4, Register tmp5,
8642 Register n_tmp6) {
8643 Label L_processPartitions;
8644 Label L_processPartition;
8645 Label L_exit;
8646
8647 bind(L_processPartitions);
8648 cmpl(in_out1, 3 * size);
8649 jcc(Assembler::less, L_exit);
8650 xorl(tmp1, tmp1);
8651 xorl(tmp2, tmp2);
8652 movq(tmp3, in_out2);
8653 addq(tmp3, size);
8654
8655 bind(L_processPartition);
8656 crc32(in_out3, Address(in_out2, 0), 8);
8657 crc32(tmp1, Address(in_out2, size), 8);
8658 crc32(tmp2, Address(in_out2, size * 2), 8);
8659 addq(in_out2, 8);
8660 cmpq(in_out2, tmp3);
8661 jcc(Assembler::less, L_processPartition);
8662 crc32c_rec_alt2(const_or_pre_comp_const_index_u1, const_or_pre_comp_const_index_u2, is_pclmulqdq_supported, in_out3, tmp1, tmp2,
8663 w_xtmp1, w_xtmp2, w_xtmp3,
8664 tmp4, tmp5,
8665 n_tmp6);
8666 addq(in_out2, 2 * size);
8667 subl(in_out1, 3 * size);
8668 jmp(L_processPartitions);
8669
8670 bind(L_exit);
8671 }
8672 #else
// 32-bit variant of the table-driven carry-less multiplication: computes
// Q1 ^ Q2 << 8 ^ Q3 << 16 ^ Q4 << 24, where Qk is the 64-bit table entry for
// byte k of the 32-bit input. The 64-bit result is accumulated in an XMM
// register.
//
// Register usage in the code emitted below:
//   in_out       - holds the 32-bit input B on entry; overwritten (it is reused
//                  as an address register for the last lookup). The result is
//                  NOT written back to it.
//   n            - selects which table to use; table n starts
//                  n * 256 * 8 bytes past StubRoutines::crc32c_table_addr().
//   tmp1, tmp2, tmp3 - scratch general purpose registers; all overwritten.
//   xtmp1        - receives the 64-bit result.
//   xtmp2        - scratch XMM register; overwritten.
8673 void MacroAssembler::crc32c_ipl_alg4(Register in_out, uint32_t n,
8674 Register tmp1, Register tmp2, Register tmp3,
8675 XMMRegister xtmp1, XMMRegister xtmp2) {
// tmp3 = base address of table n
8676 lea(tmp3, ExternalAddress(StubRoutines::crc32c_table_addr()));
8677 if (n > 0) {
8678 addl(tmp3, n * 256 * 8);
8679 }
8680 // Q1 = TABLEExt[n][B & 0xFF];
8681 movl(tmp1, in_out);
8682 andl(tmp1, 0x000000FF);
8683 shll(tmp1, 3);
8684 addl(tmp1, tmp3);
8685 movq(xtmp1, Address(tmp1, 0));
8686
8687 // Q2 = TABLEExt[n][B >> 8 & 0xFF];
8688 movl(tmp2, in_out);
8689 shrl(tmp2, 8);
8690 andl(tmp2, 0x000000FF);
8691 shll(tmp2, 3);
8692 addl(tmp2, tmp3);
8693 movq(xtmp2, Address(tmp2, 0));
8694
// xtmp1 = Q1 ^ (Q2 << 8)
8695 psllq(xtmp2, 8);
8696 pxor(xtmp1, xtmp2);
8697
8698 // Q3 = TABLEExt[n][B >> 16 & 0xFF];
8699 movl(tmp2, in_out);
8700 shrl(tmp2, 16);
8701 andl(tmp2, 0x000000FF);
8702 shll(tmp2, 3);
8703 addl(tmp2, tmp3);
8704 movq(xtmp2, Address(tmp2, 0));
8705
// xtmp1 ^= Q3 << 16
8706 psllq(xtmp2, 16);
8707 pxor(xtmp1, xtmp2);
8708
8709 // Q4 = TABLEExt[n][B >> 24 & 0xFF];
// The last lookup reuses in_out itself as the address register.
8710 shrl(in_out, 24);
8711 andl(in_out, 0x000000FF);
8712 shll(in_out, 3);
8713 addl(in_out, tmp3);
8714 movq(xtmp2, Address(in_out, 0));
8715
8716 psllq(xtmp2, 24);
8717 pxor(xtmp1, xtmp2); // Result is left in xtmp1
8718 // return Q1 ^ Q2 << 8 ^ Q3 << 16 ^ Q4 << 24;
8719 }
8720
8721 void MacroAssembler::crc32c_pclmulqdq(XMMRegister w_xtmp1,
8722 Register in_out,
8723 uint32_t const_or_pre_comp_const_index, bool is_pclmulqdq_supported,
8724 XMMRegister w_xtmp2,
8725 Register tmp1,
8726 Register n_tmp2, Register n_tmp3) {
8727 if (is_pclmulqdq_supported) {
8728 movdl(w_xtmp1, in_out);
8729
8730 movl(tmp1, const_or_pre_comp_const_index);
8731 movdl(w_xtmp2, tmp1);
8732 pclmulqdq(w_xtmp1, w_xtmp2, 0);
8733 // Keep result in XMM since GPR is 32 bit in length
8734 } else {
8735 crc32c_ipl_alg4(in_out, const_or_pre_comp_const_index, tmp1, n_tmp2, n_tmp3, w_xtmp1, w_xtmp2);
8736 }
8737 }
8738
// 32-bit variant of the recombination step:
//   T1 = (CRC_A * U1) << 1,  T2 = (CRC_B * U2) << 1
//   C1 = (T1 >> 32) ^ CRC32(0, T1 & 0xFFFFFFFF)
//   C2 = (T2 >> 32) ^ CRC32(0, T2 & 0xFFFFFFFF)
//   CRC = C1 ^ C2 ^ CRC_C
// The 64-bit products are kept in w_xtmp1 / w_xtmp2 and split into 32-bit
// halves with XMM shifts.
//
// Register usage in the code emitted below:
//   in_out - CRC_A on entry, the recombined CRC on exit.
//   in1    - CRC_B on entry; overwritten.
//   in2    - CRC_C; only read.
//   w_xtmp1, w_xtmp2, w_xtmp3, tmp1, tmp2 - scratch.
//   n_tmp3 - only forwarded to crc32c_pclmulqdq().
8739 void MacroAssembler::crc32c_rec_alt2(uint32_t const_or_pre_comp_const_index_u1, uint32_t const_or_pre_comp_const_index_u2, bool is_pclmulqdq_supported, Register in_out, Register in1, Register in2,
8740 XMMRegister w_xtmp1, XMMRegister w_xtmp2, XMMRegister w_xtmp3,
8741 Register tmp1, Register tmp2,
8742 Register n_tmp3) {
// w_xtmp1 = CRC_A * U1, w_xtmp2 = CRC_B * U2
8743 crc32c_pclmulqdq(w_xtmp1, in_out, const_or_pre_comp_const_index_u1, is_pclmulqdq_supported, w_xtmp3, tmp1, tmp2, n_tmp3);
8744 crc32c_pclmulqdq(w_xtmp2, in1, const_or_pre_comp_const_index_u2, is_pclmulqdq_supported, w_xtmp3, tmp1, tmp2, n_tmp3);
8745
// T1 path: tmp1 = low 32 bits of T1, in_out = T1 >> 32
8746 psllq(w_xtmp1, 1);
8747 movdl(tmp1, w_xtmp1);
8748 psrlq(w_xtmp1, 32);
8749 movdl(in_out, w_xtmp1);
8750
// in_out = C1 = (T1 >> 32) ^ CRC32(0, low 32 bits of T1)
8751 xorl(tmp2, tmp2);
8752 crc32(tmp2, tmp1, 4);
8753 xorl(in_out, tmp2);
8754
// T2 path: same steps, result in in1
8755 psllq(w_xtmp2, 1);
8756 movdl(tmp1, w_xtmp2);
8757 psrlq(w_xtmp2, 32);
8758 movdl(in1, w_xtmp2);
8759
8760 xorl(tmp2, tmp2);
8761 crc32(tmp2, tmp1, 4);
8762 xorl(in1, tmp2);
// CRC = C1 ^ C2 ^ CRC_C
8763 xorl(in_out, in1);
8764 xorl(in_out, in2);
8765 }
8766
8767 void MacroAssembler::crc32c_proc_chunk(uint32_t size, uint32_t const_or_pre_comp_const_index_u1, uint32_t const_or_pre_comp_const_index_u2, bool is_pclmulqdq_supported,
8768 Register in_out1, Register in_out2, Register in_out3,
8769 Register tmp1, Register tmp2, Register tmp3,
8770 XMMRegister w_xtmp1, XMMRegister w_xtmp2, XMMRegister w_xtmp3,
8771 Register tmp4, Register tmp5,
8772 Register n_tmp6) {
8773 Label L_processPartitions;
8774 Label L_processPartition;
8775 Label L_exit;
8776
8777 bind(L_processPartitions);
8778 cmpl(in_out1, 3 * size);
8779 jcc(Assembler::less, L_exit);
8780 xorl(tmp1, tmp1);
8781 xorl(tmp2, tmp2);
8782 movl(tmp3, in_out2);
8783 addl(tmp3, size);
8784
8785 bind(L_processPartition);
8786 crc32(in_out3, Address(in_out2, 0), 4);
8787 crc32(tmp1, Address(in_out2, size), 4);
8788 crc32(tmp2, Address(in_out2, size*2), 4);
8789 crc32(in_out3, Address(in_out2, 0+4), 4);
8790 crc32(tmp1, Address(in_out2, size+4), 4);
8791 crc32(tmp2, Address(in_out2, size*2+4), 4);
8792 addl(in_out2, 8);
8793 cmpl(in_out2, tmp3);
8794 jcc(Assembler::less, L_processPartition);
8795
8796 push(tmp3);
8797 push(in_out1);
8798 push(in_out2);
8799 tmp4 = tmp3;
8800 tmp5 = in_out1;
8801 n_tmp6 = in_out2;
8802
8803 crc32c_rec_alt2(const_or_pre_comp_const_index_u1, const_or_pre_comp_const_index_u2, is_pclmulqdq_supported, in_out3, tmp1, tmp2,
8804 w_xtmp1, w_xtmp2, w_xtmp3,
8805 tmp4, tmp5,
8806 n_tmp6);
8807
8808 pop(in_out2);
8809 pop(in_out1);
8810 pop(tmp3);
8811
8812 addl(in_out2, 2 * size);
8813 subl(in_out1, 3 * size);
8814 jmp(L_processPartitions);
8815
8816 bind(L_exit);
8817 }
8818 #endif //LP64
8819
8820 #ifdef _LP64
8821 // Algorithm 2: Pipelined usage of the CRC32 instruction.
8822 // Input: A buffer I of L bytes.
8823 // Output: the CRC32C value of the buffer.
8824 // Notations:
8825 // Write L = 24N + r, with N = floor (L/24).
8826 // r = L mod 24 (0 <= r < 24).
8827 // Consider I as the concatenation of A|B|C|R, where A, B, C, each,
8828 // N quadwords, and R consists of r bytes.
8829 // A[j] = I [8j+7:8j], j= 0, 1, ..., N-1
8830 // B[j] = I [N + 8j+7:N + 8j], j= 0, 1, ..., N-1
8831 // C[j] = I [2N + 8j+7:2N + 8j], j= 0, 1, ..., N-1
8832 // if r > 0 R[j] = I [3N +j], j= 0, 1, ...,r-1
8833 void MacroAssembler::crc32c_ipl_alg2_alt2(Register in_out, Register in1, Register in2,
8834 Register tmp1, Register tmp2, Register tmp3,
8835 Register tmp4, Register tmp5, Register tmp6,
8836 XMMRegister w_xtmp1, XMMRegister w_xtmp2, XMMRegister w_xtmp3,
8837 bool is_pclmulqdq_supported) {
8838 uint32_t const_or_pre_comp_const_index[CRC32C::NUM_PRECOMPUTED_CONSTANTS];
8839 Label L_wordByWord;
8840 Label L_byteByByteProlog;
8841 Label L_byteByByte;
8842 Label L_exit;
8843
8844 if (is_pclmulqdq_supported ) {
8845 const_or_pre_comp_const_index[1] = *(uint32_t *)StubRoutines::_crc32c_table_addr;
8846 const_or_pre_comp_const_index[0] = *((uint32_t *)StubRoutines::_crc32c_table_addr+1);
8847
8848 const_or_pre_comp_const_index[3] = *((uint32_t *)StubRoutines::_crc32c_table_addr + 2);
8849 const_or_pre_comp_const_index[2] = *((uint32_t *)StubRoutines::_crc32c_table_addr + 3);
8850
8851 const_or_pre_comp_const_index[5] = *((uint32_t *)StubRoutines::_crc32c_table_addr + 4);
8852 const_or_pre_comp_const_index[4] = *((uint32_t *)StubRoutines::_crc32c_table_addr + 5);
8853 assert((CRC32C::NUM_PRECOMPUTED_CONSTANTS - 1 ) == 5, "Checking whether you declared all of the constants based on the number of \"chunks\"");
8854 } else {
8855 const_or_pre_comp_const_index[0] = 1;
8856 const_or_pre_comp_const_index[1] = 0;
8857
8858 const_or_pre_comp_const_index[2] = 3;
8859 const_or_pre_comp_const_index[3] = 2;
8860
8861 const_or_pre_comp_const_index[4] = 5;
8862 const_or_pre_comp_const_index[5] = 4;
8863 }
8864 crc32c_proc_chunk(CRC32C::HIGH, const_or_pre_comp_const_index[0], const_or_pre_comp_const_index[1], is_pclmulqdq_supported,
8865 in2, in1, in_out,
8866 tmp1, tmp2, tmp3,
8867 w_xtmp1, w_xtmp2, w_xtmp3,
8868 tmp4, tmp5,
8869 tmp6);
8870 crc32c_proc_chunk(CRC32C::MIDDLE, const_or_pre_comp_const_index[2], const_or_pre_comp_const_index[3], is_pclmulqdq_supported,
8871 in2, in1, in_out,
8872 tmp1, tmp2, tmp3,
8873 w_xtmp1, w_xtmp2, w_xtmp3,
8874 tmp4, tmp5,
8875 tmp6);
8876 crc32c_proc_chunk(CRC32C::LOW, const_or_pre_comp_const_index[4], const_or_pre_comp_const_index[5], is_pclmulqdq_supported,
8877 in2, in1, in_out,
8878 tmp1, tmp2, tmp3,
8879 w_xtmp1, w_xtmp2, w_xtmp3,
8880 tmp4, tmp5,
8881 tmp6);
8882 movl(tmp1, in2);
8883 andl(tmp1, 0x00000007);
8884 negl(tmp1);
8885 addl(tmp1, in2);
8886 addq(tmp1, in1);
8887
8888 BIND(L_wordByWord);
8889 cmpq(in1, tmp1);
8890 jcc(Assembler::greaterEqual, L_byteByByteProlog);
8891 crc32(in_out, Address(in1, 0), 4);
8892 addq(in1, 4);
8893 jmp(L_wordByWord);
8894
8895 BIND(L_byteByByteProlog);
8896 andl(in2, 0x00000007);
8897 movl(tmp2, 1);
8898
8899 BIND(L_byteByByte);
8900 cmpl(tmp2, in2);
8901 jccb(Assembler::greater, L_exit);
8902 crc32(in_out, Address(in1, 0), 1);
8903 incq(in1);
8904 incl(tmp2);
8905 jmp(L_byteByByte);
8906
8907 BIND(L_exit);
8908 }
8909 #else
8910 void MacroAssembler::crc32c_ipl_alg2_alt2(Register in_out, Register in1, Register in2,
8911 Register tmp1, Register tmp2, Register tmp3,
8912 Register tmp4, Register tmp5, Register tmp6,
8913 XMMRegister w_xtmp1, XMMRegister w_xtmp2, XMMRegister w_xtmp3,
8914 bool is_pclmulqdq_supported) {
8915 uint32_t const_or_pre_comp_const_index[CRC32C::NUM_PRECOMPUTED_CONSTANTS];
8916 Label L_wordByWord;
8917 Label L_byteByByteProlog;
8918 Label L_byteByByte;
8919 Label L_exit;
8920
8921 if (is_pclmulqdq_supported) {
8922 const_or_pre_comp_const_index[1] = *(uint32_t *)StubRoutines::_crc32c_table_addr;
8923 const_or_pre_comp_const_index[0] = *((uint32_t *)StubRoutines::_crc32c_table_addr + 1);
8924
8925 const_or_pre_comp_const_index[3] = *((uint32_t *)StubRoutines::_crc32c_table_addr + 2);
8926 const_or_pre_comp_const_index[2] = *((uint32_t *)StubRoutines::_crc32c_table_addr + 3);
8927
8928 const_or_pre_comp_const_index[5] = *((uint32_t *)StubRoutines::_crc32c_table_addr + 4);
8929 const_or_pre_comp_const_index[4] = *((uint32_t *)StubRoutines::_crc32c_table_addr + 5);
8930 } else {
8931 const_or_pre_comp_const_index[0] = 1;
8932 const_or_pre_comp_const_index[1] = 0;
8933
8934 const_or_pre_comp_const_index[2] = 3;
8935 const_or_pre_comp_const_index[3] = 2;
8936
8937 const_or_pre_comp_const_index[4] = 5;
8938 const_or_pre_comp_const_index[5] = 4;
8939 }
8940 crc32c_proc_chunk(CRC32C::HIGH, const_or_pre_comp_const_index[0], const_or_pre_comp_const_index[1], is_pclmulqdq_supported,
8941 in2, in1, in_out,
8942 tmp1, tmp2, tmp3,
8943 w_xtmp1, w_xtmp2, w_xtmp3,
8944 tmp4, tmp5,
8945 tmp6);
8946 crc32c_proc_chunk(CRC32C::MIDDLE, const_or_pre_comp_const_index[2], const_or_pre_comp_const_index[3], is_pclmulqdq_supported,
8947 in2, in1, in_out,
8948 tmp1, tmp2, tmp3,
8949 w_xtmp1, w_xtmp2, w_xtmp3,
8950 tmp4, tmp5,
8951 tmp6);
8952 crc32c_proc_chunk(CRC32C::LOW, const_or_pre_comp_const_index[4], const_or_pre_comp_const_index[5], is_pclmulqdq_supported,
8953 in2, in1, in_out,
8954 tmp1, tmp2, tmp3,
8955 w_xtmp1, w_xtmp2, w_xtmp3,
8956 tmp4, tmp5,
8957 tmp6);
8958 movl(tmp1, in2);
8959 andl(tmp1, 0x00000007);
8960 negl(tmp1);
8961 addl(tmp1, in2);
8962 addl(tmp1, in1);
8963
8964 BIND(L_wordByWord);
8965 cmpl(in1, tmp1);
8966 jcc(Assembler::greaterEqual, L_byteByByteProlog);
8967 crc32(in_out, Address(in1,0), 4);
8968 addl(in1, 4);
8969 jmp(L_wordByWord);
8970
8971 BIND(L_byteByByteProlog);
8972 andl(in2, 0x00000007);
8973 movl(tmp2, 1);
8974
8975 BIND(L_byteByByte);
8976 cmpl(tmp2, in2);
8977 jccb(Assembler::greater, L_exit);
8978 movb(tmp1, Address(in1, 0));
8979 crc32(in_out, tmp1, 1);
8980 incl(in1);
8981 incl(tmp2);
8982 jmp(L_byteByByte);
8983
8984 BIND(L_exit);
8985 }
8986 #endif // LP64
8987 #undef BIND
8988 #undef BLOCK_COMMENT
8989
8990
8991 Assembler::Condition MacroAssembler::negate_condition(Assembler::Condition cond) {
8992 switch (cond) {
8993 // Note some conditions are synonyms for others
8994 case Assembler::zero: return Assembler::notZero;
8995 case Assembler::notZero: return Assembler::zero;
8996 case Assembler::less: return Assembler::greaterEqual;
8997 case Assembler::lessEqual: return Assembler::greater;
8998 case Assembler::greater: return Assembler::lessEqual;
8999 case Assembler::greaterEqual: return Assembler::less;
9000 case Assembler::below: return Assembler::aboveEqual;
9001 case Assembler::belowEqual: return Assembler::above;
9002 case Assembler::above: return Assembler::belowEqual;
9003 case Assembler::aboveEqual: return Assembler::below;
9004 case Assembler::overflow: return Assembler::noOverflow;
9005 case Assembler::noOverflow: return Assembler::overflow;
9006 case Assembler::negative: return Assembler::positive;
|