< prev index next >

src/cpu/x86/vm/macroAssembler_x86.cpp

Print this page




  28 #include "compiler/disassembler.hpp"
  29 #include "gc/shared/cardTableModRefBS.hpp"
  30 #include "gc/shared/collectedHeap.inline.hpp"
  31 #include "interpreter/interpreter.hpp"
  32 #include "memory/resourceArea.hpp"
  33 #include "memory/universe.hpp"
  34 #include "oops/klass.inline.hpp"
  35 #include "prims/methodHandles.hpp"
  36 #include "runtime/biasedLocking.hpp"
  37 #include "runtime/interfaceSupport.hpp"
  38 #include "runtime/objectMonitor.hpp"
  39 #include "runtime/os.hpp"
  40 #include "runtime/sharedRuntime.hpp"
  41 #include "runtime/stubRoutines.hpp"
  42 #include "utilities/macros.hpp"
  43 #if INCLUDE_ALL_GCS
  44 #include "gc/g1/g1CollectedHeap.inline.hpp"
  45 #include "gc/g1/g1SATBCardTableModRefBS.hpp"
  46 #include "gc/g1/heapRegion.hpp"
  47 #endif // INCLUDE_ALL_GCS

  48 
// Assembly-annotation helpers used throughout this file.
//   BLOCK_COMMENT(str) - expands to nothing in PRODUCT builds, otherwise to
//                        block_comment(str).
//   STOP(error)        - always expands to stop(error); in non-PRODUCT builds
//                        it is preceded by block_comment(error).
// NOTE: the non-PRODUCT STOP expands to two statements that are not wrapped
// in a block, so it must not be used as the unbraced body of an if/else/loop.
#ifdef PRODUCT
#define BLOCK_COMMENT(str) /* nothing */
#define STOP(error) stop(error)
#else
#define BLOCK_COMMENT(str) block_comment(str)
#define STOP(error) block_comment(error); stop(error)
#endif

// Binds 'label' and then passes the label's spelling followed by ':' to
// BLOCK_COMMENT. Like STOP, this expands to two unbraced statements.
#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")

PRAGMA_FORMAT_MUTE_WARNINGS_FOR_GCC

#ifdef ASSERT
// Only compiled when ASSERT is defined; this implementation always returns true.
bool AbstractAssembler::pd_check_instruction_mark() { return true; }
#endif
  64 
  65 static Assembler::Condition reverse[] = {
  66     Assembler::noOverflow     /* overflow      = 0x0 */ ,
  67     Assembler::overflow       /* noOverflow    = 0x1 */ ,


8501 
8502   BIND(L_tail_restore);
8503   movl(len, tmp); // restore
8504   BIND(L_tail);
8505   andl(len, 0xf);
8506   jccb(Assembler::zero, L_exit);
8507 
8508   // Fold the rest of bytes
8509   align(4);
8510   BIND(L_tail_loop);
8511   movsbl(rax, Address(buf, 0)); // load byte with sign extension
8512   update_byte_crc32(crc, rax, table);
8513   increment(buf);
8514   decrementl(len);
8515   jccb(Assembler::greater, L_tail_loop);
8516 
8517   BIND(L_exit);
8518   notl(crc); // ~c
8519 }
8520 

















































































































































































































































































































































































































































































8521 #undef BIND
8522 #undef BLOCK_COMMENT
8523 
8524 
8525 Assembler::Condition MacroAssembler::negate_condition(Assembler::Condition cond) {
8526   switch (cond) {
8527     // Note some conditions are synonyms for others
8528     case Assembler::zero:         return Assembler::notZero;
8529     case Assembler::notZero:      return Assembler::zero;
8530     case Assembler::less:         return Assembler::greaterEqual;
8531     case Assembler::lessEqual:    return Assembler::greater;
8532     case Assembler::greater:      return Assembler::lessEqual;
8533     case Assembler::greaterEqual: return Assembler::less;
8534     case Assembler::below:        return Assembler::aboveEqual;
8535     case Assembler::belowEqual:   return Assembler::above;
8536     case Assembler::above:        return Assembler::belowEqual;
8537     case Assembler::aboveEqual:   return Assembler::below;
8538     case Assembler::overflow:     return Assembler::noOverflow;
8539     case Assembler::noOverflow:   return Assembler::overflow;
8540     case Assembler::negative:     return Assembler::positive;


  28 #include "compiler/disassembler.hpp"
  29 #include "gc/shared/cardTableModRefBS.hpp"
  30 #include "gc/shared/collectedHeap.inline.hpp"
  31 #include "interpreter/interpreter.hpp"
  32 #include "memory/resourceArea.hpp"
  33 #include "memory/universe.hpp"
  34 #include "oops/klass.inline.hpp"
  35 #include "prims/methodHandles.hpp"
  36 #include "runtime/biasedLocking.hpp"
  37 #include "runtime/interfaceSupport.hpp"
  38 #include "runtime/objectMonitor.hpp"
  39 #include "runtime/os.hpp"
  40 #include "runtime/sharedRuntime.hpp"
  41 #include "runtime/stubRoutines.hpp"
  42 #include "utilities/macros.hpp"
  43 #if INCLUDE_ALL_GCS
  44 #include "gc/g1/g1CollectedHeap.inline.hpp"
  45 #include "gc/g1/g1SATBCardTableModRefBS.hpp"
  46 #include "gc/g1/heapRegion.hpp"
  47 #endif // INCLUDE_ALL_GCS
  48 #include "crc32c.h"
  49 
// Assembly-annotation helpers used throughout this file.
//   BLOCK_COMMENT(str) - expands to nothing in PRODUCT builds, otherwise to
//                        block_comment(str).
//   STOP(error)        - always expands to stop(error); in non-PRODUCT builds
//                        it is preceded by block_comment(error).
// NOTE: the non-PRODUCT STOP expands to two statements that are not wrapped
// in a block, so it must not be used as the unbraced body of an if/else/loop.
#ifdef PRODUCT
#define BLOCK_COMMENT(str) /* nothing */
#define STOP(error) stop(error)
#else
#define BLOCK_COMMENT(str) block_comment(str)
#define STOP(error) block_comment(error); stop(error)
#endif

// Binds 'label' and then passes the label's spelling followed by ':' to
// BLOCK_COMMENT. Like STOP, this expands to two unbraced statements.
#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")

PRAGMA_FORMAT_MUTE_WARNINGS_FOR_GCC

#ifdef ASSERT
// Only compiled when ASSERT is defined; this implementation always returns true.
bool AbstractAssembler::pd_check_instruction_mark() { return true; }
#endif
  65 
  66 static Assembler::Condition reverse[] = {
  67     Assembler::noOverflow     /* overflow      = 0x0 */ ,
  68     Assembler::overflow       /* noOverflow    = 0x1 */ ,


8502 
8503   BIND(L_tail_restore);
8504   movl(len, tmp); // restore
8505   BIND(L_tail);
8506   andl(len, 0xf);
8507   jccb(Assembler::zero, L_exit);
8508 
8509   // Fold the rest of bytes
8510   align(4);
8511   BIND(L_tail_loop);
8512   movsbl(rax, Address(buf, 0)); // load byte with sign extension
8513   update_byte_crc32(crc, rax, table);
8514   increment(buf);
8515   decrementl(len);
8516   jccb(Assembler::greater, L_tail_loop);
8517 
8518   BIND(L_exit);
8519   notl(crc); // ~c
8520 }
8521 
8522 #ifdef _LP64
// S. Gueron / Information Processing Letters 112 (2012) 184
// Algorithm 4: Computing carry-less multiplication using a precomputed lookup table.
// Input: A 32 bit value B = [byte3, byte2, byte1, byte0].
// Output: the 64-bit carry-less product of B * CONST
//
// 64-bit variant. Emits four lookups, one per byte of B, into the table
// selected by 'n' (tables are laid out back to back starting at
// StubRoutines::crc32c_table_addr(), 256 entries of 8 bytes each) and XORs
// the loaded entries together after shifting them left by 0, 8, 16 and 24 bits.
//
//   in               - holds B on entry; overwritten with the combined result
//   n                - index of the lookup table to use
//   tmp1, tmp2, tmp3 - scratch registers, clobbered
void MacroAssembler::crc32c_ipl_alg4(Register in, uint32_t n,
                                     Register tmp1, Register tmp2, Register tmp3) {
  // tmp3 = start address of table n
  lea(tmp3, ExternalAddress(StubRoutines::crc32c_table_addr()));
  if (n > 0) {
    addq(tmp3, n * 256 * 8);
  }
  //    Q1 = TABLEExt[n][B & 0xFF];
  movl(tmp1, in);
  andl(tmp1, 0x000000FF);
  shll(tmp1, 3);                 // byte value * 8 = byte offset of the entry
  addq(tmp1, tmp3);
  movq(tmp1, Address(tmp1, 0));  // tmp1 = Q1

  //    Q2 = TABLEExt[n][B >> 8 & 0xFF];
  movl(tmp2, in);
  shrl(tmp2, 8);
  andl(tmp2, 0x000000FF);
  shll(tmp2, 3);
  addq(tmp2, tmp3);
  movq(tmp2, Address(tmp2, 0));

  shlq(tmp2, 8);
  xorq(tmp1, tmp2);              // tmp1 = Q1 ^ Q2 << 8

  //    Q3 = TABLEExt[n][B >> 16 & 0xFF];
  movl(tmp2, in);
  shrl(tmp2, 16);
  andl(tmp2, 0x000000FF);
  shll(tmp2, 3);
  addq(tmp2, tmp3);
  movq(tmp2, Address(tmp2, 0));

  shlq(tmp2, 16);
  xorq(tmp1, tmp2);              // tmp1 = Q1 ^ Q2 << 8 ^ Q3 << 16

  //    Q4 = TABLEExt[n][B >> 24 & 0xFF];
  // 'in' is no longer needed as input, so it is reused for the last lookup.
  shrl(in, 24);
  andl(in, 0x000000FF);
  shll(in, 3);
  addq(in, tmp3);
  movq(in, Address(in, 0));

  shlq(in, 24);
  xorq(in, tmp1);                // result is left in 'in'
  //    return Q1 ^ Q2 << 8 ^ Q3 << 16 ^ Q4 << 24;
}
8573 
// 64-bit variant. Emits code that multiplies in_out by a constant and leaves
// the product in in_out.
//
//   is_pclmulqdq_supported == true:
//     const_or_pre_comp_const_index is the constant itself. It is loaded via
//     tmp1 into w_xtmp2, in_out is copied into w_xtmp1, PCLMULQDQ multiplies
//     the two and the result is moved back into in_out. n_tmp2 and n_tmp3 are
//     not used on this path.
//   is_pclmulqdq_supported == false:
//     const_or_pre_comp_const_index is passed to crc32c_ipl_alg4() as the
//     lookup-table index, with tmp1, n_tmp2 and n_tmp3 as its scratch
//     registers. The XMM registers are not used on this path.
void MacroAssembler::crc32c_pclmulqdq(XMMRegister w_xtmp1,
                                      Register in_out,
                                      uint32_t const_or_pre_comp_const_index, bool is_pclmulqdq_supported,
                                      XMMRegister w_xtmp2,
                                      Register tmp1,
                                      Register n_tmp2, Register n_tmp3) {
  if (is_pclmulqdq_supported) {
    movdl(w_xtmp1, in_out); // modified blindly

    movl(tmp1, const_or_pre_comp_const_index);
    movdl(w_xtmp2, tmp1);
    pclmulqdq(w_xtmp1, w_xtmp2, 0);

    // Copy the product back to the general purpose register.
    movdq(in_out, w_xtmp1);
  } else {
    crc32c_ipl_alg4(in_out, const_or_pre_comp_const_index, tmp1, n_tmp2, n_tmp3);
  }
}
8592 
// Recombination Alternative 2: No bit-reflections
// T1 = (CRC_A * U1) << 1
// T2 = (CRC_B * U2) << 1
// C1 = T1 >> 32
// C2 = T2 >> 32
// T1 = T1 & 0xFFFFFFFF
// T2 = T2 & 0xFFFFFFFF
// T1 = CRC32(0, T1)
// T2 = CRC32(0, T2)
// C1 = C1 ^ T1
// C2 = C2 ^ T2
// CRC = C1 ^ C2 ^ CRC_C
//
// 64-bit variant.
//   in_out - CRC_A on entry; receives the recombined CRC
//   in1    - CRC_B on entry; clobbered
//   in2    - CRC_C; only read
//   const_or_pre_comp_const_index_u1 / _u2 - U1 / U2, forwarded to
//            crc32c_pclmulqdq() together with is_pclmulqdq_supported
//   w_xtmp1..w_xtmp3, tmp1, tmp2, n_tmp3 - scratch
void MacroAssembler::crc32c_rec_alt2(uint32_t const_or_pre_comp_const_index_u1, uint32_t const_or_pre_comp_const_index_u2, bool is_pclmulqdq_supported, Register in_out, Register in1, Register in2,
                                     XMMRegister w_xtmp1, XMMRegister w_xtmp2, XMMRegister w_xtmp3,
                                     Register tmp1, Register tmp2,
                                     Register n_tmp3) {
  // in_out = CRC_A * U1, in1 = CRC_B * U2
  crc32c_pclmulqdq(w_xtmp1, in_out, const_or_pre_comp_const_index_u1, is_pclmulqdq_supported, w_xtmp3, tmp1, tmp2, n_tmp3);
  crc32c_pclmulqdq(w_xtmp2, in1, const_or_pre_comp_const_index_u2, is_pclmulqdq_supported, w_xtmp3, tmp1, tmp2, n_tmp3);
  shlq(in_out, 1);        // T1 = (CRC_A * U1) << 1
  movl(tmp1, in_out);     // tmp1 = T1 & 0xFFFFFFFF
  shrq(in_out, 32);       // C1 = T1 >> 32
  xorl(tmp2, tmp2);
  crc32(tmp2, tmp1, 4);   // tmp2 = CRC32(0, T1)
  xorl(in_out, tmp2); // we don't care about upper 32 bit contents here
  shlq(in1, 1);           // T2 = (CRC_B * U2) << 1
  movl(tmp1, in1);        // tmp1 = T2 & 0xFFFFFFFF
  shrq(in1, 32);          // C2 = T2 >> 32
  xorl(tmp2, tmp2);
  crc32(tmp2, tmp1, 4);   // tmp2 = CRC32(0, T2)
  xorl(in1, tmp2);
  xorl(in_out, in1);      // C1 ^ C2
  xorl(in_out, in2);      // ... ^ CRC_C
}
8626 
8627 // Set N to predefined value
8628 // Subtract from a lenght of a buffer
8629 // execute in a loop:
8630 // CRC_A = 0xFFFFFFFF, CRC_B = 0, CRC_C = 0
8631 // for i = 1 to N do
8632 //  CRC_A = CRC32(CRC_A, A[i])
8633 //  CRC_B = CRC32(CRC_B, B[i])
8634 //  CRC_C = CRC32(CRC_C, C[i])
8635 // end for
8636 // Recombine
8637 void MacroAssembler::crc32c_proc_chunk(uint32_t size, uint32_t const_or_pre_comp_const_index_u1, uint32_t const_or_pre_comp_const_index_u2, bool is_pclmulqdq_supported,
8638                                        Register in_out1, Register in_out2, Register in_out3,
8639                                        Register tmp1, Register tmp2, Register tmp3, 
8640                                        XMMRegister w_xtmp1, XMMRegister w_xtmp2, XMMRegister w_xtmp3,
8641                                        Register tmp4, Register tmp5, 
8642                                        Register n_tmp6) {
8643   Label L_processPartitions;
8644   Label L_processPartition;
8645   Label L_exit;
8646     
8647   bind(L_processPartitions);
8648   cmpl(in_out1, 3 * size);
8649   jcc(Assembler::less, L_exit);
8650     xorl(tmp1, tmp1);
8651     xorl(tmp2, tmp2);
8652     movq(tmp3, in_out2);
8653     addq(tmp3, size);
8654 
8655     bind(L_processPartition);
8656       crc32(in_out3, Address(in_out2, 0), 8);
8657       crc32(tmp1, Address(in_out2, size), 8);
8658       crc32(tmp2, Address(in_out2, size * 2), 8);
8659       addq(in_out2, 8);
8660       cmpq(in_out2, tmp3);
8661       jcc(Assembler::less, L_processPartition);
8662     crc32c_rec_alt2(const_or_pre_comp_const_index_u1, const_or_pre_comp_const_index_u2, is_pclmulqdq_supported, in_out3, tmp1, tmp2,
8663             w_xtmp1, w_xtmp2, w_xtmp3,
8664             tmp4, tmp5,
8665             n_tmp6);
8666     addq(in_out2, 2 * size);
8667     subl(in_out1, 3 * size);
8668     jmp(L_processPartitions);
8669 
8670   bind(L_exit);
8671 }
8672 #else
// 32-bit variant of the table-driven carry-less multiplication. Emits four
// lookups, one per byte of the 32-bit input, into the table selected by 'n'
// (tables are laid out back to back starting at
// StubRoutines::crc32c_table_addr(), 256 entries of 8 bytes each) and XORs
// the loaded entries together after shifting them left by 0, 8, 16 and 24 bits.
// The 8-byte entries are loaded and combined in XMM registers.
//
//   in_out           - holds the input value on entry; clobbered (it is
//                      reused as an address register for the last lookup)
//   n                - index of the lookup table to use
//   tmp1, tmp2, tmp3 - scratch registers, clobbered
//   xtmp1            - receives the combined result
//   xtmp2            - scratch, clobbered
void MacroAssembler::crc32c_ipl_alg4(Register in_out, uint32_t n,
                                     Register tmp1, Register tmp2, Register tmp3,
                                     XMMRegister xtmp1, XMMRegister xtmp2) {
  // tmp3 = start address of table n
  lea(tmp3, ExternalAddress(StubRoutines::crc32c_table_addr()));
  if (n > 0) {
    addl(tmp3, n * 256 * 8);
  }
  //    Q1 = TABLEExt[n][B & 0xFF];
  movl(tmp1, in_out);
  andl(tmp1, 0x000000FF);
  shll(tmp1, 3);                  // byte value * 8 = byte offset of the entry
  addl(tmp1, tmp3);
  movq(xtmp1, Address(tmp1, 0));  // xtmp1 = Q1

  //    Q2 = TABLEExt[n][B >> 8 & 0xFF];
  movl(tmp2, in_out);
  shrl(tmp2, 8);
  andl(tmp2, 0x000000FF);
  shll(tmp2, 3);
  addl(tmp2, tmp3);
  movq(xtmp2, Address(tmp2, 0));

  psllq(xtmp2, 8);
  pxor(xtmp1, xtmp2);             // xtmp1 = Q1 ^ Q2 << 8

  //    Q3 = TABLEExt[n][B >> 16 & 0xFF];
  movl(tmp2, in_out);
  shrl(tmp2, 16);
  andl(tmp2, 0x000000FF);
  shll(tmp2, 3);
  addl(tmp2, tmp3);
  movq(xtmp2, Address(tmp2, 0));

  psllq(xtmp2, 16);
  pxor(xtmp1, xtmp2);             // xtmp1 = Q1 ^ Q2 << 8 ^ Q3 << 16

  //    Q4 = TABLEExt[n][B >> 24 & 0xFF];
  // in_out is no longer needed as input, so it is reused to form the address.
  shrl(in_out, 24);
  andl(in_out, 0x000000FF);
  shll(in_out, 3);
  addl(in_out, tmp3);
  movq(xtmp2, Address(in_out, 0));

  psllq(xtmp2, 24);
  pxor(xtmp1, xtmp2); // Result in CXMM
  //    return Q1 ^ Q2 << 8 ^ Q3 << 16 ^ Q4 << 24;
}
8720 
// 32-bit variant. Emits code that multiplies in_out by a constant and leaves
// the product in w_xtmp1 (it is not copied back to in_out).
//
//   is_pclmulqdq_supported == true:
//     const_or_pre_comp_const_index is the constant itself. It is loaded via
//     tmp1 into w_xtmp2, in_out is copied into w_xtmp1 and PCLMULQDQ
//     multiplies the two. n_tmp2 and n_tmp3 are not used on this path.
//   is_pclmulqdq_supported == false:
//     const_or_pre_comp_const_index is passed to crc32c_ipl_alg4() as the
//     lookup-table index, with tmp1, n_tmp2, n_tmp3 and w_xtmp2 as scratch;
//     that routine also clobbers in_out.
void MacroAssembler::crc32c_pclmulqdq(XMMRegister w_xtmp1,
                                      Register in_out,
                                      uint32_t const_or_pre_comp_const_index, bool is_pclmulqdq_supported,
                                      XMMRegister w_xtmp2,
                                      Register tmp1,
                                      Register n_tmp2, Register n_tmp3) {
  if (is_pclmulqdq_supported) {
    movdl(w_xtmp1, in_out);

    movl(tmp1, const_or_pre_comp_const_index);
    movdl(w_xtmp2, tmp1);
    pclmulqdq(w_xtmp1, w_xtmp2, 0);
    // Keep result in XMM since GPR is 32 bit in length
  } else {
    crc32c_ipl_alg4(in_out, const_or_pre_comp_const_index, tmp1, n_tmp2, n_tmp3, w_xtmp1, w_xtmp2);
  }
}
8738 
// 32-bit variant of the recombination step:
//   T1 = (CRC_A * U1) << 1,  T2 = (CRC_B * U2) << 1
//   CRC = (T1 >> 32) ^ CRC32(0, T1 & 0xFFFFFFFF)
//       ^ (T2 >> 32) ^ CRC32(0, T2 & 0xFFFFFFFF) ^ CRC_C
// The two products are produced by crc32c_pclmulqdq() in w_xtmp1 and
// w_xtmp2, so the shifts are done with XMM instructions and the 32-bit
// halves are moved to general purpose registers one at a time.
//
//   in_out - CRC_A on entry; receives the recombined CRC
//   in1    - CRC_B on entry; clobbered
//   in2    - CRC_C; only read
//   const_or_pre_comp_const_index_u1 / _u2 - U1 / U2, forwarded to
//            crc32c_pclmulqdq() together with is_pclmulqdq_supported
//   w_xtmp1..w_xtmp3, tmp1, tmp2, n_tmp3 - scratch
void MacroAssembler::crc32c_rec_alt2(uint32_t const_or_pre_comp_const_index_u1, uint32_t const_or_pre_comp_const_index_u2, bool is_pclmulqdq_supported, Register in_out, Register in1, Register in2,
                                     XMMRegister w_xtmp1, XMMRegister w_xtmp2, XMMRegister w_xtmp3,
                                     Register tmp1, Register tmp2,
                                     Register n_tmp3) {
  // w_xtmp1 = CRC_A * U1, w_xtmp2 = CRC_B * U2
  crc32c_pclmulqdq(w_xtmp1, in_out, const_or_pre_comp_const_index_u1, is_pclmulqdq_supported, w_xtmp3, tmp1, tmp2, n_tmp3);
  crc32c_pclmulqdq(w_xtmp2, in1, const_or_pre_comp_const_index_u2, is_pclmulqdq_supported, w_xtmp3, tmp1, tmp2, n_tmp3);

  psllq(w_xtmp1, 1);        // T1 = (CRC_A * U1) << 1
  movdl(tmp1, w_xtmp1);     // tmp1 = T1 & 0xFFFFFFFF
  psrlq(w_xtmp1, 32);
  movdl(in_out, w_xtmp1);   // in_out = C1 = T1 >> 32

  xorl(tmp2, tmp2);
  crc32(tmp2, tmp1, 4);     // tmp2 = CRC32(0, T1)
  xorl(in_out, tmp2);

  psllq(w_xtmp2, 1);        // T2 = (CRC_B * U2) << 1
  movdl(tmp1, w_xtmp2);     // tmp1 = T2 & 0xFFFFFFFF
  psrlq(w_xtmp2, 32);
  movdl(in1, w_xtmp2);      // in1 = C2 = T2 >> 32

  xorl(tmp2, tmp2);
  crc32(tmp2, tmp1, 4);     // tmp2 = CRC32(0, T2)
  xorl(in1, tmp2);
  xorl(in_out, in1);        // C1 ^ C2
  xorl(in_out, in2);        // ... ^ CRC_C
}
8766 
8767 void MacroAssembler::crc32c_proc_chunk(uint32_t size, uint32_t const_or_pre_comp_const_index_u1, uint32_t const_or_pre_comp_const_index_u2, bool is_pclmulqdq_supported,
8768                                        Register in_out1, Register in_out2, Register in_out3,
8769                                        Register tmp1, Register tmp2, Register tmp3,
8770                                        XMMRegister w_xtmp1, XMMRegister w_xtmp2, XMMRegister w_xtmp3,
8771                                        Register tmp4, Register tmp5,
8772                                        Register n_tmp6) {
8773   Label L_processPartitions;
8774   Label L_processPartition;
8775   Label L_exit;
8776 
8777   bind(L_processPartitions);
8778   cmpl(in_out1, 3 * size);
8779   jcc(Assembler::less, L_exit);
8780     xorl(tmp1, tmp1);
8781     xorl(tmp2, tmp2);
8782     movl(tmp3, in_out2);
8783     addl(tmp3, size);
8784 
8785     bind(L_processPartition);
8786       crc32(in_out3, Address(in_out2, 0), 4);
8787       crc32(tmp1, Address(in_out2, size), 4);
8788       crc32(tmp2, Address(in_out2, size*2), 4);
8789       crc32(in_out3, Address(in_out2, 0+4), 4);
8790       crc32(tmp1, Address(in_out2, size+4), 4);
8791       crc32(tmp2, Address(in_out2, size*2+4), 4);
8792       addl(in_out2, 8);
8793       cmpl(in_out2, tmp3);
8794       jcc(Assembler::less, L_processPartition);
8795 
8796         push(tmp3);
8797         push(in_out1);
8798         push(in_out2);
8799         tmp4 = tmp3;
8800         tmp5 = in_out1;
8801         n_tmp6 = in_out2;
8802 
8803       crc32c_rec_alt2(const_or_pre_comp_const_index_u1, const_or_pre_comp_const_index_u2, is_pclmulqdq_supported, in_out3, tmp1, tmp2,
8804             w_xtmp1, w_xtmp2, w_xtmp3,
8805             tmp4, tmp5,
8806             n_tmp6);
8807 
8808         pop(in_out2);
8809         pop(in_out1);
8810         pop(tmp3);
8811 
8812     addl(in_out2, 2 * size);
8813     subl(in_out1, 3 * size);
8814     jmp(L_processPartitions);
8815 
8816   bind(L_exit);
8817 }
8818 #endif //LP64
8819 
8820 #ifdef _LP64
8821 // Algorithm 2: Pipelined usage of the CRC32 instruction.
8822 // Input: A buffer I of L bytes.
8823 // Output: the CRC32C value of the buffer.
8824 // Notations:
8825 // Write L = 24N + r, with N = floor (L/24).
8826 // r = L mod 24 (0 <= r < 24).
8827 // Consider I as the concatenation of A|B|C|R, where A, B, C, each,
8828 // N quadwords, and R consists of r bytes.
8829 // A[j] = I [8j+7:8j], j= 0, 1, ..., N-1
8830 // B[j] = I [N + 8j+7:N + 8j], j= 0, 1, ..., N-1
8831 // C[j] = I [2N + 8j+7:2N + 8j], j= 0, 1, ..., N-1
8832 // if r > 0 R[j] = I [3N +j], j= 0, 1, ...,r-1
8833 void MacroAssembler::crc32c_ipl_alg2_alt2(Register in_out, Register in1, Register in2,
8834                                           Register tmp1, Register tmp2, Register tmp3,
8835                                           Register tmp4, Register tmp5, Register tmp6,
8836                                           XMMRegister w_xtmp1, XMMRegister w_xtmp2, XMMRegister w_xtmp3,
8837                                           bool is_pclmulqdq_supported) {
8838   uint32_t const_or_pre_comp_const_index[CRC32C::NUM_PRECOMPUTED_CONSTANTS];
8839   Label L_wordByWord;
8840   Label L_byteByByteProlog;
8841   Label L_byteByByte;
8842   Label L_exit;
8843 
8844   if (is_pclmulqdq_supported ) {
8845     const_or_pre_comp_const_index[1] = *(uint32_t *)StubRoutines::_crc32c_table_addr;
8846     const_or_pre_comp_const_index[0] = *((uint32_t *)StubRoutines::_crc32c_table_addr+1);
8847 
8848     const_or_pre_comp_const_index[3] = *((uint32_t *)StubRoutines::_crc32c_table_addr + 2);
8849     const_or_pre_comp_const_index[2] = *((uint32_t *)StubRoutines::_crc32c_table_addr + 3);
8850 
8851     const_or_pre_comp_const_index[5] = *((uint32_t *)StubRoutines::_crc32c_table_addr + 4);
8852     const_or_pre_comp_const_index[4] = *((uint32_t *)StubRoutines::_crc32c_table_addr + 5);
8853     assert((CRC32C::NUM_PRECOMPUTED_CONSTANTS - 1 ) == 5, "Checking whether you declared all of the constants based on the number of \"chunks\"");
8854   } else {
8855     const_or_pre_comp_const_index[0] = 1;
8856     const_or_pre_comp_const_index[1] = 0;
8857 
8858     const_or_pre_comp_const_index[2] = 3;
8859     const_or_pre_comp_const_index[3] = 2;
8860 
8861     const_or_pre_comp_const_index[4] = 5;
8862     const_or_pre_comp_const_index[5] = 4;
8863    }
8864   crc32c_proc_chunk(CRC32C::HIGH, const_or_pre_comp_const_index[0], const_or_pre_comp_const_index[1], is_pclmulqdq_supported,
8865                     in2, in1, in_out, 
8866                     tmp1, tmp2, tmp3,
8867                     w_xtmp1, w_xtmp2, w_xtmp3,
8868                     tmp4, tmp5, 
8869                     tmp6);
8870   crc32c_proc_chunk(CRC32C::MIDDLE, const_or_pre_comp_const_index[2], const_or_pre_comp_const_index[3], is_pclmulqdq_supported,
8871                     in2, in1, in_out,
8872                     tmp1, tmp2, tmp3,
8873                     w_xtmp1, w_xtmp2, w_xtmp3,
8874                     tmp4, tmp5, 
8875                     tmp6);
8876   crc32c_proc_chunk(CRC32C::LOW, const_or_pre_comp_const_index[4], const_or_pre_comp_const_index[5], is_pclmulqdq_supported,
8877                     in2, in1, in_out,
8878                     tmp1, tmp2, tmp3,
8879                     w_xtmp1, w_xtmp2, w_xtmp3,
8880                     tmp4, tmp5, 
8881                     tmp6);
8882   movl(tmp1, in2);
8883   andl(tmp1, 0x00000007);
8884   negl(tmp1);
8885   addl(tmp1, in2);
8886   addq(tmp1, in1);
8887 
8888   BIND(L_wordByWord);
8889   cmpq(in1, tmp1);
8890   jcc(Assembler::greaterEqual, L_byteByByteProlog);
8891     crc32(in_out, Address(in1, 0), 4);
8892     addq(in1, 4);
8893     jmp(L_wordByWord);
8894   
8895   BIND(L_byteByByteProlog);
8896   andl(in2, 0x00000007);
8897   movl(tmp2, 1);
8898 
8899   BIND(L_byteByByte);
8900   cmpl(tmp2, in2);
8901   jccb(Assembler::greater, L_exit);
8902     crc32(in_out, Address(in1, 0), 1);
8903     incq(in1);
8904     incl(tmp2);
8905     jmp(L_byteByByte);
8906 
8907   BIND(L_exit);
8908 }
8909 #else
8910 void MacroAssembler::crc32c_ipl_alg2_alt2(Register in_out, Register in1, Register in2,
8911                                           Register tmp1, Register  tmp2, Register tmp3,
8912                                           Register tmp4, Register  tmp5, Register tmp6,
8913                                           XMMRegister w_xtmp1, XMMRegister w_xtmp2, XMMRegister w_xtmp3,
8914                                           bool is_pclmulqdq_supported) {
8915   uint32_t const_or_pre_comp_const_index[CRC32C::NUM_PRECOMPUTED_CONSTANTS];
8916   Label L_wordByWord;
8917   Label L_byteByByteProlog;
8918   Label L_byteByByte;
8919   Label L_exit;
8920 
8921   if (is_pclmulqdq_supported) {
8922     const_or_pre_comp_const_index[1] = *(uint32_t *)StubRoutines::_crc32c_table_addr;
8923     const_or_pre_comp_const_index[0] = *((uint32_t *)StubRoutines::_crc32c_table_addr + 1);
8924 
8925     const_or_pre_comp_const_index[3] = *((uint32_t *)StubRoutines::_crc32c_table_addr + 2);
8926     const_or_pre_comp_const_index[2] = *((uint32_t *)StubRoutines::_crc32c_table_addr + 3);
8927 
8928     const_or_pre_comp_const_index[5] = *((uint32_t *)StubRoutines::_crc32c_table_addr + 4);
8929     const_or_pre_comp_const_index[4] = *((uint32_t *)StubRoutines::_crc32c_table_addr + 5);
8930   } else {
8931     const_or_pre_comp_const_index[0] = 1;
8932     const_or_pre_comp_const_index[1] = 0;
8933 
8934     const_or_pre_comp_const_index[2] = 3;
8935     const_or_pre_comp_const_index[3] = 2;
8936 
8937     const_or_pre_comp_const_index[4] = 5;
8938     const_or_pre_comp_const_index[5] = 4;
8939   }
8940   crc32c_proc_chunk(CRC32C::HIGH, const_or_pre_comp_const_index[0], const_or_pre_comp_const_index[1], is_pclmulqdq_supported,
8941                     in2, in1, in_out,
8942                     tmp1, tmp2, tmp3,
8943                     w_xtmp1, w_xtmp2, w_xtmp3,
8944                     tmp4, tmp5,
8945                     tmp6);
8946   crc32c_proc_chunk(CRC32C::MIDDLE, const_or_pre_comp_const_index[2], const_or_pre_comp_const_index[3], is_pclmulqdq_supported,
8947                     in2, in1, in_out,
8948                     tmp1, tmp2, tmp3,
8949                     w_xtmp1, w_xtmp2, w_xtmp3,
8950                     tmp4, tmp5,
8951                     tmp6);
8952   crc32c_proc_chunk(CRC32C::LOW, const_or_pre_comp_const_index[4], const_or_pre_comp_const_index[5], is_pclmulqdq_supported,
8953                     in2, in1, in_out,
8954                     tmp1, tmp2, tmp3,
8955                     w_xtmp1, w_xtmp2, w_xtmp3,
8956                     tmp4, tmp5,
8957                     tmp6);
8958   movl(tmp1, in2);
8959   andl(tmp1, 0x00000007);
8960   negl(tmp1);
8961   addl(tmp1, in2);
8962   addl(tmp1, in1);
8963 
8964   BIND(L_wordByWord);
8965   cmpl(in1, tmp1);
8966   jcc(Assembler::greaterEqual, L_byteByByteProlog);
8967     crc32(in_out, Address(in1,0), 4);
8968     addl(in1, 4);
8969     jmp(L_wordByWord);
8970 
8971   BIND(L_byteByByteProlog);
8972   andl(in2, 0x00000007);
8973   movl(tmp2, 1);
8974 
8975   BIND(L_byteByByte);
8976   cmpl(tmp2, in2);
8977   jccb(Assembler::greater, L_exit);
8978     movb(tmp1, Address(in1, 0));
8979     crc32(in_out, tmp1, 1);
8980     incl(in1);
8981     incl(tmp2);
8982     jmp(L_byteByByte);
8983 
8984   BIND(L_exit);
8985 }
8986 #endif // LP64
8987 #undef BIND
8988 #undef BLOCK_COMMENT
8989 
8990 
8991 Assembler::Condition MacroAssembler::negate_condition(Assembler::Condition cond) {
8992   switch (cond) {
8993     // Note some conditions are synonyms for others
8994     case Assembler::zero:         return Assembler::notZero;
8995     case Assembler::notZero:      return Assembler::zero;
8996     case Assembler::less:         return Assembler::greaterEqual;
8997     case Assembler::lessEqual:    return Assembler::greater;
8998     case Assembler::greater:      return Assembler::lessEqual;
8999     case Assembler::greaterEqual: return Assembler::less;
9000     case Assembler::below:        return Assembler::aboveEqual;
9001     case Assembler::belowEqual:   return Assembler::above;
9002     case Assembler::above:        return Assembler::belowEqual;
9003     case Assembler::aboveEqual:   return Assembler::below;
9004     case Assembler::overflow:     return Assembler::noOverflow;
9005     case Assembler::noOverflow:   return Assembler::overflow;
9006     case Assembler::negative:     return Assembler::positive;
< prev index next >