src/cpu/x86/vm/macroAssembler_x86.cpp
Index Unified diffs Context diffs Sdiffs Wdiffs Patch New Old Previous File Next File 8052081 Sdiff src/cpu/x86/vm

src/cpu/x86/vm/macroAssembler_x86.cpp

Print this page




7299  * @param [in,out]crc   Register containing the crc.
7300  * @param [in]val       Register containing the byte to fold into the CRC.
7301  * @param [in]table     Register containing the table of crc constants.
7302  *
7303  * uint32_t crc;
7304  * val = crc_table[(val ^ crc) & 0xFF];
7305  * crc = val ^ (crc >> 8);
7306  *
7307  */
/**
 * Fold one input byte into the running CRC-32.
 * Emits: crc = crc_table[(val ^ crc) & 0xFF] ^ (crc >> 8).
 * Clobbers val (it becomes the table index).
 */
void MacroAssembler::update_byte_crc32(Register crc, Register val, Register table) {
  xorl(val, crc);   // val = val ^ crc
  andl(val, 0xFF);  // table index = low byte of (val ^ crc)
  shrl(crc, 8); // unsigned shift
  xorl(crc, Address(table, val, Address::times_4, 0)); // crc = table[index] ^ (crc >> 8)
}
7314 
7315 /**
7316  * Fold 128-bit data chunk
7317  */
7318 void MacroAssembler::fold_128bit_crc32(XMMRegister xcrc, XMMRegister xK, XMMRegister xtmp, Register buf, int offset) {

7319   vpclmulhdq(xtmp, xK, xcrc); // [123:64]
7320   vpclmulldq(xcrc, xK, xcrc); // [63:0]
7321   vpxor(xcrc, xcrc, Address(buf, offset), false /* vector256 */);
7322   pxor(xcrc, xtmp);








7323 }
7324 
7325 void MacroAssembler::fold_128bit_crc32(XMMRegister xcrc, XMMRegister xK, XMMRegister xtmp, XMMRegister xbuf) {

7326   vpclmulhdq(xtmp, xK, xcrc);
7327   vpclmulldq(xcrc, xK, xcrc);
7328   pxor(xcrc, xbuf);
7329   pxor(xcrc, xtmp);







7330 }
7331 
7332 /**
7333  * 8-bit folds to compute 32-bit CRC
7334  *
7335  * uint64_t xcrc;
7336  * timesXtoThe32[xcrc & 0xFF] ^ (xcrc >> 8);
7337  */
/**
 * One 8-bit fold step on a CRC held in an XMM register:
 * xcrc = timesXtoThe32[xcrc & 0xFF] ^ (xcrc >> 8).
 *
 * @param xcrc  [in,out] CRC state (XMM).
 * @param table [in] base of the timesXtoThe32 constant table.
 * @param xtmp  scratch XMM register (clobbered).
 * @param tmp   scratch general register (clobbered).
 */
void MacroAssembler::fold_8bit_crc32(XMMRegister xcrc, Register table, XMMRegister xtmp, Register tmp) {
  movdl(tmp, xcrc);                                     // low 32 bits to GPR
  andl(tmp, 0xFF);                                      // table index = low byte
  movdl(xtmp, Address(table, tmp, Address::times_4, 0)); // load table[index]
  psrldq(xcrc, 1); // unsigned shift one byte
  pxor(xcrc, xtmp);                                     // fold in the table entry
}
7345 
7346 /**
7347  * uint32_t crc;
7348  * timesXtoThe32[crc & 0xFF] ^ (crc >> 8);
7349  */


7427   movdqu(xmm0, ExternalAddress(StubRoutines::x86::crc_by128_masks_addr() + 16));
7428   fold_128bit_crc32(xmm1, xmm0, xmm5, xmm2);
7429   fold_128bit_crc32(xmm1, xmm0, xmm5, xmm3);
7430   fold_128bit_crc32(xmm1, xmm0, xmm5, xmm4);
7431 
7432   // Fold the rest of 128 bits data chunks
7433   BIND(L_fold_tail);
7434   addl(len, 3);
7435   jccb(Assembler::lessEqual, L_fold_128b);
7436   movdqu(xmm0, ExternalAddress(StubRoutines::x86::crc_by128_masks_addr() + 16));
7437 
7438   BIND(L_fold_tail_loop);
7439   fold_128bit_crc32(xmm1, xmm0, xmm5, buf,  0);
7440   addptr(buf, 16);
7441   decrementl(len);
7442   jccb(Assembler::greater, L_fold_tail_loop);
7443 
7444   // Fold 128 bits in xmm1 down into 32 bits in crc register.
7445   BIND(L_fold_128b);
7446   movdqu(xmm0, ExternalAddress(StubRoutines::x86::crc_by128_masks_addr()));

7447   vpclmulqdq(xmm2, xmm0, xmm1, 0x1);
7448   vpand(xmm3, xmm0, xmm2, false /* vector256 */);
7449   vpclmulqdq(xmm0, xmm0, xmm3, 0x1);







7450   psrldq(xmm1, 8);
7451   psrldq(xmm2, 4);
7452   pxor(xmm0, xmm1);
7453   pxor(xmm0, xmm2);
7454 
7455   // 8 8-bit folds to compute 32-bit CRC.
7456   for (int j = 0; j < 4; j++) {
7457     fold_8bit_crc32(xmm0, table, xmm1, rax);
7458   }
7459   movdl(crc, xmm0); // mov 32 bits to general register
7460   for (int j = 0; j < 4; j++) {
7461     fold_8bit_crc32(crc, table, rax);
7462   }
7463 
7464   BIND(L_tail_restore);
7465   movl(len, tmp); // restore
7466   BIND(L_tail);
7467   andl(len, 0xf);
7468   jccb(Assembler::zero, L_exit);
7469 




7299  * @param [in,out]crc   Register containing the crc.
7300  * @param [in]val       Register containing the byte to fold into the CRC.
7301  * @param [in]table     Register containing the table of crc constants.
7302  *
7303  * uint32_t crc;
7304  * val = crc_table[(val ^ crc) & 0xFF];
7305  * crc = val ^ (crc >> 8);
7306  *
7307  */
/**
 * Fold one input byte into the running CRC-32.
 * Emits: crc = crc_table[(val ^ crc) & 0xFF] ^ (crc >> 8).
 * Clobbers val (it becomes the table index).
 */
void MacroAssembler::update_byte_crc32(Register crc, Register val, Register table) {
  xorl(val, crc);   // val = val ^ crc
  andl(val, 0xFF);  // table index = low byte of (val ^ crc)
  shrl(crc, 8); // unsigned shift
  xorl(crc, Address(table, val, Address::times_4, 0)); // crc = table[index] ^ (crc >> 8)
}
7314 
7315 /**
7316  * Fold 128-bit data chunk
7317  */
/**
 * Fold a 128-bit data chunk loaded from memory into the running CRC value.
 *
 * @param xcrc   [in,out] current folded CRC state.
 * @param xK     [in] carry-less-multiplication constants.
 * @param xtmp   scratch register (clobbered).
 * @param buf    [in] base address of the data.
 * @param offset [in] byte offset of the 16-byte chunk to fold in.
 *
 * Uses the non-destructive AVX three-operand forms when available;
 * otherwise falls back to the destructive SSE forms, copying xcrc first.
 */
void MacroAssembler::fold_128bit_crc32(XMMRegister xcrc, XMMRegister xK, XMMRegister xtmp, Register buf, int offset) {
  if (UseAVX > 0) {
    vpclmulhdq(xtmp, xK, xcrc); // [123:64]
    vpclmulldq(xcrc, xK, xcrc); // [63:0]
    vpxor(xcrc, xcrc, Address(buf, offset), false /* vector256 */);
    pxor(xcrc, xtmp);
  } else {
    // SSE pclmul* overwrite their first operand, so preserve xcrc in xtmp.
    movdqa(xtmp, xcrc);
    pclmulhdq(xtmp, xK);   // [123:64]
    pclmulldq(xcrc, xK);   // [63:0]
    pxor(xcrc, xtmp);
    movdqu(xtmp, Address(buf, offset)); // load the data chunk
    pxor(xcrc, xtmp);
  }
}
7333 
/**
 * Fold a 128-bit data chunk held in register xbuf into the running CRC value.
 *
 * @param xcrc [in,out] current folded CRC state.
 * @param xK   [in] carry-less-multiplication constants.
 * @param xtmp scratch register (clobbered).
 * @param xbuf [in] 16 bytes of data to fold in.
 *
 * Uses the non-destructive AVX three-operand forms when available;
 * otherwise falls back to the destructive SSE forms, copying xcrc first.
 */
void MacroAssembler::fold_128bit_crc32(XMMRegister xcrc, XMMRegister xK, XMMRegister xtmp, XMMRegister xbuf) {
  if (UseAVX > 0) {
    vpclmulhdq(xtmp, xK, xcrc);
    vpclmulldq(xcrc, xK, xcrc);
    pxor(xcrc, xbuf);
    pxor(xcrc, xtmp);
  } else {
    // SSE pclmul* overwrite their first operand, so preserve xcrc in xtmp.
    movdqa(xtmp, xcrc);
    pclmulhdq(xtmp, xK);
    pclmulldq(xcrc, xK);
    pxor(xcrc, xbuf);
    pxor(xcrc, xtmp);
  }
}
7348 
7349 /**
7350  * 8-bit folds to compute 32-bit CRC
7351  *
7352  * uint64_t xcrc;
7353  * timesXtoThe32[xcrc & 0xFF] ^ (xcrc >> 8);
7354  */
/**
 * One 8-bit fold step on a CRC held in an XMM register:
 * xcrc = timesXtoThe32[xcrc & 0xFF] ^ (xcrc >> 8).
 *
 * @param xcrc  [in,out] CRC state (XMM).
 * @param table [in] base of the timesXtoThe32 constant table.
 * @param xtmp  scratch XMM register (clobbered).
 * @param tmp   scratch general register (clobbered).
 */
void MacroAssembler::fold_8bit_crc32(XMMRegister xcrc, Register table, XMMRegister xtmp, Register tmp) {
  movdl(tmp, xcrc);                                     // low 32 bits to GPR
  andl(tmp, 0xFF);                                      // table index = low byte
  movdl(xtmp, Address(table, tmp, Address::times_4, 0)); // load table[index]
  psrldq(xcrc, 1); // unsigned shift one byte
  pxor(xcrc, xtmp);                                     // fold in the table entry
}
7362 
7363 /**
7364  * uint32_t crc;
7365  * timesXtoThe32[crc & 0xFF] ^ (crc >> 8);
7366  */


7444   movdqu(xmm0, ExternalAddress(StubRoutines::x86::crc_by128_masks_addr() + 16));
7445   fold_128bit_crc32(xmm1, xmm0, xmm5, xmm2);
7446   fold_128bit_crc32(xmm1, xmm0, xmm5, xmm3);
7447   fold_128bit_crc32(xmm1, xmm0, xmm5, xmm4);
7448 
7449   // Fold the rest of 128 bits data chunks
7450   BIND(L_fold_tail);
7451   addl(len, 3);
7452   jccb(Assembler::lessEqual, L_fold_128b);
7453   movdqu(xmm0, ExternalAddress(StubRoutines::x86::crc_by128_masks_addr() + 16));
7454 
7455   BIND(L_fold_tail_loop);
7456   fold_128bit_crc32(xmm1, xmm0, xmm5, buf,  0);
7457   addptr(buf, 16);
7458   decrementl(len);
7459   jccb(Assembler::greater, L_fold_tail_loop);
7460 
7461   // Fold 128 bits in xmm1 down into 32 bits in crc register.
7462   BIND(L_fold_128b);
7463   movdqu(xmm0, ExternalAddress(StubRoutines::x86::crc_by128_masks_addr()));
7464   if (UseAVX > 0) {
7465     vpclmulqdq(xmm2, xmm0, xmm1, 0x1);
7466     vpand(xmm3, xmm0, xmm2, false /* vector256 */);
7467     vpclmulqdq(xmm0, xmm0, xmm3, 0x1);
7468   } else {
7469     movdqa(xmm2, xmm0);
7470     pclmulqdq(xmm2, xmm1, 0x1);
7471     movdqa(xmm3, xmm0);
7472     pand(xmm3, xmm2);
7473     pclmulqdq(xmm0, xmm3, 0x1);
7474   }
7475   psrldq(xmm1, 8);
7476   psrldq(xmm2, 4);
7477   pxor(xmm0, xmm1);
7478   pxor(xmm0, xmm2);
7479 
7480   // 8 8-bit folds to compute 32-bit CRC.
7481   for (int j = 0; j < 4; j++) {
7482     fold_8bit_crc32(xmm0, table, xmm1, rax);
7483   }
7484   movdl(crc, xmm0); // mov 32 bits to general register
7485   for (int j = 0; j < 4; j++) {
7486     fold_8bit_crc32(crc, table, rax);
7487   }
7488 
7489   BIND(L_tail_restore);
7490   movl(len, tmp); // restore
7491   BIND(L_tail);
7492   andl(len, 0xf);
7493   jccb(Assembler::zero, L_exit);
7494 


src/cpu/x86/vm/macroAssembler_x86.cpp
Index Unified diffs Context diffs Sdiffs Wdiffs Patch New Old Previous File Next File