7299 * @param [in,out]crc Register containing the crc. 7300 * @param [in]val Register containing the byte to fold into the CRC. 7301 * @param [in]table Register containing the table of crc constants. 7302 * 7303 * uint32_t crc; 7304 * val = crc_table[(val ^ crc) & 0xFF]; 7305 * crc = val ^ (crc >> 8); 7306 * 7307 */ 7308 void MacroAssembler::update_byte_crc32(Register crc, Register val, Register table) { 7309 xorl(val, crc); 7310 andl(val, 0xFF); 7311 shrl(crc, 8); // unsigned shift 7312 xorl(crc, Address(table, val, Address::times_4, 0)); 7313 } 7314 7315 /** 7316 * Fold 128-bit data chunk 7317 */ 7318 void MacroAssembler::fold_128bit_crc32(XMMRegister xcrc, XMMRegister xK, XMMRegister xtmp, Register buf, int offset) { 7319 vpclmulhdq(xtmp, xK, xcrc); // [123:64] 7320 vpclmulldq(xcrc, xK, xcrc); // [63:0] 7321 vpxor(xcrc, xcrc, Address(buf, offset), false /* vector256 */); 7322 pxor(xcrc, xtmp); 7323 } 7324 7325 void MacroAssembler::fold_128bit_crc32(XMMRegister xcrc, XMMRegister xK, XMMRegister xtmp, XMMRegister xbuf) { 7326 vpclmulhdq(xtmp, xK, xcrc); 7327 vpclmulldq(xcrc, xK, xcrc); 7328 pxor(xcrc, xbuf); 7329 pxor(xcrc, xtmp); 7330 } 7331 7332 /** 7333 * 8-bit folds to compute 32-bit CRC 7334 * 7335 * uint64_t xcrc; 7336 * timesXtoThe32[xcrc & 0xFF] ^ (xcrc >> 8); 7337 */ 7338 void MacroAssembler::fold_8bit_crc32(XMMRegister xcrc, Register table, XMMRegister xtmp, Register tmp) { 7339 movdl(tmp, xcrc); 7340 andl(tmp, 0xFF); 7341 movdl(xtmp, Address(table, tmp, Address::times_4, 0)); 7342 psrldq(xcrc, 1); // unsigned shift one byte 7343 pxor(xcrc, xtmp); 7344 } 7345 7346 /** 7347 * uint32_t crc; 7348 * timesXtoThe32[crc & 0xFF] ^ (crc >> 8); 7349 */ 7427 movdqu(xmm0, ExternalAddress(StubRoutines::x86::crc_by128_masks_addr() + 16)); 7428 fold_128bit_crc32(xmm1, xmm0, xmm5, xmm2); 7429 fold_128bit_crc32(xmm1, xmm0, xmm5, xmm3); 7430 fold_128bit_crc32(xmm1, xmm0, xmm5, xmm4); 7431 7432 // Fold the rest of 128 bits data chunks 7433 BIND(L_fold_tail); 7434 addl(len, 
3); 7435 jccb(Assembler::lessEqual, L_fold_128b); 7436 movdqu(xmm0, ExternalAddress(StubRoutines::x86::crc_by128_masks_addr() + 16)); 7437 7438 BIND(L_fold_tail_loop); 7439 fold_128bit_crc32(xmm1, xmm0, xmm5, buf, 0); 7440 addptr(buf, 16); 7441 decrementl(len); 7442 jccb(Assembler::greater, L_fold_tail_loop); 7443 7444 // Fold 128 bits in xmm1 down into 32 bits in crc register. 7445 BIND(L_fold_128b); 7446 movdqu(xmm0, ExternalAddress(StubRoutines::x86::crc_by128_masks_addr())); 7447 vpclmulqdq(xmm2, xmm0, xmm1, 0x1); 7448 vpand(xmm3, xmm0, xmm2, false /* vector256 */); 7449 vpclmulqdq(xmm0, xmm0, xmm3, 0x1); 7450 psrldq(xmm1, 8); 7451 psrldq(xmm2, 4); 7452 pxor(xmm0, xmm1); 7453 pxor(xmm0, xmm2); 7454 7455 // 8 8-bit folds to compute 32-bit CRC. 7456 for (int j = 0; j < 4; j++) { 7457 fold_8bit_crc32(xmm0, table, xmm1, rax); 7458 } 7459 movdl(crc, xmm0); // mov 32 bits to general register 7460 for (int j = 0; j < 4; j++) { 7461 fold_8bit_crc32(crc, table, rax); 7462 } 7463 7464 BIND(L_tail_restore); 7465 movl(len, tmp); // restore 7466 BIND(L_tail); 7467 andl(len, 0xf); 7468 jccb(Assembler::zero, L_exit); 7469 | 7299 * @param [in,out]crc Register containing the crc. 7300 * @param [in]val Register containing the byte to fold into the CRC. 7301 * @param [in]table Register containing the table of crc constants. 
 *
 * uint32_t crc;
 * val = crc_table[(val ^ crc) & 0xFF];
 * crc = val ^ (crc >> 8);
 *
 */
// Table-driven single-byte CRC step: fold one byte (in val) into crc.
void MacroAssembler::update_byte_crc32(Register crc, Register val, Register table) {
  xorl(val, crc);                                      // val ^= crc; low byte selects the table entry
  andl(val, 0xFF);                                     // index = (val ^ crc) & 0xFF
  shrl(crc, 8);                                        // unsigned shift: crc >> 8
  xorl(crc, Address(table, val, Address::times_4, 0)); // crc = table[index] ^ (crc >> 8)
}

/**
 * Fold 128-bit data chunk (memory-operand variant): carry-less multiply the
 * current 128-bit crc state by the constants in xK, then xor in the next
 * 16 bytes at [buf + offset].
 *
 * The AVX path uses three-operand VEX encodings.  The SSE path must first
 * copy xcrc into xtmp because the legacy two-operand pclmul forms overwrite
 * their first operand.
 */
void MacroAssembler::fold_128bit_crc32(XMMRegister xcrc, XMMRegister xK, XMMRegister xtmp, Register buf, int offset) {
  if (UseAVX > 0) {
    vpclmulhdq(xtmp, xK, xcrc); // [123:64]
    vpclmulldq(xcrc, xK, xcrc); // [63:0]
    vpxor(xcrc, xcrc, Address(buf, offset), false /* vector256 */);
    pxor(xcrc, xtmp);
  } else {
    movdqa(xtmp, xcrc);         // preserve xcrc: SSE pclmul destroys its first operand
    pclmulhdq(xtmp, xK);        // [123:64]
    pclmulldq(xcrc, xK);        // [63:0]
    pxor(xcrc, xtmp);
    movdqu(xtmp, Address(buf, offset)); // no SSE pxor with unaligned memory operand; load first
    pxor(xcrc, xtmp);
  }
}

// Same folding step as above, but the 128-bit data chunk is already in
// register xbuf instead of memory.
void MacroAssembler::fold_128bit_crc32(XMMRegister xcrc, XMMRegister xK, XMMRegister xtmp, XMMRegister xbuf) {
  if (UseAVX > 0) {
    vpclmulhdq(xtmp, xK, xcrc);
    vpclmulldq(xcrc, xK, xcrc);
    pxor(xcrc, xbuf);
    pxor(xcrc, xtmp);
  } else {
    movdqa(xtmp, xcrc);         // preserve xcrc for the high-half product
    pclmulhdq(xtmp, xK);
    pclmulldq(xcrc, xK);
    pxor(xcrc, xbuf);
    pxor(xcrc, xtmp);
  }
}

/**
 * 8-bit folds to compute 32-bit CRC
 *
 * uint64_t xcrc;
 * timesXtoThe32[xcrc & 0xFF] ^ (xcrc >> 8);
 */
// XMM-register flavor: peel the low byte of xcrc through the table, then
// shift the whole xcrc register right by one byte.
void MacroAssembler::fold_8bit_crc32(XMMRegister xcrc, Register table, XMMRegister xtmp, Register tmp) {
  movdl(tmp, xcrc);                                    // tmp = low 32 bits of xcrc
  andl(tmp, 0xFF);                                     // index = xcrc & 0xFF
  movdl(xtmp, Address(table, tmp, Address::times_4, 0));
  psrldq(xcrc, 1);                                     // unsigned shift one byte
  pxor(xcrc, xtmp);                                    // xcrc = table[index] ^ (xcrc >> 8)
}

/**
 * uint32_t crc;
 * timesXtoThe32[crc & 0xFF] ^ (crc >> 8);
 */
// NOTE(review): the excerpt resumes here in the middle of the CRC32 kernel;
// the enclosing function's head is outside this chunk and is left untouched.
  movdqu(xmm0, ExternalAddress(StubRoutines::x86::crc_by128_masks_addr() + 16));
  fold_128bit_crc32(xmm1, xmm0,
xmm5, xmm2); 7446 fold_128bit_crc32(xmm1, xmm0, xmm5, xmm3); 7447 fold_128bit_crc32(xmm1, xmm0, xmm5, xmm4); 7448 7449 // Fold the rest of 128 bits data chunks 7450 BIND(L_fold_tail); 7451 addl(len, 3); 7452 jccb(Assembler::lessEqual, L_fold_128b); 7453 movdqu(xmm0, ExternalAddress(StubRoutines::x86::crc_by128_masks_addr() + 16)); 7454 7455 BIND(L_fold_tail_loop); 7456 fold_128bit_crc32(xmm1, xmm0, xmm5, buf, 0); 7457 addptr(buf, 16); 7458 decrementl(len); 7459 jccb(Assembler::greater, L_fold_tail_loop); 7460 7461 // Fold 128 bits in xmm1 down into 32 bits in crc register. 7462 BIND(L_fold_128b); 7463 movdqu(xmm0, ExternalAddress(StubRoutines::x86::crc_by128_masks_addr())); 7464 if (UseAVX > 0) { 7465 vpclmulqdq(xmm2, xmm0, xmm1, 0x1); 7466 vpand(xmm3, xmm0, xmm2, false /* vector256 */); 7467 vpclmulqdq(xmm0, xmm0, xmm3, 0x1); 7468 } else { 7469 movdqa(xmm2, xmm0); 7470 pclmulqdq(xmm2, xmm1, 0x1); 7471 movdqa(xmm3, xmm0); 7472 pand(xmm3, xmm2); 7473 pclmulqdq(xmm0, xmm3, 0x1); 7474 } 7475 psrldq(xmm1, 8); 7476 psrldq(xmm2, 4); 7477 pxor(xmm0, xmm1); 7478 pxor(xmm0, xmm2); 7479 7480 // 8 8-bit folds to compute 32-bit CRC. 7481 for (int j = 0; j < 4; j++) { 7482 fold_8bit_crc32(xmm0, table, xmm1, rax); 7483 } 7484 movdl(crc, xmm0); // mov 32 bits to general register 7485 for (int j = 0; j < 4; j++) { 7486 fold_8bit_crc32(crc, table, rax); 7487 } 7488 7489 BIND(L_tail_restore); 7490 movl(len, tmp); // restore 7491 BIND(L_tail); 7492 andl(len, 0xf); 7493 jccb(Assembler::zero, L_exit); 7494 |