4212 __ pxor(xmm_temp2, xmm_temp4); // xor the shifted versions 4213 __ pxor(xmm_temp2, xmm_temp5); 4214 __ pxor(xmm_temp2, xmm_temp8); 4215 __ pxor(xmm_temp3, xmm_temp2); 4216 __ pxor(xmm_temp6, xmm_temp3); // the result is in xmm6 4217 4218 __ decrement(blocks); 4219 __ jcc(Assembler::zero, L_exit); 4220 __ movdqu(xmm_temp0, xmm_temp6); 4221 __ addptr(data, 16); 4222 __ jmp(L_ghash_loop); 4223 4224 __ BIND(L_exit); 4225 __ pshufb(xmm_temp6, xmm_temp10); // Byte swap 16-byte result 4226 __ movdqu(Address(state, 0), xmm_temp6); // store the result 4227 __ leave(); 4228 __ ret(0); 4229 return start; 4230 } 4231 4232 /** 4233 * Arguments: 4234 * 4235 * Inputs: 4236 * c_rarg0 - int crc 4237 * c_rarg1 - byte* buf 4238 * c_rarg2 - int length 4239 * 4240 * Output: 4241 * rax - int crc result 4242 */ 4243 address generate_updateBytesCRC32() { 4244 assert(UseCRC32Intrinsics, "need AVX and CLMUL instructions"); 4245 4246 __ align(CodeEntryAlignment); 4247 StubCodeMark mark(this, "StubRoutines", "updateBytesCRC32"); 4248 4249 address start = __ pc(); 4250 // Win64: rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...) 4251 // Unix: rdi, rsi, rdx, rcx, r8, r9 (c_rarg0, c_rarg1, ...) 
5087 memcpy(dst + 32 * ii, src + 16 * ii, 16); 5088 memcpy(dst + 32 * ii + 16, src + 16 * ii, 16); 5089 } 5090 StubRoutines::x86::_k256_W_adr = (address)StubRoutines::x86::_k256_W; 5091 StubRoutines::x86::_pshuffle_byte_flip_mask_addr = generate_pshuffle_byte_flip_mask(); 5092 StubRoutines::_sha256_implCompress = generate_sha256_implCompress(false, "sha256_implCompress"); 5093 StubRoutines::_sha256_implCompressMB = generate_sha256_implCompress(true, "sha256_implCompressMB"); 5094 } 5095 if (UseSHA512Intrinsics) { 5096 StubRoutines::x86::_k512_W_addr = (address)StubRoutines::x86::_k512_W; 5097 StubRoutines::x86::_pshuffle_byte_flip_mask_addr_sha512 = generate_pshuffle_byte_flip_mask_sha512(); 5098 StubRoutines::_sha512_implCompress = generate_sha512_implCompress(false, "sha512_implCompress"); 5099 StubRoutines::_sha512_implCompressMB = generate_sha512_implCompress(true, "sha512_implCompressMB"); 5100 } 5101 5102 // Generate GHASH intrinsics code 5103 if (UseGHASHIntrinsics) { 5104 StubRoutines::x86::_ghash_long_swap_mask_addr = generate_ghash_long_swap_mask(); 5105 StubRoutines::x86::_ghash_byte_swap_mask_addr = generate_ghash_byte_swap_mask(); 5106 StubRoutines::_ghash_processBlocks = generate_ghash_processBlocks(); 5107 } 5108 5109 // Safefetch stubs. 
5110 generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry, 5111 &StubRoutines::_safefetch32_fault_pc, 5112 &StubRoutines::_safefetch32_continuation_pc); 5113 generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry, 5114 &StubRoutines::_safefetchN_fault_pc, 5115 &StubRoutines::_safefetchN_continuation_pc); 5116 #ifdef COMPILER2 5117 if (UseMultiplyToLenIntrinsic) { 5118 StubRoutines::_multiplyToLen = generate_multiplyToLen(); 5119 } 5120 if (UseSquareToLenIntrinsic) { 5121 StubRoutines::_squareToLen = generate_squareToLen(); 5122 } 5123 if (UseMulAddIntrinsic) { 5124 StubRoutines::_mulAdd = generate_mulAdd(); 5125 } 5126 #ifndef _WINDOWS | 4212 __ pxor(xmm_temp2, xmm_temp4); // xor the shifted versions 4213 __ pxor(xmm_temp2, xmm_temp5); 4214 __ pxor(xmm_temp2, xmm_temp8); 4215 __ pxor(xmm_temp3, xmm_temp2); 4216 __ pxor(xmm_temp6, xmm_temp3); // the result is in xmm6 4217 4218 __ decrement(blocks); 4219 __ jcc(Assembler::zero, L_exit); 4220 __ movdqu(xmm_temp0, xmm_temp6); 4221 __ addptr(data, 16); 4222 __ jmp(L_ghash_loop); 4223 4224 __ BIND(L_exit); 4225 __ pshufb(xmm_temp6, xmm_temp10); // Byte swap 16-byte result 4226 __ movdqu(Address(state, 0), xmm_temp6); // store the result 4227 __ leave(); 4228 __ ret(0); 4229 return start; 4230 } 4231 

  // Emits a Base64 lookup table for a 64-character alphabet.
  // Every character becomes one 32-bit little-endian entry (the character
  // code in the low byte, zeros above), so the table is 64 * 4 = 256 bytes.
  // Two consecutive entries are packed into each emitted quadword: the
  // even-indexed character in the low dword, the odd-indexed one in the
  // high dword. generate_base64_encodeBlock() indexes this table with a
  // scale of 4, both for the dword gathers and for the scalar byte loads.
  //
  //   alphabet - exactly 64 characters; only alphabet[0..63] is read.
  void emit_base64_charset_table(const char* alphabet) {
    for (int i = 0; i < 64; i += 2) {
      const jlong even_entry = (jlong)(unsigned char)alphabet[i];
      const jlong odd_entry  = (jlong)(unsigned char)alphabet[i + 1];
      __ emit_data64((odd_entry << 32) | even_entry, relocInfo::none);
    }
  }

  // base64 character set
  // Lookup table for the standard alphabet: A-Z, a-z, 0-9, '+', '/'.
  // Returns the address of the first table entry.
  address base64_charset_addr() {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", "base64_charset");
    address start = __ pc();
    emit_base64_charset_table("ABCDEFGHIJKLMNOPQRSTUVWXYZ"
                              "abcdefghijklmnopqrstuvwxyz"
                              "0123456789+/");
    return start;
  }

  // base64 url character set
  // Same layout as base64_charset_addr(), but the last two entries are
  // '-' and '_' instead of '+' and '/'.
  // Returns the address of the first table entry.
  address base64url_charset_addr() {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", "base64url_charset");
    address start = __ pc();
    emit_base64_charset_table("ABCDEFGHIJKLMNOPQRSTUVWXYZ"
                              "abcdefghijklmnopqrstuvwxyz"
                              "0123456789-_");
    return start;
  }

  // Byte-shuffle control used with vpshufb in generate_base64_encodeBlock().
  // Selector bytes with the top bit set (0x80) make vpshufb write a zero
  // byte; the other selectors pick source bytes, so every group of three
  // input bytes is spread into a four-byte slot with a leading zero.
  // Eight quadwords are emitted; the encoder loads the table with a
  // 256-bit move.
  address base64_bswap_mask_addr() {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", "bswap_mask_base64");
    address start = __ pc();
    __ emit_data64(0x0504038002010080, relocInfo::none);
    __ emit_data64(0x0b0a098008070680, relocInfo::none);
    __ emit_data64(0x0908078006050480, relocInfo::none);
    __ emit_data64(0x0f0e0d800c0b0a80, relocInfo::none);
    __ emit_data64(0x0605048003020180, relocInfo::none);
    __ emit_data64(0x0c0b0a8009080780, relocInfo::none);
    __ emit_data64(0x0504038002010080, relocInfo::none);
    __ emit_data64(0x0b0a098008070680, relocInfo::none);

    return start;
  }

  // Per-word right-shift counts (0, 2, 4, 6 in each group of four words)
  // consumed by evpsrlvw in generate_base64_encodeBlock().
  address base64_right_shift_mask_addr() {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", "right_shift_mask");
    address start = __ pc();
    // 8 identical quadwords = 64 bytes; the encoder loads it as one 512-bit vector.
    for (int i = 0; i < 8; i++) {
      __ emit_data64(0x0006000400020000, relocInfo::none);
    }

    return start;
  }

  // Per-word left-shift counts (0, 4, 2, 0 in each group of four words)
  // consumed by evpsllvw in generate_base64_encodeBlock().
  address base64_left_shift_mask_addr() {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", "left_shift_mask");
    address start = __ pc();
    // 8 identical quadwords = 64 bytes; the encoder loads it as one 512-bit vector.
    for (int i = 0; i < 8; i++) {
      __ emit_data64(0x0000000200040000, relocInfo::none);
    }

    return start;
  }

  // AND mask applied by vpandq in generate_base64_encodeBlock(): in each
  // group of four words it keeps bits 8..13 of the upper three words and
  // clears the lowest word.
  address base64_and_mask_addr() {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", "and_mask");
    address start = __ pc();
    // 8 identical quadwords = 64 bytes; the encoder loads it as one 512-bit vector.
    for (int i = 0; i < 8; i++) {
      __ emit_data64(0x3f003f003f000000, relocInfo::none);
    }
    return start;
  }

  // All-ones constant loaded into xmm16 by generate_base64_encodeBlock()
  // before it builds the k1 opmask.
  address base64_gather_mask_addr() {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", "gather_mask");
    address start = __ pc();
    // Two quadwords (16 bytes): the encoder reads this constant with a
    // 128-bit movdqu, so emitting only one quadword would make that load
    // run 8 bytes past the end of the constant.
    __ emit_data64(0xffffffffffffffff, relocInfo::none);
    __ emit_data64(0xffffffffffffffff, relocInfo::none);
    return start;
  }

  // Code for generating Base64 encoding.
  // Intrinsic function prototype in Base64.java:
  // private void encodeBlock(byte[] src, int sp, int sl, byte[] dst, int dp, boolean isURL) {
  //
  // Arguments as read by this stub:
  //   c_rarg0                           - source array
  //   c_rarg1                           - start offset
  //   c_rarg2                           - end offset
  //   c_rarg3                           - destination array
  //   c_rarg4 (Win64: rbp + 6 words)    - dp, write position in destination
  //   c_rarg5 (Win64: rbp + 7 words)    - isURL, 0 selects the standard
  //                                       alphabet, anything else the URL one
  //
  // The input length is end offset - start offset. It is processed by three
  // loops that fall through to one another:
  //   L_process80 - while length >= 80: consumes 72 bytes, writes 96
  //   L_process32 - while length >= 32: consumes 24 bytes, writes 32
  //   L_process3  - while length >= 3:  consumes 3 bytes,  writes 4
  // A remainder of fewer than 3 bytes is left unprocessed by this stub.
  //
  // Returns the entry address of the generated stub.
  address generate_base64_encodeBlock() {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", "implEncode");
    address start = __ pc();
    __ enter();

    // Save callee-saved registers before using them
    __ push(r12);
    __ push(r13);
    __ push(r14);
    __ push(r15);
    __ push(rbx);

    // Save k1 value in rbx.
    // This has to happen before the first branch to L_exit: the exit path
    // unconditionally restores k1 from rbx, so an early exit taken before
    // the save would load k1 with whatever the caller had left in rbx.
    __ kmovql(rbx, k1);

    // arguments
    const Register source = c_rarg0; // Source Array
    const Register start_offset = c_rarg1; // start offset
    const Register end_offset = c_rarg2; // end offset
    const Register dest = c_rarg3; // destination array

#ifndef _WIN64
    const Register dp = c_rarg4;  // Position for writing to dest array
    const Register isURL = c_rarg5; // Base64 or URL character set
#else
    const Address dp_mem(rbp, 6 * wordSize);  // dp is passed on the stack on Win64
    const Address isURL_mem(rbp, 7 * wordSize); // so is isURL
    const Register isURL = r10;      // pick the volatile windows register
    const Register dp = r12;
    __ movl(dp, dp_mem);
    __ movl(isURL, isURL_mem);
#endif

    const Register length = r14;
    Label L_process80, L_process32, L_process3, L_exit, L_processdata;

    // calculate length from offsets; nothing to do if it is not positive
    __ movl(length, end_offset);
    __ subl(length, start_offset);
    __ cmpl(length, 0);
    __ jcc(Assembler::lessEqual, L_exit);

    // r11 holds the lookup table for the rest of the stub.
    __ lea(r11, ExternalAddress(StubRoutines::x86::base64_charset_addr()));
    // check if base64 charset(isURL=0) or base64 url charset(isURL=1) needs to be loaded
    __ cmpl(isURL, 0);
    __ jcc(Assembler::equal, L_processdata);
    __ lea(r11, ExternalAddress(StubRoutines::x86::base64url_charset_addr()));

    // load masks required for encoding data
    __ BIND(L_processdata);
    __ movdqu(xmm16, ExternalAddress(StubRoutines::x86::base64_gather_mask_addr()));
    // Set 64 bits of K register (a register compared with itself is equal in every byte).
    __ evpcmpeqb(k1, xmm16, xmm16, Assembler::AVX_512bit);
    // r13 is handed to evmovdquq as the scratch register for the external addresses.
    __ evmovdquq(xmm12, ExternalAddress(StubRoutines::x86::base64_bswap_mask_addr()), Assembler::AVX_256bit, r13);
    __ evmovdquq(xmm13, ExternalAddress(StubRoutines::x86::base64_right_shift_mask_addr()), Assembler::AVX_512bit, r13);
    __ evmovdquq(xmm14, ExternalAddress(StubRoutines::x86::base64_left_shift_mask_addr()), Assembler::AVX_512bit, r13);
    __ evmovdquq(xmm15, ExternalAddress(StubRoutines::x86::base64_and_mask_addr()), Assembler::AVX_512bit, r13);

    // Vector Base64 implementation, producing 96 bytes of encoded data
    __ BIND(L_process80);
    // The third 32-byte load below starts at offset 48 and therefore touches
    // input bytes up to offset 79, hence the threshold of 80 even though
    // only 72 bytes are consumed per iteration.
    __ cmpl(length, 80);
    __ jcc(Assembler::below, L_process32);
    __ evmovdquq(xmm0, Address(source, start_offset, Address::times_1, 0), Assembler::AVX_256bit);
    __ evmovdquq(xmm1, Address(source, start_offset, Address::times_1, 24), Assembler::AVX_256bit);
    __ evmovdquq(xmm2, Address(source, start_offset, Address::times_1, 48), Assembler::AVX_256bit);

    // permute the input data in such a manner that we have continuity of the source
    __ vpermq(xmm3, xmm0, 148, Assembler::AVX_256bit);
    __ vpermq(xmm4, xmm1, 148, Assembler::AVX_256bit);
    __ vpermq(xmm5, xmm2, 148, Assembler::AVX_256bit);

    // shuffle input and group 3 bytes of data and to it add 0 as the 4th byte.
    // we can deal with 12 bytes at a time in a 128 bit register
    __ vpshufb(xmm3, xmm3, xmm12, Assembler::AVX_256bit);
    __ vpshufb(xmm4, xmm4, xmm12, Assembler::AVX_256bit);
    __ vpshufb(xmm5, xmm5, xmm12, Assembler::AVX_256bit);

    // convert byte to word. Each 128 bit register will have 6 bytes for processing
    __ vpmovzxbw(xmm3, xmm3, Assembler::AVX_512bit);
    __ vpmovzxbw(xmm4, xmm4, Assembler::AVX_512bit);
    __ vpmovzxbw(xmm5, xmm5, Assembler::AVX_512bit);

    // Extract bits in the following pattern 6, 4+2, 2+4, 6 to convert 3, 8 bit numbers to 4, 6 bit numbers
    __ evpsrlvw(xmm0, xmm3, xmm13, Assembler::AVX_512bit);
    __ evpsrlvw(xmm1, xmm4, xmm13, Assembler::AVX_512bit);
    __ evpsrlvw(xmm2, xmm5, xmm13, Assembler::AVX_512bit);

    __ evpsllvw(xmm3, xmm3, xmm14, Assembler::AVX_512bit);
    __ evpsllvw(xmm4, xmm4, xmm14, Assembler::AVX_512bit);
    __ evpsllvw(xmm5, xmm5, xmm14, Assembler::AVX_512bit);

    __ vpsrlq(xmm0, xmm0, 8, Assembler::AVX_512bit);
    __ vpsrlq(xmm1, xmm1, 8, Assembler::AVX_512bit);
    __ vpsrlq(xmm2, xmm2, 8, Assembler::AVX_512bit);

    __ vpsllq(xmm3, xmm3, 8, Assembler::AVX_512bit);
    __ vpsllq(xmm4, xmm4, 8, Assembler::AVX_512bit);
    __ vpsllq(xmm5, xmm5, 8, Assembler::AVX_512bit);

    __ vpandq(xmm3, xmm3, xmm15, Assembler::AVX_512bit);
    __ vpandq(xmm4, xmm4, xmm15, Assembler::AVX_512bit);
    __ vpandq(xmm5, xmm5, xmm15, Assembler::AVX_512bit);

    // Get the final 4*6 bits base64 encoding
    __ vporq(xmm3, xmm3, xmm0, Assembler::AVX_512bit);
    __ vporq(xmm4, xmm4, xmm1, Assembler::AVX_512bit);
    __ vporq(xmm5, xmm5, xmm2, Assembler::AVX_512bit);

    // Shift
    __ vpsrlq(xmm3, xmm3, 8, Assembler::AVX_512bit);
    __ vpsrlq(xmm4, xmm4, 8, Assembler::AVX_512bit);
    __ vpsrlq(xmm5, xmm5, 8, Assembler::AVX_512bit);

    // look up 6 bits in the base64 character set to fetch the encoding
    // we are converting word to dword as gather instructions need dword indices for looking up encoding
    __ vextracti64x4(xmm6, xmm3, 0);
    __ vpmovzxwd(xmm0, xmm6, Assembler::AVX_512bit);
    __ vextracti64x4(xmm6, xmm3, 1);
    __ vpmovzxwd(xmm1, xmm6, Assembler::AVX_512bit);

    __ vextracti64x4(xmm6, xmm4, 0);
    __ vpmovzxwd(xmm2, xmm6, Assembler::AVX_512bit);
    __ vextracti64x4(xmm6, xmm4, 1);
    __ vpmovzxwd(xmm3, xmm6, Assembler::AVX_512bit);

    __ vextracti64x4(xmm4, xmm5, 0);
    __ vpmovzxwd(xmm6, xmm4, Assembler::AVX_512bit);

    __ vextracti64x4(xmm4, xmm5, 1);
    __ vpmovzxwd(xmm7, xmm4, Assembler::AVX_512bit);

    // Every gather gets a fresh copy of the all-ones mask in k2; k1 itself
    // is kept intact for the next gather.
    __ kmovql(k2, k1);
    __ evpgatherdd(xmm4, k2, Address(r11, xmm0, Address::times_4, 0), Assembler::AVX_512bit);
    __ kmovql(k2, k1);
    __ evpgatherdd(xmm5, k2, Address(r11, xmm1, Address::times_4, 0), Assembler::AVX_512bit);
    __ kmovql(k2, k1);
    __ evpgatherdd(xmm8, k2, Address(r11, xmm2, Address::times_4, 0), Assembler::AVX_512bit);
    __ kmovql(k2, k1);
    __ evpgatherdd(xmm9, k2, Address(r11, xmm3, Address::times_4, 0), Assembler::AVX_512bit);
    __ kmovql(k2, k1);
    __ evpgatherdd(xmm10, k2, Address(r11, xmm6, Address::times_4, 0), Assembler::AVX_512bit);
    __ kmovql(k2, k1);
    __ evpgatherdd(xmm11, k2, Address(r11, xmm7, Address::times_4, 0), Assembler::AVX_512bit);

    // Down convert dword to byte. Final output is 16*6 = 96 bytes long
    __ evpmovdb(Address(dest, dp, Address::times_1, 0), xmm4, Assembler::AVX_512bit);
    __ evpmovdb(Address(dest, dp, Address::times_1, 16), xmm5, Assembler::AVX_512bit);
    __ evpmovdb(Address(dest, dp, Address::times_1, 32), xmm8, Assembler::AVX_512bit);
    __ evpmovdb(Address(dest, dp, Address::times_1, 48), xmm9, Assembler::AVX_512bit);
    __ evpmovdb(Address(dest, dp, Address::times_1, 64), xmm10, Assembler::AVX_512bit);
    __ evpmovdb(Address(dest, dp, Address::times_1, 80), xmm11, Assembler::AVX_512bit);

    // Advance the base pointers; start_offset and dp stay fixed.
    __ addq(dest, 96);
    __ addq(source, 72);
    __ subq(length, 72);
    __ jmp(L_process80);

    // Vector Base64 implementation generating 32 bytes of encoded data
    __ BIND(L_process32);
    // A full 32-byte load is issued although only 24 bytes are consumed,
    // hence the threshold of 32.
    __ cmpl(length, 32);
    __ jcc(Assembler::below, L_process3);
    __ evmovdquq(xmm0, Address(source, start_offset), Assembler::AVX_256bit);
    __ vpermq(xmm0, xmm0, 148, Assembler::AVX_256bit);
    __ vpshufb(xmm6, xmm0, xmm12, Assembler::AVX_256bit);
    __ vpmovzxbw(xmm6, xmm6, Assembler::AVX_512bit);
    __ evpsrlvw(xmm2, xmm6, xmm13, Assembler::AVX_512bit);
    __ evpsllvw(xmm3, xmm6, xmm14, Assembler::AVX_512bit);

    __ vpsrlq(xmm2, xmm2, 8, Assembler::AVX_512bit);
    __ vpsllq(xmm3, xmm3, 8, Assembler::AVX_512bit);
    __ vpandq(xmm3, xmm3, xmm15, Assembler::AVX_512bit);
    __ vporq(xmm1, xmm2, xmm3, Assembler::AVX_512bit);
    __ vpsrlq(xmm1, xmm1, 8, Assembler::AVX_512bit);
    __ vextracti64x4(xmm9, xmm1, 0);
    __ vpmovzxwd(xmm6, xmm9, Assembler::AVX_512bit);
    __ vextracti64x4(xmm9, xmm1, 1);
    __ vpmovzxwd(xmm5, xmm9, Assembler::AVX_512bit);
    __ kmovql(k2, k1);
    __ evpgatherdd(xmm8, k2, Address(r11, xmm6, Address::times_4, 0), Assembler::AVX_512bit);
    __ kmovql(k2, k1);
    __ evpgatherdd(xmm10, k2, Address(r11, xmm5, Address::times_4, 0), Assembler::AVX_512bit);
    __ evpmovdb(Address(dest, dp, Address::times_1, 0), xmm8, Assembler::AVX_512bit);
    __ evpmovdb(Address(dest, dp, Address::times_1, 16), xmm10, Assembler::AVX_512bit);
    __ subq(length, 24);
    __ addq(dest, 32);
    __ addq(source, 24);
    __ jmp(L_process32);

    // Scalar data processing takes 3 bytes at a time and produces 4 bytes of encoded data
    /* This code corresponds to the scalar version of the following snippet in Base64.java
    ** int bits = (src[sp0++] & 0xff) << 16 |(src[sp0++] & 0xff) << 8 |(src[sp0++] & 0xff);
    ** dst[dp0++] = (byte)base64[(bits >>> 18) & 0x3f];
    ** dst[dp0++] = (byte)base64[(bits >>> 12) & 0x3f];
    ** dst[dp0++] = (byte)base64[(bits >>> 6) & 0x3f];
    ** dst[dp0++] = (byte)base64[bits & 0x3f];*/
    __ BIND(L_process3);
    __ cmpl(length, 3);
    __ jcc(Assembler::below, L_exit);
    // Read 1 byte at a time
    __ movzbl(rax, Address(source, start_offset));
    __ shll(rax, 0x10);
    __ movl(r15, rax);
    __ movzbl(rax, Address(source, start_offset, Address::times_1, 1));
    __ shll(rax, 0x8);
    __ movzwl(rax, rax);
    __ orl(r15, rax);
    __ movzbl(rax, Address(source, start_offset, Address::times_1, 2));
    __ orl(rax, r15);
    // Save 3 bytes read in r15
    __ movl(r15, rax);
    __ shrl(rax, 0x12);
    __ andl(rax, 0x3f);
    // rax contains the index, r11 contains base64 lookup table
    // (entries are 4 bytes wide, hence the times_4 scale)
    __ movb(rax, Address(r11, rax, Address::times_4));
    // Write the encoded byte to destination
    __ movb(Address(dest, dp, Address::times_1, 0), rax);
    __ movl(rax, r15);
    __ shrl(rax, 0xc);
    __ andl(rax, 0x3f);
    __ movb(rax, Address(r11, rax, Address::times_4));
    __ movb(Address(dest, dp, Address::times_1, 1), rax);
    __ movl(rax, r15);
    __ shrl(rax, 0x6);
    __ andl(rax, 0x3f);
    __ movb(rax, Address(r11, rax, Address::times_4));
    __ movb(Address(dest, dp, Address::times_1, 2), rax);
    __ movl(rax, r15);
    __ andl(rax, 0x3f);
    __ movb(rax, Address(r11, rax, Address::times_4));
    __ movb(Address(dest, dp, Address::times_1, 3), rax);
    __ subl(length, 3);
    __ addq(dest, 4);
    __ addq(source, 3);
    __ jmp(L_process3);

    __ BIND(L_exit);
    // restore k1 register value (saved in rbx right after the pushes above)
    __ kmovql(k1, rbx);
    // pop in the reverse order of the pushes at entry
    __ pop(rbx);
    __ pop(r15);
    __ pop(r14);
    __ pop(r13);
    __ pop(r12);
    __ leave();
    __ ret(0);
    return start;
  }

4634 /** 4635 * Arguments: 4636 * 4637 * Inputs: 4638 * c_rarg0 - int crc 4639 * c_rarg1 - byte* buf 4640 * c_rarg2 - int length 4641 * 4642 * Output: 4643 * rax - int crc result 4644 */ 4645 address generate_updateBytesCRC32() { 4646 assert(UseCRC32Intrinsics, "need AVX and CLMUL instructions"); 4647 4648 __ align(CodeEntryAlignment); 4649 StubCodeMark mark(this, "StubRoutines", "updateBytesCRC32"); 4650 4651 address start = __ pc(); 4652 // Win64: rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...) 4653 // Unix: rdi, rsi, rdx, rcx, r8, r9 (c_rarg0, c_rarg1, ...) 5489 memcpy(dst + 32 * ii, src + 16 * ii, 16); 5490 memcpy(dst + 32 * ii + 16, src + 16 * ii, 16); 5491 } 5492 StubRoutines::x86::_k256_W_adr = (address)StubRoutines::x86::_k256_W; 5493 StubRoutines::x86::_pshuffle_byte_flip_mask_addr = generate_pshuffle_byte_flip_mask(); 5494 StubRoutines::_sha256_implCompress = generate_sha256_implCompress(false, "sha256_implCompress"); 5495 StubRoutines::_sha256_implCompressMB = generate_sha256_implCompress(true, "sha256_implCompressMB"); 5496 } 5497 if (UseSHA512Intrinsics) { 5498 StubRoutines::x86::_k512_W_addr = (address)StubRoutines::x86::_k512_W; 5499 StubRoutines::x86::_pshuffle_byte_flip_mask_addr_sha512 = generate_pshuffle_byte_flip_mask_sha512(); 5500 StubRoutines::_sha512_implCompress = generate_sha512_implCompress(false, "sha512_implCompress"); 5501 StubRoutines::_sha512_implCompressMB = generate_sha512_implCompress(true, "sha512_implCompressMB"); 5502 } 5503 5504 // Generate GHASH intrinsics code 5505 if (UseGHASHIntrinsics) { 5506 StubRoutines::x86::_ghash_long_swap_mask_addr = 
generate_ghash_long_swap_mask(); 5507 StubRoutines::x86::_ghash_byte_swap_mask_addr = generate_ghash_byte_swap_mask(); 5508 StubRoutines::_ghash_processBlocks = generate_ghash_processBlocks(); 5509 } 5510 5511 if (UseBASE64Intrinsics) { 5512 StubRoutines::x86::_and_mask = base64_and_mask_addr(); 5513 StubRoutines::x86::_bswap_mask = base64_bswap_mask_addr(); 5514 StubRoutines::x86::_base64_charset = base64_charset_addr(); 5515 StubRoutines::x86::_url_charset = base64url_charset_addr(); 5516 StubRoutines::x86::_gather_mask = base64_gather_mask_addr(); 5517 StubRoutines::x86::_left_shift_mask = base64_left_shift_mask_addr(); 5518 StubRoutines::x86::_right_shift_mask = base64_right_shift_mask_addr(); 5519 StubRoutines::_base64_encodeBlock = generate_base64_encodeBlock(); 5520 } 5521 5522 // Safefetch stubs. 5523 generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry, 5524 &StubRoutines::_safefetch32_fault_pc, 5525 &StubRoutines::_safefetch32_continuation_pc); 5526 generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry, 5527 &StubRoutines::_safefetchN_fault_pc, 5528 &StubRoutines::_safefetchN_continuation_pc); 5529 #ifdef COMPILER2 5530 if (UseMultiplyToLenIntrinsic) { 5531 StubRoutines::_multiplyToLen = generate_multiplyToLen(); 5532 } 5533 if (UseSquareToLenIntrinsic) { 5534 StubRoutines::_squareToLen = generate_squareToLen(); 5535 } 5536 if (UseMulAddIntrinsic) { 5537 StubRoutines::_mulAdd = generate_mulAdd(); 5538 } 5539 #ifndef _WINDOWS |