src/hotspot/cpu/x86/stubGenerator_x86_64.cpp

4212     __ pxor(xmm_temp2, xmm_temp4);      // xor the shifted versions
4213     __ pxor(xmm_temp2, xmm_temp5);
4214     __ pxor(xmm_temp2, xmm_temp8);
4215     __ pxor(xmm_temp3, xmm_temp2);
4216     __ pxor(xmm_temp6, xmm_temp3);      // the result is in xmm6
4217 
4218     __ decrement(blocks);
4219     __ jcc(Assembler::zero, L_exit);
4220     __ movdqu(xmm_temp0, xmm_temp6);
4221     __ addptr(data, 16);
4222     __ jmp(L_ghash_loop);
4223 
4224     __ BIND(L_exit);
4225     __ pshufb(xmm_temp6, xmm_temp10);          // Byte swap 16-byte result
4226     __ movdqu(Address(state, 0), xmm_temp6);   // store the result
4227     __ leave();
4228     __ ret(0);
4229     return start;
4230   }
4231 
4232   /**
4233    *  Arguments:
4234    *
4235    * Inputs:
4236    *   c_rarg0   - int crc
4237    *   c_rarg1   - byte* buf
4238    *   c_rarg2   - int length
4239    *
4240    * Output:
4241    *       rax   - int crc result
4242    */
4243   address generate_updateBytesCRC32() {
4244     assert(UseCRC32Intrinsics, "need AVX and CLMUL instructions");
4245 
4246     __ align(CodeEntryAlignment);
4247     StubCodeMark mark(this, "StubRoutines", "updateBytesCRC32");
4248 
4249     address start = __ pc();
4250     // Win64: rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...)
4251     // Unix:  rdi, rsi, rdx, rcx, r8, r9 (c_rarg0, c_rarg1, ...)


5087         memcpy(dst + 32 * ii,      src + 16 * ii, 16);
5088         memcpy(dst + 32 * ii + 16, src + 16 * ii, 16);
5089       }
5090       StubRoutines::x86::_k256_W_adr = (address)StubRoutines::x86::_k256_W;
5091       StubRoutines::x86::_pshuffle_byte_flip_mask_addr = generate_pshuffle_byte_flip_mask();
5092       StubRoutines::_sha256_implCompress = generate_sha256_implCompress(false, "sha256_implCompress");
5093       StubRoutines::_sha256_implCompressMB = generate_sha256_implCompress(true, "sha256_implCompressMB");
5094     }
5095     if (UseSHA512Intrinsics) {
5096       StubRoutines::x86::_k512_W_addr = (address)StubRoutines::x86::_k512_W;
5097       StubRoutines::x86::_pshuffle_byte_flip_mask_addr_sha512 = generate_pshuffle_byte_flip_mask_sha512();
5098       StubRoutines::_sha512_implCompress = generate_sha512_implCompress(false, "sha512_implCompress");
5099       StubRoutines::_sha512_implCompressMB = generate_sha512_implCompress(true, "sha512_implCompressMB");
5100     }
5101 
5102     // Generate GHASH intrinsics code
5103     if (UseGHASHIntrinsics) {
5104       StubRoutines::x86::_ghash_long_swap_mask_addr = generate_ghash_long_swap_mask();
5105       StubRoutines::x86::_ghash_byte_swap_mask_addr = generate_ghash_byte_swap_mask();
5106       StubRoutines::_ghash_processBlocks = generate_ghash_processBlocks();

5107     }
5108 
5109     // Safefetch stubs.
5110     generate_safefetch("SafeFetch32", sizeof(int),     &StubRoutines::_safefetch32_entry,
5111                                                        &StubRoutines::_safefetch32_fault_pc,
5112                                                        &StubRoutines::_safefetch32_continuation_pc);
5113     generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry,
5114                                                        &StubRoutines::_safefetchN_fault_pc,
5115                                                        &StubRoutines::_safefetchN_continuation_pc);
5116 #ifdef COMPILER2
5117     if (UseMultiplyToLenIntrinsic) {
5118       StubRoutines::_multiplyToLen = generate_multiplyToLen();
5119     }
5120     if (UseSquareToLenIntrinsic) {
5121       StubRoutines::_squareToLen = generate_squareToLen();
5122     }
5123     if (UseMulAddIntrinsic) {
5124       StubRoutines::_mulAdd = generate_mulAdd();
5125     }
5126 #ifndef _WINDOWS




4212     __ pxor(xmm_temp2, xmm_temp4);      // xor the shifted versions
4213     __ pxor(xmm_temp2, xmm_temp5);
4214     __ pxor(xmm_temp2, xmm_temp8);
4215     __ pxor(xmm_temp3, xmm_temp2);
4216     __ pxor(xmm_temp6, xmm_temp3);      // the result is in xmm6
4217 
4218     __ decrement(blocks);
4219     __ jcc(Assembler::zero, L_exit);
4220     __ movdqu(xmm_temp0, xmm_temp6);
4221     __ addptr(data, 16);
4222     __ jmp(L_ghash_loop);
4223 
4224     __ BIND(L_exit);
4225     __ pshufb(xmm_temp6, xmm_temp10);          // Byte swap 16-byte result
4226     __ movdqu(Address(state, 0), xmm_temp6);   // store the result
4227     __ leave();
4228     __ ret(0);
4229     return start;
4230   }
4231 
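For orientation: the loop ending above implements the standard GHASH recurrence, state = (state XOR block_i) * H in GF(2^128), with the carry-less multiply and reduction done by the pclmulqdq/pxor sequence whose tail is shown. A minimal scalar sketch of the same loop structure, assuming a hypothetical gf128_mul() helper in place of that sequence:

    #include <stdint.h>
    void gf128_mul(uint8_t x[16], const uint8_t y[16]);  // hypothetical: carry-less multiply + reduction

    static void ghash_process_blocks_model(uint8_t state[16], const uint8_t subkeyH[16],
                                           const uint8_t* data, int blocks) {
      for (int i = 0; i < blocks; i++) {
        for (int j = 0; j < 16; j++) {
          state[j] ^= data[16 * i + j];   // fold the next 16-byte block into the state
        }
        gf128_mul(state, subkeyH);        // state = state * H in GF(2^128)
      }
    }
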
4232   // Base64 character set
4233   address base64_charset_addr() {
4234     __ align(CodeEntryAlignment);
4235     StubCodeMark mark(this, "StubRoutines", "base64_charset");
4236     address start = __ pc();
4237     __ emit_data64(0x0000004200000041, relocInfo::none);
4238     __ emit_data64(0x0000004400000043, relocInfo::none);
4239     __ emit_data64(0x0000004600000045, relocInfo::none);
4240     __ emit_data64(0x0000004800000047, relocInfo::none);
4241     __ emit_data64(0x0000004a00000049, relocInfo::none);
4242     __ emit_data64(0x0000004c0000004b, relocInfo::none);
4243     __ emit_data64(0x0000004e0000004d, relocInfo::none);
4244     __ emit_data64(0x000000500000004f, relocInfo::none);
4245     __ emit_data64(0x0000005200000051, relocInfo::none);
4246     __ emit_data64(0x0000005400000053, relocInfo::none);
4247     __ emit_data64(0x0000005600000055, relocInfo::none);
4248     __ emit_data64(0x0000005800000057, relocInfo::none);
4249     __ emit_data64(0x0000005a00000059, relocInfo::none);
4250     __ emit_data64(0x0000006200000061, relocInfo::none);
4251     __ emit_data64(0x0000006400000063, relocInfo::none);
4252     __ emit_data64(0x0000006600000065, relocInfo::none);
4253     __ emit_data64(0x0000006800000067, relocInfo::none);
4254     __ emit_data64(0x0000006a00000069, relocInfo::none);
4255     __ emit_data64(0x0000006c0000006b, relocInfo::none);
4256     __ emit_data64(0x0000006e0000006d, relocInfo::none);
4257     __ emit_data64(0x000000700000006f, relocInfo::none);
4258     __ emit_data64(0x0000007200000071, relocInfo::none);
4259     __ emit_data64(0x0000007400000073, relocInfo::none);
4260     __ emit_data64(0x0000007600000075, relocInfo::none);
4261     __ emit_data64(0x0000007800000077, relocInfo::none);
4262     __ emit_data64(0x0000007a00000079, relocInfo::none);
4263     __ emit_data64(0x0000003100000030, relocInfo::none);
4264     __ emit_data64(0x0000003300000032, relocInfo::none);
4265     __ emit_data64(0x0000003500000034, relocInfo::none);
4266     __ emit_data64(0x0000003700000036, relocInfo::none);
4267     __ emit_data64(0x0000003900000038, relocInfo::none);
4268     __ emit_data64(0x0000002f0000002b, relocInfo::none);
4269     return start;
4270   }
4271 
4272   // Base64 URL character set
4273   address base64url_charset_addr() {
4274     __ align(CodeEntryAlignment);
4275     StubCodeMark mark(this, "StubRoutines", "base64url_charset");
4276     address start = __ pc();
4277     __ emit_data64(0x0000004200000041, relocInfo::none);
4278     __ emit_data64(0x0000004400000043, relocInfo::none);
4279     __ emit_data64(0x0000004600000045, relocInfo::none);
4280     __ emit_data64(0x0000004800000047, relocInfo::none);
4281     __ emit_data64(0x0000004a00000049, relocInfo::none);
4282     __ emit_data64(0x0000004c0000004b, relocInfo::none);
4283     __ emit_data64(0x0000004e0000004d, relocInfo::none);
4284     __ emit_data64(0x000000500000004f, relocInfo::none);
4285     __ emit_data64(0x0000005200000051, relocInfo::none);
4286     __ emit_data64(0x0000005400000053, relocInfo::none);
4287     __ emit_data64(0x0000005600000055, relocInfo::none);
4288     __ emit_data64(0x0000005800000057, relocInfo::none);
4289     __ emit_data64(0x0000005a00000059, relocInfo::none);
4290     __ emit_data64(0x0000006200000061, relocInfo::none);
4291     __ emit_data64(0x0000006400000063, relocInfo::none);
4292     __ emit_data64(0x0000006600000065, relocInfo::none);
4293     __ emit_data64(0x0000006800000067, relocInfo::none);
4294     __ emit_data64(0x0000006a00000069, relocInfo::none);
4295     __ emit_data64(0x0000006c0000006b, relocInfo::none);
4296     __ emit_data64(0x0000006e0000006d, relocInfo::none);
4297     __ emit_data64(0x000000700000006f, relocInfo::none);
4298     __ emit_data64(0x0000007200000071, relocInfo::none);
4299     __ emit_data64(0x0000007400000073, relocInfo::none);
4300     __ emit_data64(0x0000007600000075, relocInfo::none);
4301     __ emit_data64(0x0000007800000077, relocInfo::none);
4302     __ emit_data64(0x0000007a00000079, relocInfo::none);
4303     __ emit_data64(0x0000003100000030, relocInfo::none);
4304     __ emit_data64(0x0000003300000032, relocInfo::none);
4305     __ emit_data64(0x0000003500000034, relocInfo::none);
4306     __ emit_data64(0x0000003700000036, relocInfo::none);
4307     __ emit_data64(0x0000003900000038, relocInfo::none);
4308     __ emit_data64(0x0000005f0000002d, relocInfo::none);
4309 
4310     return start;
4311   }
4312 
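Each Base64 character in the two tables above occupies its own 32-bit slot (two characters per emit_data64), so the dword gathers in generate_base64_encodeBlock can index the table directly with 6-bit values. A hypothetical C view of the emitted data; the URL-safe table is identical except that its last two entries are '-' and '_' instead of '+' and '/':

    #include <stdint.h>
    // 64 dword entries, one ASCII code per 6-bit index (standard alphabet).
    static const uint32_t kBase64Charset[64] = {
      'A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P',
      'Q','R','S','T','U','V','W','X','Y','Z','a','b','c','d','e','f',
      'g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v',
      'w','x','y','z','0','1','2','3','4','5','6','7','8','9','+','/'
    };
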
4313   address base64_bswap_mask_addr() {
4314     __ align(CodeEntryAlignment);
4315     StubCodeMark mark(this, "StubRoutines", "bswap_mask_base64");
4316     address start = __ pc();
4317     __ emit_data64(0x0504038002010080, relocInfo::none);
4318     __ emit_data64(0x0b0a098008070680, relocInfo::none);
4319     __ emit_data64(0x0908078006050480, relocInfo::none);
4320     __ emit_data64(0x0f0e0d800c0b0a80, relocInfo::none);
4321     __ emit_data64(0x0605048003020180, relocInfo::none);
4322     __ emit_data64(0x0c0b0a8009080780, relocInfo::none);
4323     __ emit_data64(0x0504038002010080, relocInfo::none);
4324     __ emit_data64(0x0b0a098008070680, relocInfo::none);
4325 
4326     return start;
4327   }
4328 
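In this shuffle mask a 0x80 byte selects zero, so each group of 3 source bytes is expanded into a 4-byte group with a leading zero pad before the bit extraction. A scalar model of (v)pshufb within one 16-byte lane, for reference:

    #include <stdint.h>
    // out[i] is zero when the mask byte has its top bit set; otherwise it is the
    // source byte selected by the low 4 bits of the mask (per 128-bit lane).
    static void pshufb_lane_model(uint8_t out[16], const uint8_t in[16], const uint8_t mask[16]) {
      for (int i = 0; i < 16; i++) {
        out[i] = (mask[i] & 0x80) ? 0 : in[mask[i] & 0x0f];
      }
    }
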
4329   address base64_right_shift_mask_addr() {
4330     __ align(CodeEntryAlignment);
4331     StubCodeMark mark(this, "StubRoutines", "right_shift_mask");
4332     address start = __ pc();
4333     __ emit_data64(0x0006000400020000, relocInfo::none);
4334     __ emit_data64(0x0006000400020000, relocInfo::none);
4335     __ emit_data64(0x0006000400020000, relocInfo::none);
4336     __ emit_data64(0x0006000400020000, relocInfo::none);
4337     __ emit_data64(0x0006000400020000, relocInfo::none);
4338     __ emit_data64(0x0006000400020000, relocInfo::none);
4339     __ emit_data64(0x0006000400020000, relocInfo::none);
4340     __ emit_data64(0x0006000400020000, relocInfo::none);
4341 
4342     return start;
4343   }
4344 
4345   address base64_left_shift_mask_addr() {
4346     __ align(CodeEntryAlignment);
4347     StubCodeMark mark(this, "StubRoutines", "left_shift_mask");
4348     address start = __ pc();
4349     __ emit_data64(0x0000000200040000, relocInfo::none);
4350     __ emit_data64(0x0000000200040000, relocInfo::none);
4351     __ emit_data64(0x0000000200040000, relocInfo::none);
4352     __ emit_data64(0x0000000200040000, relocInfo::none);
4353     __ emit_data64(0x0000000200040000, relocInfo::none);
4354     __ emit_data64(0x0000000200040000, relocInfo::none);
4355     __ emit_data64(0x0000000200040000, relocInfo::none);
4356     __ emit_data64(0x0000000200040000, relocInfo::none);
4357 
4358     return start;
4359   }
4360 
4361   address base64_and_mask_addr() {
4362     __ align(CodeEntryAlignment);
4363     StubCodeMark mark(this, "StubRoutines", "and_mask");
4364     address start = __ pc();
4365     __ emit_data64(0x3f003f003f000000, relocInfo::none);
4366     __ emit_data64(0x3f003f003f000000, relocInfo::none);
4367     __ emit_data64(0x3f003f003f000000, relocInfo::none);
4368     __ emit_data64(0x3f003f003f000000, relocInfo::none);
4369     __ emit_data64(0x3f003f003f000000, relocInfo::none);
4370     __ emit_data64(0x3f003f003f000000, relocInfo::none);
4371     __ emit_data64(0x3f003f003f000000, relocInfo::none);
4372     __ emit_data64(0x3f003f003f000000, relocInfo::none);
4373     return start;
4374   }
4375 
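Taken together, the per-word right/left shifts and the 0x3f AND mask split each 24-bit input group into four 6-bit table indices; this is the vector form of the scalar extraction quoted from Base64.java further down. A scalar sketch for one 3-byte group:

    #include <stdint.h>
    // Four 6-bit indices from one 3-byte group, as in the scalar Java code.
    static void encode3_model(const uint8_t src[3], uint8_t idx[4]) {
      uint32_t bits = ((uint32_t)src[0] << 16) | ((uint32_t)src[1] << 8) | src[2];
      idx[0] = (bits >> 18) & 0x3f;
      idx[1] = (bits >> 12) & 0x3f;
      idx[2] = (bits >>  6) & 0x3f;
      idx[3] =  bits        & 0x3f;
    }
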
4376   address base64_gather_mask_addr() {
4377     __ align(CodeEntryAlignment);
4378     StubCodeMark mark(this, "StubRoutines", "gather_mask");
4379     address start = __ pc();
4380     __ emit_data64(0xffffffffffffffff, relocInfo::none);
4381     return start;
4382   }
4383 
4384 // Code for generating Base64 encoding.
4385 // Intrinsic function prototype in Base64.java:
4386 // private void encodeBlock(byte[] src, int sp, int sl, byte[] dst, int dp, boolean isURL) {
4387   address generate_base64_encodeBlock() {
4388     __ align(CodeEntryAlignment);
4389     StubCodeMark mark(this, "StubRoutines", "implEncode");
4390     address start = __ pc();
4391     __ enter();
4392 
4393     // Save callee-saved registers before using them
4394     __ push(r12);
4395     __ push(r13);
4396     __ push(r14);
4397     __ push(r15);
4398     __ push(rbx);
4399 
4400     // arguments
4401     const Register source = c_rarg0; // Source Array
4402     const Register start_offset = c_rarg1; // start offset
4403     const Register end_offset = c_rarg2; // end offset
4404     const Register dest = c_rarg3; // destination array
4405 
4406 #ifndef _WIN64
4407     const Register dp = c_rarg4;  // Position for writing to dest array
4408     const Register isURL = c_rarg5; // Base64 or URL character set
4409 #else
4410     const Address  dp_mem(rbp, 6 * wordSize);  // dp (5th arg) is passed on the stack on Win64
4411     const Address isURL_mem(rbp, 7 * wordSize); // isURL (6th arg) is passed on the stack on Win64
4412     const Register isURL = r10;      // pick a volatile Windows register
4413     const Register dp = r12;
4414     __ movl(dp, dp_mem);
4415     __ movl(isURL, isURL_mem);
4416 #endif
4417 
4418     const Register length = r14;
4419     Label L_process80, L_process32, L_process3, L_exit, L_processdata;
4420 
4421     // calculate length from offsets
4422     __ movl(length, end_offset);
4423     __ subl(length, start_offset);
4424     __ cmpl(length, 0);
4425     __ jcc(Assembler::lessEqual, L_exit);
4426 
4427     // Save k1 value in rbx
4428     __ kmovql(rbx, k1);
4429     __ lea(r11, ExternalAddress(StubRoutines::x86::base64_charset_addr()));
4430     // Check whether the Base64 charset (isURL = 0) or the Base64 URL charset (isURL = 1) needs to be loaded
4431     __ cmpl(isURL, 0);
4432     __ jcc(Assembler::equal, L_processdata);
4433     __ lea(r11, ExternalAddress(StubRoutines::x86::base64url_charset_addr()));
4434 
4435     // load masks required for encoding data
4436     __ BIND(L_processdata);
4437     __ movdqu(xmm16, ExternalAddress(StubRoutines::x86::base64_gather_mask_addr()));
4438     // Set all 64 bits of the k1 opmask (a register compared with itself is equal in every byte lane).
4439     __ evpcmpeqb(k1, xmm16, xmm16, Assembler::AVX_512bit);
4440     __ evmovdquq(xmm12, ExternalAddress(StubRoutines::x86::base64_bswap_mask_addr()), Assembler::AVX_256bit, r13);
4441     __ evmovdquq(xmm13, ExternalAddress(StubRoutines::x86::base64_right_shift_mask_addr()), Assembler::AVX_512bit, r13);
4442     __ evmovdquq(xmm14, ExternalAddress(StubRoutines::x86::base64_left_shift_mask_addr()), Assembler::AVX_512bit, r13);
4443     __ evmovdquq(xmm15, ExternalAddress(StubRoutines::x86::base64_and_mask_addr()), Assembler::AVX_512bit, r13);
4444 
4445     // Vector Base64 implementation, producing 96 bytes of encoded data
4446     __ BIND(L_process80);
4447     __ cmpl(length, 80);
4448     __ jcc(Assembler::below, L_process32);
4449     __ evmovdquq(xmm0, Address(source, start_offset, Address::times_1, 0), Assembler::AVX_256bit);
4450     __ evmovdquq(xmm1, Address(source, start_offset, Address::times_1, 24), Assembler::AVX_256bit);
4451     __ evmovdquq(xmm2, Address(source, start_offset, Address::times_1, 48), Assembler::AVX_256bit);
4452 
4453     // Permute the input data so that each 128-bit lane holds a contiguous run of source bytes
4454     __ vpermq(xmm3, xmm0, 148, Assembler::AVX_256bit);
4455     __ vpermq(xmm4, xmm1, 148, Assembler::AVX_256bit);
4456     __ vpermq(xmm5, xmm2, 148, Assembler::AVX_256bit);
4457 
4458     // Shuffle the input to group 3 source bytes with a zero pad as the 4th byte;
4459     // this lets each 128-bit lane handle 12 source bytes at a time
4460     __ vpshufb(xmm3, xmm3, xmm12, Assembler::AVX_256bit);
4461     __ vpshufb(xmm4, xmm4, xmm12, Assembler::AVX_256bit);
4462     __ vpshufb(xmm5, xmm5, xmm12, Assembler::AVX_256bit);
4463 
4464     // Zero-extend bytes to words; each 128-bit lane now carries 6 source bytes for processing
4465     __ vpmovzxbw(xmm3, xmm3, Assembler::AVX_512bit);
4466     __ vpmovzxbw(xmm4, xmm4, Assembler::AVX_512bit);
4467     __ vpmovzxbw(xmm5, xmm5, Assembler::AVX_512bit);
4468 
4469     // Extract bits in the pattern 6, 4+2, 2+4, 6 to convert three 8-bit bytes into four 6-bit values
4470     __ evpsrlvw(xmm0, xmm3, xmm13,  Assembler::AVX_512bit);
4471     __ evpsrlvw(xmm1, xmm4, xmm13, Assembler::AVX_512bit);
4472     __ evpsrlvw(xmm2, xmm5, xmm13, Assembler::AVX_512bit);
4473 
4474     __ evpsllvw(xmm3, xmm3, xmm14, Assembler::AVX_512bit);
4475     __ evpsllvw(xmm4, xmm4, xmm14, Assembler::AVX_512bit);
4476     __ evpsllvw(xmm5, xmm5, xmm14, Assembler::AVX_512bit);
4477 
4478     __ vpsrlq(xmm0, xmm0, 8, Assembler::AVX_512bit);
4479     __ vpsrlq(xmm1, xmm1, 8, Assembler::AVX_512bit);
4480     __ vpsrlq(xmm2, xmm2, 8, Assembler::AVX_512bit);
4481 
4482     __ vpsllq(xmm3, xmm3, 8, Assembler::AVX_512bit);
4483     __ vpsllq(xmm4, xmm4, 8, Assembler::AVX_512bit);
4484     __ vpsllq(xmm5, xmm5, 8, Assembler::AVX_512bit);
4485 
4486     __ vpandq(xmm3, xmm3, xmm15, Assembler::AVX_512bit);
4487     __ vpandq(xmm4, xmm4, xmm15, Assembler::AVX_512bit);
4488     __ vpandq(xmm5, xmm5, xmm15, Assembler::AVX_512bit);
4489 
4490     // Combine the shifted pieces into the final four 6-bit Base64 indices
4491     __ vporq(xmm3, xmm3, xmm0, Assembler::AVX_512bit);
4492     __ vporq(xmm4, xmm4, xmm1, Assembler::AVX_512bit);
4493     __ vporq(xmm5, xmm5, xmm2, Assembler::AVX_512bit);
4494 
4495     // Shift
4496     __ vpsrlq(xmm3, xmm3, 8, Assembler::AVX_512bit);
4497     __ vpsrlq(xmm4, xmm4, 8, Assembler::AVX_512bit);
4498     __ vpsrlq(xmm5, xmm5, 8, Assembler::AVX_512bit);
4499 
4500     // Look up each 6-bit value in the Base64 character set to fetch its encoding;
4501     // words are widened to dwords because the gather instruction requires dword indices
4502     __ vextracti64x4(xmm6, xmm3, 0);
4503     __ vpmovzxwd(xmm0, xmm6, Assembler::AVX_512bit);
4504     __ vextracti64x4(xmm6, xmm3, 1);
4505     __ vpmovzxwd(xmm1, xmm6, Assembler::AVX_512bit);
4506 
4507     __ vextracti64x4(xmm6, xmm4, 0);
4508     __ vpmovzxwd(xmm2, xmm6, Assembler::AVX_512bit);
4509     __ vextracti64x4(xmm6, xmm4, 1);
4510     __ vpmovzxwd(xmm3, xmm6, Assembler::AVX_512bit);
4511 
4512     __ vextracti64x4(xmm4, xmm5, 0);
4513     __ vpmovzxwd(xmm6, xmm4, Assembler::AVX_512bit);
4514 
4515     __ vextracti64x4(xmm4, xmm5, 1);
4516     __ vpmovzxwd(xmm7, xmm4, Assembler::AVX_512bit);
4517 
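         // Note: evpgatherdd uses the opmask both as a predicate and as a completion
         // mask (lanes are cleared as their loads finish), so k2 is refreshed from the
         // all-ones mask in k1 before every gather below. Scalar model of one gather:
         //   for (int i = 0; i < 16; i++)
         //     if (mask & (1 << i)) dst[i] = *(const uint32_t*)(base + 4 * index[i]);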
4518     __ kmovql(k2, k1);
4519     __ evpgatherdd(xmm4, k2, Address(r11, xmm0, Address::times_4, 0), Assembler::AVX_512bit);
4520     __ kmovql(k2, k1);
4521     __ evpgatherdd(xmm5, k2, Address(r11, xmm1, Address::times_4, 0), Assembler::AVX_512bit);
4522     __ kmovql(k2, k1);
4523     __ evpgatherdd(xmm8, k2, Address(r11, xmm2, Address::times_4, 0), Assembler::AVX_512bit);
4524     __ kmovql(k2, k1);
4525     __ evpgatherdd(xmm9, k2, Address(r11, xmm3, Address::times_4, 0), Assembler::AVX_512bit);
4526     __ kmovql(k2, k1);
4527     __ evpgatherdd(xmm10, k2, Address(r11, xmm6, Address::times_4, 0), Assembler::AVX_512bit);
4528     __ kmovql(k2, k1);
4529     __ evpgatherdd(xmm11, k2, Address(r11, xmm7, Address::times_4, 0), Assembler::AVX_512bit);
4530 
4531     // Narrow dwords back to bytes; the final output is 6 * 16 = 96 encoded bytes
4532     __ evpmovdb(Address(dest, dp, Address::times_1, 0), xmm4, Assembler::AVX_512bit);
4533     __ evpmovdb(Address(dest, dp, Address::times_1, 16), xmm5, Assembler::AVX_512bit);
4534     __ evpmovdb(Address(dest, dp, Address::times_1, 32), xmm8, Assembler::AVX_512bit);
4535     __ evpmovdb(Address(dest, dp, Address::times_1, 48), xmm9, Assembler::AVX_512bit);
4536     __ evpmovdb(Address(dest, dp, Address::times_1, 64), xmm10, Assembler::AVX_512bit);
4537     __ evpmovdb(Address(dest, dp, Address::times_1, 80), xmm11, Assembler::AVX_512bit);
4538 
4539     __ addq(dest, 96);
4540     __ addq(source, 72);
4541     __ subq(length, 72);
4542     __ jmp(L_process80);
4543 
4544     // Vector Base64 implementation generating 32 bytes of encoded data
4545     __ BIND(L_process32);
4546     __ cmpl(length, 32);
4547     __ jcc(Assembler::below, L_process3);
4548     __ evmovdquq(xmm0, Address(source, start_offset), Assembler::AVX_256bit);
4549     __ vpermq(xmm0, xmm0, 148, Assembler::AVX_256bit);
4550     __ vpshufb(xmm6, xmm0, xmm12, Assembler::AVX_256bit);
4551     __ vpmovzxbw(xmm6, xmm6, Assembler::AVX_512bit);
4552     __ evpsrlvw(xmm2, xmm6, xmm13, Assembler::AVX_512bit);
4553     __ evpsllvw(xmm3, xmm6, xmm14, Assembler::AVX_512bit);
4554 
4555     __ vpsrlq(xmm2, xmm2, 8, Assembler::AVX_512bit);
4556     __ vpsllq(xmm3, xmm3, 8, Assembler::AVX_512bit);
4557     __ vpandq(xmm3, xmm3, xmm15, Assembler::AVX_512bit);
4558     __ vporq(xmm1, xmm2, xmm3, Assembler::AVX_512bit);
4559     __ vpsrlq(xmm1, xmm1, 8, Assembler::AVX_512bit);
4560     __ vextracti64x4(xmm9, xmm1, 0);
4561     __ vpmovzxwd(xmm6, xmm9, Assembler::AVX_512bit);
4562     __ vextracti64x4(xmm9, xmm1, 1);
4563     __ vpmovzxwd(xmm5, xmm9,  Assembler::AVX_512bit);
4564     __ kmovql(k2, k1);
4565     __ evpgatherdd(xmm8, k2, Address(r11, xmm6, Address::times_4, 0), Assembler::AVX_512bit);
4566     __ kmovql(k2, k1);
4567     __ evpgatherdd(xmm10, k2, Address(r11, xmm5, Address::times_4, 0), Assembler::AVX_512bit);
4568     __ evpmovdb(Address(dest, dp, Address::times_1, 0), xmm8, Assembler::AVX_512bit);
4569     __ evpmovdb(Address(dest, dp, Address::times_1, 16), xmm10, Assembler::AVX_512bit);
4570     __ subq(length, 24);
4571     __ addq(dest, 32);
4572     __ addq(source, 24);
4573     __ jmp(L_process32);
4574 
4575     // Scalar data processing takes 3 bytes at a time and produces 4 bytes of encoded data
4576     /* This code corresponds to the scalar version of the following snippet in Base64.java
4577     ** int bits = (src[sp0++] & 0xff) << 16 |(src[sp0++] & 0xff) << 8 |(src[sp0++] & 0xff);
4578     ** dst[dp0++] = (byte)base64[(bits >>> 18) & 0x3f];
4579     ** dst[dp0++] = (byte)base64[(bits >>> 12) & 0x3f];
4580     ** dst[dp0++] = (byte)base64[(bits >>> 6) & 0x3f];
4581     ** dst[dp0++] = (byte)base64[bits & 0x3f];*/
4582     __ BIND(L_process3);
4583     __ cmpl(length, 3);
4584     __ jcc(Assembler::below, L_exit);
4585     // Read 1 byte at a time
4586     __ movzbl(rax, Address(source, start_offset));
4587     __ shll(rax, 0x10);
4588     __ movl(r15, rax);
4589     __ movzbl(rax, Address(source, start_offset, Address::times_1, 1));
4590     __ shll(rax, 0x8);
4591     __ movzwl(rax, rax);
4592     __ orl(r15, rax);
4593     __ movzbl(rax, Address(source, start_offset, Address::times_1, 2));
4594     __ orl(rax, r15);
4595     // Save 3 bytes read in r15
4596     __ movl(r15, rax);
4597     __ shrl(rax, 0x12);
4598     __ andl(rax, 0x3f);
4599     // rax contains the index, r11 contains base64 lookup table
4600     __ movb(rax, Address(r11, rax, Address::times_4));
4601     // Write the encoded byte to destination
4602     __ movb(Address(dest, dp, Address::times_1, 0), rax);
4603     __ movl(rax, r15);
4604     __ shrl(rax, 0xc);
4605     __ andl(rax, 0x3f);
4606     __ movb(rax, Address(r11, rax, Address::times_4));
4607     __ movb(Address(dest, dp, Address::times_1, 1), rax);
4608     __ movl(rax, r15);
4609     __ shrl(rax, 0x6);
4610     __ andl(rax, 0x3f);
4611     __ movb(rax, Address(r11, rax, Address::times_4));
4612     __ movb(Address(dest, dp, Address::times_1, 2), rax);
4613     __ movl(rax, r15);
4614     __ andl(rax, 0x3f);
4615     __ movb(rax, Address(r11, rax, Address::times_4));
4616     __ movb(Address(dest, dp, Address::times_1, 3), rax);
4617     __ subl(length, 3);
4618     __ addq(dest, 4);
4619     __ addq(source, 3);
4620     __ jmp(L_process3);
4621     __ BIND(L_exit);
4622     // restore k1 register value
4623     __ kmovql(k1, rbx);
4624     __ pop(rbx);
4625     __ pop(r15);
4626     __ pop(r14);
4627     __ pop(r13);
4628     __ pop(r12);
4629     __ leave();
4630     __ ret(0);
4631     return start;
4632   }
4633 
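In summary, the stub above processes the input in three tiers, falling through to the next tier as the remaining length shrinks. A control-flow sketch, with hypothetical encode72/encode24/encode3 helpers standing in for the three code paths:

    #include <stdint.h>
    // hypothetical helpers for the AVX-512 96-byte path, the 32-byte path, and the scalar tail
    void encode72(const uint8_t* src, uint8_t* dst);
    void encode24(const uint8_t* src, uint8_t* dst);
    void encode3(const uint8_t* src, uint8_t* dst);

    static void encode_block_shape(const uint8_t* src, int length, uint8_t* dst) {
      while (length >= 80) { encode72(src, dst); src += 72; dst += 96; length -= 72; }
      while (length >= 32) { encode24(src, dst); src += 24; dst += 32; length -= 24; }
      while (length >= 3)  { encode3(src, dst);  src += 3;  dst += 4;  length -= 3;  }
    }
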
4634   /**
4635    *  Arguments:
4636    *
4637    * Inputs:
4638    *   c_rarg0   - int crc
4639    *   c_rarg1   - byte* buf
4640    *   c_rarg2   - int length
4641    *
4642    * Output:
4643    *       rax   - int crc result
4644    */
4645   address generate_updateBytesCRC32() {
4646     assert(UseCRC32Intrinsics, "need AVX and CLMUL instructions");
4647 
4648     __ align(CodeEntryAlignment);
4649     StubCodeMark mark(this, "StubRoutines", "updateBytesCRC32");
4650 
4651     address start = __ pc();
4652     // Win64: rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...)
4653     // Unix:  rdi, rsi, rdx, rcx, r8, r9 (c_rarg0, c_rarg1, ...)


5489         memcpy(dst + 32 * ii,      src + 16 * ii, 16);
5490         memcpy(dst + 32 * ii + 16, src + 16 * ii, 16);
5491       }
5492       StubRoutines::x86::_k256_W_adr = (address)StubRoutines::x86::_k256_W;
5493       StubRoutines::x86::_pshuffle_byte_flip_mask_addr = generate_pshuffle_byte_flip_mask();
5494       StubRoutines::_sha256_implCompress = generate_sha256_implCompress(false, "sha256_implCompress");
5495       StubRoutines::_sha256_implCompressMB = generate_sha256_implCompress(true, "sha256_implCompressMB");
5496     }
5497     if (UseSHA512Intrinsics) {
5498       StubRoutines::x86::_k512_W_addr = (address)StubRoutines::x86::_k512_W;
5499       StubRoutines::x86::_pshuffle_byte_flip_mask_addr_sha512 = generate_pshuffle_byte_flip_mask_sha512();
5500       StubRoutines::_sha512_implCompress = generate_sha512_implCompress(false, "sha512_implCompress");
5501       StubRoutines::_sha512_implCompressMB = generate_sha512_implCompress(true, "sha512_implCompressMB");
5502     }
5503 
5504     // Generate GHASH intrinsics code
5505     if (UseGHASHIntrinsics) {
5506       StubRoutines::x86::_ghash_long_swap_mask_addr = generate_ghash_long_swap_mask();
5507       StubRoutines::x86::_ghash_byte_swap_mask_addr = generate_ghash_byte_swap_mask();
5508       StubRoutines::_ghash_processBlocks = generate_ghash_processBlocks();
5509     }
5510 
5511     if (UseBASE64Intrinsics) {
5512       StubRoutines::x86::_and_mask = base64_and_mask_addr();
5513       StubRoutines::x86::_bswap_mask = base64_bswap_mask_addr();
5514       StubRoutines::x86::_base64_charset = base64_charset_addr();
5515       StubRoutines::x86::_url_charset = base64url_charset_addr();
5516       StubRoutines::x86::_gather_mask = base64_gather_mask_addr();
5517       StubRoutines::x86::_left_shift_mask = base64_left_shift_mask_addr();
5518       StubRoutines::x86::_right_shift_mask = base64_right_shift_mask_addr();
5519       StubRoutines::_base64_encodeBlock = generate_base64_encodeBlock();
5520     }
5521 
5522     // Safefetch stubs.
5523     generate_safefetch("SafeFetch32", sizeof(int),     &StubRoutines::_safefetch32_entry,
5524                                                        &StubRoutines::_safefetch32_fault_pc,
5525                                                        &StubRoutines::_safefetch32_continuation_pc);
5526     generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry,
5527                                                        &StubRoutines::_safefetchN_fault_pc,
5528                                                        &StubRoutines::_safefetchN_continuation_pc);
5529 #ifdef COMPILER2
5530     if (UseMultiplyToLenIntrinsic) {
5531       StubRoutines::_multiplyToLen = generate_multiplyToLen();
5532     }
5533     if (UseSquareToLenIntrinsic) {
5534       StubRoutines::_squareToLen = generate_squareToLen();
5535     }
5536     if (UseMulAddIntrinsic) {
5537       StubRoutines::_mulAdd = generate_mulAdd();
5538     }
5539 #ifndef _WINDOWS

