  // NOTE(review): this extract is a numbered diff listing; the embedded original
  // line numbers have been stripped. Code tokens are unchanged.

  // (tail of aes_enc_key — the start of this helper is outside this extract)
    load_key(xmmtmp, key, offset, xmm_shuf_mask);
    __ aesenc(xmmdst, xmmtmp);
  }

  // aesdec using specified key+offset
  // can optionally specify that the shuffle mask is already in an xmmregister
  // (xmm_shuf_mask == NULL means load_key must fetch the mask itself)
  void aes_dec_key(XMMRegister xmmdst, XMMRegister xmmtmp, Register key, int offset, XMMRegister xmm_shuf_mask=NULL) {
    load_key(xmmtmp, key, offset, xmm_shuf_mask);
    __ aesdec(xmmdst, xmmtmp);
  }


  // Single-block AES encryption stub (ECB, one 16-byte block).
  //
  // Arguments:
  //
  // Inputs:
  //   c_rarg0   - source byte array address
  //   c_rarg1   - destination byte array address
  //   c_rarg2   - K (key) in little endian int array
  //
  address generate_aescrypt_encryptBlock() {
    assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support");
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", "aescrypt_encryptBlock");
    Label L_doLast;
    address start = __ pc();

    const Register from        = rsi;      // source array address
    const Register to          = rdx;      // destination array address
    const Register key         = rcx;      // key array address
    const Register keylen      = rax;
    // 32-bit calling convention: parameters live on the stack above rbp
    const Address  from_param(rbp, 8+0);
    const Address  to_param  (rbp, 8+4);
    const Address  key_param (rbp, 8+8);

    const XMMRegister xmm_result        = xmm0;
    const XMMRegister xmm_temp          = xmm1;
    const XMMRegister xmm_key_shuf_mask = xmm2;

    __ enter();   // required for proper stackwalking of RuntimeStub frame
    __ push(rsi); // rsi is callee-saved on x86_32; preserve it around the stub
    __ movptr(from , from_param);
    __ movptr(to   , to_param);
    __ movptr(key  , key_param);

    __ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
    // keylen = # of 32-bit words, convert to 128-bit words
    __ shrl(keylen, 2);
    __ subl(keylen, 11);   // every key has at least 11 128-bit words, some have more
                           // after this, keylen is 0 / 2 / 4 for 128 / 192 / 256-bit keys

    __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
    __ movdqu(xmm_result, Address(from, 0));  // get 16 bytes of input

    // For encryption, the java expanded key ordering is just what we need

    load_key(xmm_temp, key, 0x00, xmm_key_shuf_mask);
    __ pxor(xmm_result, xmm_temp);
    // rounds 1..9 are common to all key sizes
    for (int offset = 0x10; offset <= 0x90; offset += 0x10) {
      aes_enc_key(xmm_result, xmm_temp, key, offset, xmm_key_shuf_mask);
    }
    load_key (xmm_temp, key, 0xa0, xmm_key_shuf_mask);
    __ cmpl(keylen, 0);
    __ jcc(Assembler::equal, L_doLast);       // 128-bit key: 0xa0 is the last round key
    __ aesenc(xmm_result, xmm_temp);          // only in 192 and 256 bit keys
    aes_enc_key(xmm_result, xmm_temp, key, 0xb0, xmm_key_shuf_mask);
    load_key(xmm_temp, key, 0xc0, xmm_key_shuf_mask);
    __ subl(keylen, 2);
    __ jcc(Assembler::equal, L_doLast);       // 192-bit key: 0xc0 is the last round key
    __ aesenc(xmm_result, xmm_temp);          // only in 256 bit keys
    aes_enc_key(xmm_result, xmm_temp, key, 0xd0, xmm_key_shuf_mask);
    load_key(xmm_temp, key, 0xe0, xmm_key_shuf_mask);

    __ BIND(L_doLast);
    __ aesenclast(xmm_result, xmm_temp);
    __ movdqu(Address(to, 0), xmm_result);    // store the result
    __ xorptr(rax, rax);                      // return 0
    __ pop(rsi);
    __ leave();                               // required for proper stackwalking of RuntimeStub frame
    __ ret(0);

    return start;
  }


  // Single-block AES decryption stub (ECB, one 16-byte block).
  //
  // Arguments:
  //
  // Inputs:
  //   c_rarg0   - source byte array address
  //   c_rarg1   - destination byte array address
  //   c_rarg2   - K (key) in little endian int array
  //
  address generate_aescrypt_decryptBlock() {
    assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support");
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", "aescrypt_decryptBlock");
    Label L_doLast;
    address start = __ pc();

    const Register from        = rsi;      // source array address
    const Register to          = rdx;      // destination array address
    const Register key         = rcx;      // key array address
    const Register keylen      = rax;
    const Address  from_param(rbp, 8+0);
    const Address  to_param  (rbp, 8+4);
    const Address  key_param (rbp, 8+8);

    const XMMRegister xmm_result        = xmm0;
    const XMMRegister xmm_temp          = xmm1;
    const XMMRegister xmm_key_shuf_mask = xmm2;

    __ enter();   // required for proper stackwalking of RuntimeStub frame
    __ push(rsi);
    __ movptr(from , from_param);
    __ movptr(to   , to_param);
    __ movptr(key  , key_param);

    __ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
    // keylen = # of 32-bit words, convert to 128-bit words
    __ shrl(keylen, 2);
    __ subl(keylen, 11);   // every key has at least 11 128-bit words, some have more

    __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
    __ movdqu(xmm_result, Address(from, 0));

    // for decryption java expanded key ordering is rotated one position from what we want
    // so we start from 0x10 here and hit 0x00 last
    // we don't know if the key is aligned, hence not using load-execute form
    load_key(xmm_temp, key, 0x10, xmm_key_shuf_mask);
    __ pxor  (xmm_result, xmm_temp);
    for (int offset = 0x20; offset <= 0xa0; offset += 0x10) {
      aes_dec_key(xmm_result, xmm_temp, key, offset, xmm_key_shuf_mask);
    }
    __ cmpl(keylen, 0);
    __ jcc(Assembler::equal, L_doLast);
    // only in 192 and 256 bit keys
    aes_dec_key(xmm_result, xmm_temp, key, 0xb0, xmm_key_shuf_mask);
    aes_dec_key(xmm_result, xmm_temp, key, 0xc0, xmm_key_shuf_mask);
    __ subl(keylen, 2);
    __ jcc(Assembler::equal, L_doLast);
    // only in 256 bit keys
    aes_dec_key(xmm_result, xmm_temp, key, 0xd0, xmm_key_shuf_mask);
    aes_dec_key(xmm_result, xmm_temp, key, 0xe0, xmm_key_shuf_mask);

    __ BIND(L_doLast);
    // for decryption the aesdeclast operation is always on key+0x00
    load_key(xmm_temp, key, 0x00, xmm_key_shuf_mask);
    __ aesdeclast(xmm_result, xmm_temp);

    __ movdqu(Address(to, 0), xmm_result);  // store the result

    __ xorptr(rax, rax);   // return 0
    __ pop(rsi);
    __ leave();            // required for proper stackwalking of RuntimeStub frame
    __ ret(0);

    return start;
  }

  // Save (saving == true) or restore the callee-saved registers rbx/rsi/rdi
  // that the CBC stubs clobber. Must be called after enter() so rbp is valid;
  // the save area sits just below the saved rbp.
  void handleSOERegisters(bool saving) {
    const int saveFrameSizeInBytes = 4 * wordSize;
    const Address saved_rbx (rbp, -3 * wordSize);
    const Address saved_rsi (rbp, -2 * wordSize);
    const Address saved_rdi (rbp, -1 * wordSize);

    if (saving) {
      __ subptr(rsp, saveFrameSizeInBytes);
      __ movptr(saved_rsi, rsi);
      __ movptr(saved_rdi, rdi);
      __ movptr(saved_rbx, rbx);
    } else {
      // restoring
      __ movptr(rsi, saved_rsi);
      __ movptr(rdi, saved_rdi);
      __ movptr(rbx, saved_rbx);
    }
  }

  // CBC AES encryption stub.
  //
  // Arguments:
  //
  // Inputs:
  //   c_rarg0   - source byte array address
  //   c_rarg1   - destination byte array address
  //   c_rarg2   - K (key) in little endian int array
  //   c_rarg3   - r vector byte array address
  //   c_rarg4   - input length
  //
  address generate_cipherBlockChaining_encryptAESCrypt() {
    assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support");
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_encryptAESCrypt");
    address start = __ pc();

    Label L_exit, L_key_192_256, L_key_256, L_loopTop_128, L_loopTop_192, L_loopTop_256;
    const Register from    = rsi;   // source array address
    const Register to      = rdx;   // destination array address
    const Register key     = rcx;   // key array address
    const Register rvec    = rdi;   // r byte array initialized from initvector array address
                                    // and left with the results of the last encryption block
    const Register len_reg = rbx;   // src len (must be multiple of blocksize 16)
    const Register pos     = rax;

    // xmm register assignments for the
    // (cont.) xmm register assignments for the loops below
    const XMMRegister xmm_result = xmm0;
    const XMMRegister xmm_temp   = xmm1;
    // first 6 keys preloaded into xmm2-xmm7
    const int XMM_REG_NUM_KEY_FIRST = 2;
    const int XMM_REG_NUM_KEY_LAST  = 7;
    const XMMRegister xmm_key0 = as_XMMRegister(XMM_REG_NUM_KEY_FIRST);

    // NOTE(review): original lines 2364-2375 are not visible in this extract;
    // they presumably declare key_param/rvec_param/len_param, emit enter() and
    // handleSOERegisters(true), and load 'from'/'to' -- confirm against the full file.
    __ movptr(key     , key_param);
    __ movptr(rvec    , rvec_param);
    __ movptr(len_reg , len_param);

    const XMMRegister xmm_key_shuf_mask = xmm_temp;  // used temporarily to swap key bytes up front
    __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
    // load up xmm regs 2 thru 7 with keys 0-5
    for (int rnum = XMM_REG_NUM_KEY_FIRST, offset = 0x00; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
      load_key(as_XMMRegister(rnum), key, offset, xmm_key_shuf_mask);
      offset += 0x10;
    }

    __ movdqu(xmm_result, Address(rvec, 0x00));   // initialize xmm_result with r vec

    // now split to different paths depending on the keylen (len in ints of AESCrypt.KLE array (52=192, or 60=256))
    __ movl(rax, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
    __ cmpl(rax, 44);
    __ jcc(Assembler::notEqual, L_key_192_256);

    // 128 bit code follows here
    __ movptr(pos, 0);
    __ align(OptoLoopAlignment);
    __ BIND(L_loopTop_128);
    __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0));   // get next 16 bytes of input
    __ pxor  (xmm_result, xmm_temp);                                // xor with the current r vector

    __ pxor  (xmm_result, xmm_key0);                                // do the aes rounds
    // rounds 1..5 use the preloaded key registers xmm3..xmm7
    for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
      __ aesenc(xmm_result, as_XMMRegister(rnum));
    }
    // remaining round keys are fetched from memory each iteration
    for (int key_offset = 0x60; key_offset <= 0x90; key_offset += 0x10) {
      aes_enc_key(xmm_result, xmm_temp, key, key_offset);
    }
    load_key(xmm_temp, key, 0xa0);
    __ aesenclast(xmm_result, xmm_temp);

    __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result);   // store into the next 16 bytes of output
    // no need to store r to memory until we exit
    __ addptr(pos, AESBlockSize);
    __ subptr(len_reg, AESBlockSize);
    __ jcc(Assembler::notEqual, L_loopTop_128);

    __ BIND(L_exit);
    __ movdqu(Address(rvec, 0), xmm_result);      // final value of r stored in rvec of CipherBlockChaining object

    handleSOERegisters(false /*restoring*/);
    __ movl(rax, 0);                              // return 0 (why?)
    __ leave();                                   // required for proper stackwalking of RuntimeStub frame
    __ ret(0);

    __ BIND(L_key_192_256);
    // here rax = len in ints of AESCrypt.KLE array (52=192, or 60=256)
    __ cmpl(rax, 52);
    __ jcc(Assembler::notEqual, L_key_256);

    // 192-bit code follows here (could be changed to use more xmm registers)
    __ movptr(pos, 0);
    __ align(OptoLoopAlignment);
    __ BIND(L_loopTop_192);
    __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0));   // get next 16 bytes of input
    __ pxor  (xmm_result, xmm_temp);                                // xor with the current r vector

    __ pxor  (xmm_result, xmm_key0);                                // do the aes rounds
    for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
      __ aesenc(xmm_result, as_XMMRegister(rnum));
    }
    for (int key_offset = 0x60; key_offset <= 0xb0; key_offset += 0x10) {
      aes_enc_key(xmm_result, xmm_temp, key, key_offset);
    }
    load_key(xmm_temp, key, 0xc0);
    __ aesenclast(xmm_result, xmm_temp);

    __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result);   // store into the next 16 bytes of output
    // no need to store r to memory until we exit
    __ addptr(pos, AESBlockSize);
    __ subptr(len_reg, AESBlockSize);
    __ jcc(Assembler::notEqual, L_loopTop_192);
    __ jmp(L_exit);

    __ BIND(L_key_256);
    // 256-bit code follows here (could be changed to use more xmm registers)
    __ movptr(pos, 0);
    __ align(OptoLoopAlignment);
    __ BIND(L_loopTop_256);
    __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0));   // get next 16 bytes of input
    __ pxor  (xmm_result, xmm_temp);                                // xor with the current r vector

    __ pxor  (xmm_result, xmm_key0);                                // do the aes rounds
    for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
      __ aesenc(xmm_result, as_XMMRegister(rnum));
    }
    for (int key_offset = 0x60; key_offset <= 0xd0; key_offset += 0x10) {
      aes_enc_key(xmm_result, xmm_temp, key, key_offset);
    }
    load_key(xmm_temp, key, 0xe0);
    __ aesenclast(xmm_result, xmm_temp);

    __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result);   // store into the next 16 bytes of output
    // no need to store r to memory until we exit
    __ addptr(pos, AESBlockSize);
    __ subptr(len_reg, AESBlockSize);
    __ jcc(Assembler::notEqual, L_loopTop_256);
    __ jmp(L_exit);

    return start;
  }


  // CBC AES Decryption.
  // In 32-bit stub, because of lack of registers we do not try to parallelize 4 blocks at a time.
  //
  // Arguments:
  //
  // Inputs:
  //   c_rarg0   - source byte array address
  //   c_rarg1   - destination byte array address
  //   c_rarg2   - K (key) in little endian int array
  //   c_rarg3   - r vector byte array address
  //   c_rarg4   - input length
  //

  address generate_cipherBlockChaining_decryptAESCrypt() {
    assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support");
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_decryptAESCrypt");
    address start = __ pc();

    Label L_exit, L_key_192_256, L_key_256;
    Label L_singleBlock_loopTop_128;
    Label L_singleBlock_loopTop_192, L_singleBlock_loopTop_256;
    const Register from    = rsi;   // source array address
    const Register to      = rdx;   // destination array address
    const Register key     = rcx;   // key array address
    const Register rvec    = rdi;   // r byte array initialized from initvector array address
                                    // and left with the results of the last encryption block
    const Register len_reg = rbx;   // src len (must be multiple of blocksize 16)
    const Register pos     = rax;

    // xmm register assignments for the loops below
    const XMMRegister xmm_result = xmm0;
    const XMMRegister xmm_temp   = xmm1;
    // first 6 keys preloaded into xmm2-xmm7
    const int XMM_REG_NUM_KEY_FIRST = 2;

    // NOTE(review): original lines 2519-2538 are not visible in this extract;
    // XMM_REG_NUM_KEY_LAST, xmm_key_first, FIRST_NON_REG_KEY_offset, rvec_param
    // and the enter()/handleSOERegisters(true)/parameter loads used below are
    // presumably declared there -- confirm against the full file.
    // for decryption java expanded key ordering is rotated one position from what we want
    // so we start from 0x10 here and hit 0x00 last
    const XMMRegister xmm_key_shuf_mask = xmm1;   // used temporarily to swap key bytes up front
    __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
    // load up xmm regs 2 thru 6 with first 5 keys
    for (int rnum = XMM_REG_NUM_KEY_FIRST, offset = 0x10; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
      load_key(as_XMMRegister(rnum), key, offset, xmm_key_shuf_mask);
      offset += 0x10;
    }

    // inside here, use the rvec register to point to previous block cipher
    // with which we xor at the end of each newly decrypted block
    const Register prev_block_cipher_ptr = rvec;

    // now split to different paths depending on the keylen (len in ints of AESCrypt.KLE array (52=192, or 60=256))
    __ movl(rax, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
    __ cmpl(rax, 44);
    __ jcc(Assembler::notEqual, L_key_192_256);


    // 128-bit code follows here, parallelized
    __ movptr(pos, 0);
    __ align(OptoLoopAlignment);
    __ BIND(L_singleBlock_loopTop_128);
    __ cmpptr(len_reg, 0);                        // any blocks left??
    __ jcc(Assembler::equal, L_exit);
    __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0));   // get next 16 bytes of cipher input
    __ pxor  (xmm_result, xmm_key_first);                             // do the aes dec rounds
    for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
      __ aesdec(xmm_result, as_XMMRegister(rnum));
    }
    for (int key_offset = FIRST_NON_REG_KEY_offset; key_offset <= 0xa0; key_offset += 0x10) {   // 128-bit runs up to key offset a0
      aes_dec_key(xmm_result, xmm_temp, key, key_offset);
    }
    load_key(xmm_temp, key, 0x00);                // final key is stored in java expanded array at offset 0
    __ aesdeclast(xmm_result, xmm_temp);
    __ movdqu(xmm_temp, Address(prev_block_cipher_ptr, 0x00));
    __ pxor  (xmm_result, xmm_temp);              // xor with the current r vector
    __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result);     // store into the next 16 bytes of output
    // no need to store r to memory until we exit
    __ lea(prev_block_cipher_ptr, Address(from, pos, Address::times_1, 0));   // set up new ptr
    __ addptr(pos, AESBlockSize);
    __ subptr(len_reg, AESBlockSize);
    __ jmp(L_singleBlock_loopTop_128);


    __ BIND(L_exit);
    __ movdqu(xmm_temp, Address(prev_block_cipher_ptr, 0x00));
    __ movptr(rvec , rvec_param);                 // restore this since used in loop
    __ movdqu(Address(rvec,
      0), xmm_temp);                              // final value of r stored in rvec of CipherBlockChaining object
    handleSOERegisters(false /*restoring*/);
    __ movl(rax, 0);                              // return 0 (why?)
    __ leave();                                   // required for proper stackwalking of RuntimeStub frame
    __ ret(0);


    __ BIND(L_key_192_256);
    // here rax = len in ints of AESCrypt.KLE array (52=192, or 60=256)
    __ cmpl(rax, 52);
    __ jcc(Assembler::notEqual, L_key_256);

    // 192-bit code follows here (could be optimized to use parallelism)
    __ movptr(pos, 0);
    __ align(OptoLoopAlignment);
    __ BIND(L_singleBlock_loopTop_192);
    __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0));   // get next 16 bytes of cipher input
    __ pxor  (xmm_result, xmm_key_first);                             // do the aes dec rounds
    for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
      __ aesdec(xmm_result, as_XMMRegister(rnum));
    }
    for (int key_offset = FIRST_NON_REG_KEY_offset; key_offset <= 0xc0; key_offset += 0x10) {   // 192-bit runs up to key offset c0
      aes_dec_key(xmm_result, xmm_temp, key, key_offset);
    }
    load_key(xmm_temp, key, 0x00);                // final key is stored in java expanded array at offset 0
    __ aesdeclast(xmm_result, xmm_temp);
    __ movdqu(xmm_temp, Address(prev_block_cipher_ptr, 0x00));
    __ pxor  (xmm_result, xmm_temp);              // xor with the current r vector
    __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result);     // store into the next 16 bytes of output
    // no need to store r to memory until we exit
    __ lea(prev_block_cipher_ptr, Address(from, pos, Address::times_1, 0));   // set up new ptr
    __ addptr(pos, AESBlockSize);
    __ subptr(len_reg, AESBlockSize);
    __ jcc(Assembler::notEqual,L_singleBlock_loopTop_192);
    __ jmp(L_exit);

    __ BIND(L_key_256);
    // 256-bit code follows here (could be optimized to use parallelism)
    __ movptr(pos, 0);
    __ align(OptoLoopAlignment);
    __ BIND(L_singleBlock_loopTop_256);
    __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0));   // get next 16 bytes of cipher input
    __ pxor  (xmm_result, xmm_key_first);                             // do the aes dec rounds
    for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
      __ aesdec(xmm_result, as_XMMRegister(rnum));
    }
    for (int key_offset = FIRST_NON_REG_KEY_offset; key_offset <= 0xe0; key_offset += 0x10) {   // 256-bit runs up to key offset e0
      aes_dec_key(xmm_result, xmm_temp, key, key_offset);
    }
    load_key(xmm_temp, key, 0x00);                // final key is stored in java expanded array at offset 0
    __ aesdeclast(xmm_result, xmm_temp);
    __ movdqu(xmm_temp, Address(prev_block_cipher_ptr, 0x00));
    __ pxor  (xmm_result, xmm_temp);              // xor with the current r vector
    __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result);     // store into the next 16 bytes of output
    // no need to store r to memory until we exit
    __ lea(prev_block_cipher_ptr, Address(from, pos, Address::times_1, 0));   // set up new ptr
    __ addptr(pos, AESBlockSize);
    __ subptr(len_reg, AESBlockSize);
    __ jcc(Assembler::notEqual,L_singleBlock_loopTop_256);
    // NOTE(review): the extract's left-hand column is truncated here (original
    // line 2646 onward not visible).

  // ==========================================================================
  // NOTE(review): everything below is the RIGHT-HAND column of the side-by-side
  // diff: an updated copy of the same AES stub generators (unrolled round-key
  // loads, UseAES-only assert, no rsi push in the single-block stubs).
  // ==========================================================================

  // (tail of updated aes_enc_key — the start of this helper is outside this extract)
    load_key(xmmtmp, key, offset, xmm_shuf_mask);
    __ aesenc(xmmdst, xmmtmp);
  }

  // aesdec using specified key+offset
  // can optionally specify that the shuffle mask is already in an xmmregister
  void aes_dec_key(XMMRegister xmmdst, XMMRegister xmmtmp, Register key, int offset, XMMRegister xmm_shuf_mask=NULL) {
    load_key(xmmtmp, key, offset, xmm_shuf_mask);
    __ aesdec(xmmdst, xmmtmp);
  }


  // Single-block AES encryption stub (updated version, unrolled key loads).
  //
  // Arguments:
  //
  // Inputs:
  //   c_rarg0   - source byte array address
  //   c_rarg1   - destination byte array address
  //   c_rarg2   - K (key) in little endian int array
  //
  address generate_aescrypt_encryptBlock() {
    assert(UseAES, "need AES instructions and misaligned SSE support");
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", "aescrypt_encryptBlock");
    Label L_doLast;
    address start = __ pc();

    const Register from   = rdx;   // source array address
    const Register to     = rdx;   // destination array address
                                   // (rdx is reused: 'to' is loaded only after the input block is read)
    const Register key    = rcx;   // key array address
    const Register keylen = rax;
    const Address  from_param(rbp, 8+0);
    const Address  to_param  (rbp, 8+4);
    const Address  key_param (rbp, 8+8);

    const XMMRegister xmm_result        = xmm0;
    const XMMRegister xmm_key_shuf_mask = xmm1;
    const XMMRegister xmm_temp1         = xmm2;
    const XMMRegister xmm_temp2         = xmm3;
    const XMMRegister xmm_temp3         = xmm4;
    const XMMRegister xmm_temp4         = xmm5;

    __ enter();   // required for proper stackwalking of RuntimeStub frame
    __ movptr(from, from_param);
    __ movptr(key, key_param);

    // keylen could be only {11, 13, 15} * 4 = {44, 52, 60}
    __ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));

    __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
    __ movdqu(xmm_result, Address(from, 0));   // get 16 bytes of input
    __ movptr(to, to_param);                   // now safe to overwrite rdx with 'to'

    // For encryption, the java expanded key ordering is just what we need

    load_key(xmm_temp1, key, 0x00, xmm_key_shuf_mask);
    __ pxor(xmm_result, xmm_temp1);

    load_key(xmm_temp1, key, 0x10, xmm_key_shuf_mask);
    load_key(xmm_temp2, key, 0x20, xmm_key_shuf_mask);
    load_key(xmm_temp3, key, 0x30, xmm_key_shuf_mask);
    load_key(xmm_temp4, key, 0x40, xmm_key_shuf_mask);

    __ aesenc(xmm_result, xmm_temp1);
    __ aesenc(xmm_result, xmm_temp2);
    __ aesenc(xmm_result, xmm_temp3);
    __ aesenc(xmm_result, xmm_temp4);

    load_key(xmm_temp1, key, 0x50, xmm_key_shuf_mask);
    load_key(xmm_temp2, key, 0x60, xmm_key_shuf_mask);
    load_key(xmm_temp3, key, 0x70,
             xmm_key_shuf_mask);   // (completes the 0x70 round-key load begun above)
    load_key(xmm_temp4, key, 0x80, xmm_key_shuf_mask);

    __ aesenc(xmm_result, xmm_temp1);
    __ aesenc(xmm_result, xmm_temp2);
    __ aesenc(xmm_result, xmm_temp3);
    __ aesenc(xmm_result, xmm_temp4);

    load_key(xmm_temp1, key, 0x90, xmm_key_shuf_mask);
    load_key(xmm_temp2, key, 0xa0, xmm_key_shuf_mask);

    __ cmpl(keylen, 44);
    __ jccb(Assembler::equal, L_doLast);   // 128-bit key: 0x90 is the last middle round, 0xa0 is last

    __ aesenc(xmm_result, xmm_temp1);
    __ aesenc(xmm_result, xmm_temp2);

    load_key(xmm_temp1, key, 0xb0, xmm_key_shuf_mask);
    load_key(xmm_temp2, key, 0xc0, xmm_key_shuf_mask);

    __ cmpl(keylen, 52);
    __ jccb(Assembler::equal, L_doLast);   // 192-bit key

    __ aesenc(xmm_result, xmm_temp1);
    __ aesenc(xmm_result, xmm_temp2);

    load_key(xmm_temp1, key, 0xd0, xmm_key_shuf_mask);
    load_key(xmm_temp2, key, 0xe0, xmm_key_shuf_mask);

    __ BIND(L_doLast);
    __ aesenc(xmm_result, xmm_temp1);
    __ aesenclast(xmm_result, xmm_temp2);
    __ movdqu(Address(to, 0), xmm_result);   // store the result
    __ xorptr(rax, rax);                     // return 0
    __ leave();                              // required for proper stackwalking of RuntimeStub frame
    __ ret(0);

    return start;
  }


  // Single-block AES decryption stub (updated version, unrolled key loads).
  //
  // Arguments:
  //
  // Inputs:
  //   c_rarg0   - source byte array address
  //   c_rarg1   - destination byte array address
  //   c_rarg2   - K (key) in little endian int array
  //
  address generate_aescrypt_decryptBlock() {
    assert(UseAES, "need AES instructions and misaligned SSE support");
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", "aescrypt_decryptBlock");
    Label L_doLast;
    address start = __ pc();

    const Register from   = rdx;   // source array address
    const Register to     = rdx;   // destination array address (rdx reused, see encrypt stub)
    const Register key    = rcx;   // key array address
    const Register keylen = rax;
    const Address  from_param(rbp, 8+0);
    const Address  to_param  (rbp, 8+4);
    const Address  key_param (rbp, 8+8);

    const XMMRegister xmm_result        = xmm0;
    const XMMRegister xmm_key_shuf_mask = xmm1;
    const XMMRegister xmm_temp1         = xmm2;
    const XMMRegister xmm_temp2         = xmm3;
    const XMMRegister xmm_temp3         = xmm4;
    const XMMRegister xmm_temp4         = xmm5;

    __ enter();   // required for proper stackwalking of RuntimeStub frame
    __ movptr(from, from_param);
    __ movptr(key, key_param);

    // keylen could be only {11, 13, 15} * 4 = {44, 52, 60}
    __ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));

    __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
    __ movdqu(xmm_result, Address(from, 0));
    __ movptr(to, to_param);

    // for decryption java expanded key ordering is rotated one position from what we want
    // so we start from 0x10 here and hit 0x00 last
    // we don't know if the key is aligned, hence not using load-execute form
    load_key(xmm_temp1, key, 0x10, xmm_key_shuf_mask);
    load_key(xmm_temp2, key, 0x20, xmm_key_shuf_mask);
    load_key(xmm_temp3, key, 0x30, xmm_key_shuf_mask);
    load_key(xmm_temp4, key, 0x40, xmm_key_shuf_mask);

    __ pxor  (xmm_result, xmm_temp1);
    __ aesdec(xmm_result, xmm_temp2);
    __ aesdec(xmm_result, xmm_temp3);
    __ aesdec(xmm_result, xmm_temp4);

    load_key(xmm_temp1, key, 0x50, xmm_key_shuf_mask);
    load_key(xmm_temp2, key, 0x60, xmm_key_shuf_mask);
    load_key(xmm_temp3, key, 0x70, xmm_key_shuf_mask);
    load_key(xmm_temp4, key, 0x80, xmm_key_shuf_mask);

    __ aesdec(xmm_result, xmm_temp1);
    __ aesdec(xmm_result, xmm_temp2);
    __ aesdec(xmm_result, xmm_temp3);
    __ aesdec(xmm_result, xmm_temp4);

    load_key(xmm_temp1, key, 0x90, xmm_key_shuf_mask);
    load_key(xmm_temp2, key, 0xa0, xmm_key_shuf_mask);
    load_key(xmm_temp3, key, 0x00, xmm_key_shuf_mask);   // last-round key, held until L_doLast

    __ cmpl(keylen, 44);
    __ jccb(Assembler::equal, L_doLast);

    __ aesdec(xmm_result, xmm_temp1);
    __ aesdec(xmm_result, xmm_temp2);

    load_key(xmm_temp1, key, 0xb0, xmm_key_shuf_mask);
    load_key(xmm_temp2, key, 0xc0, xmm_key_shuf_mask);

    __ cmpl(keylen, 52);
    __ jccb(Assembler::equal, L_doLast);

    __ aesdec(xmm_result, xmm_temp1);
    __ aesdec(xmm_result, xmm_temp2);

    load_key(xmm_temp1, key, 0xd0, xmm_key_shuf_mask);
    load_key(xmm_temp2, key, 0xe0, xmm_key_shuf_mask);

    __ BIND(L_doLast);
    __ aesdec(xmm_result, xmm_temp1);
    __ aesdec(xmm_result, xmm_temp2);

    // for decryption the aesdeclast operation is always on key+0x00
    __ aesdeclast(xmm_result, xmm_temp3);
    __ movdqu(Address(to, 0), xmm_result);   // store the result
    __ xorptr(rax, rax);                     // return 0
    __ leave();                              // required for proper stackwalking of RuntimeStub frame
    __ ret(0);

    return start;
  }

  // Save (saving == true) or restore the callee-saved registers rbx/rsi/rdi
  // that the CBC stubs clobber. Must be called after enter() so rbp is valid.
  void handleSOERegisters(bool saving) {
    const int saveFrameSizeInBytes = 4 * wordSize;
    const Address saved_rbx (rbp, -3 * wordSize);
    const Address saved_rsi (rbp, -2 * wordSize);
    const Address saved_rdi (rbp, -1 * wordSize);

    if (saving) {
      __ subptr(rsp, saveFrameSizeInBytes);
      __ movptr(saved_rsi, rsi);
      __ movptr(saved_rdi, rdi);
      __ movptr(saved_rbx, rbx);
    } else {
      // restoring
      __ movptr(rsi, saved_rsi);
      __ movptr(rdi, saved_rdi);
      __ movptr(rbx, saved_rbx);
    }
  }

  // CBC AES encryption stub (updated version).
  //
  // Arguments:
  //
  // Inputs:
  //   c_rarg0   - source byte array address
  //   c_rarg1   - destination byte array address
  //   c_rarg2   - K (key) in little endian int array
  //   c_rarg3   - r vector byte array address
  //   c_rarg4   - input length
  //
  address generate_cipherBlockChaining_encryptAESCrypt() {
    assert(UseAES, "need AES instructions and misaligned SSE support");
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_encryptAESCrypt");
    address start = __ pc();

    Label L_exit, L_key_192_256, L_key_256, L_loopTop_128, L_loopTop_192, L_loopTop_256;
    const Register from    = rsi;   // source array address
    const Register to      = rdx;   // destination array address
    const Register key     = rcx;   // key array address
    const Register rvec    = rdi;   // r byte array initialized from initvector array address
                                    // and left with the results of the last encryption block
    const Register len_reg = rbx;   // src len (must be multiple of blocksize 16)
    const Register pos     = rax;

    // xmm register assignments for the loops below
    const XMMRegister xmm_result = xmm0;
    const XMMRegister xmm_temp   = xmm1;
    // first 6 keys preloaded into xmm2-xmm7
    const int XMM_REG_NUM_KEY_FIRST = 2;
    const int XMM_REG_NUM_KEY_LAST  = 7;
    const XMMRegister xmm_key0 = as_XMMRegister(XMM_REG_NUM_KEY_FIRST);

    // NOTE(review): original lines 2416-2427 are not visible in this extract;
    // they presumably declare key_param/rvec_param/len_param, emit enter() and
    // handleSOERegisters(true), and load 'from'/'to' -- confirm against the full file.
    __ movptr(key     , key_param);
    __ movptr(rvec    , rvec_param);
    __ movptr(len_reg , len_param);

    const XMMRegister xmm_key_shuf_mask = xmm_temp;  // used temporarily to swap key bytes up front
    __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
    // load up xmm regs 2 thru 7 with keys 0-5
    for (int rnum = XMM_REG_NUM_KEY_FIRST, offset = 0x00; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
      load_key(as_XMMRegister(rnum), key, offset, xmm_key_shuf_mask);
      offset += 0x10;
    }

    __ movdqu(xmm_result, Address(rvec, 0x00));   // initialize xmm_result with r vec

    // now split to different paths depending on the keylen (len in ints of AESCrypt.KLE array (52=192, or 60=256))
    __ movl(rax, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
    __ cmpl(rax, 44);
    __ jcc(Assembler::notEqual, L_key_192_256);

    // 128 bit code follows here
    __ movl(pos, 0);   // note: movl (not movptr) in the updated version
    __ align(OptoLoopAlignment);
    __ BIND(L_loopTop_128);
    __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0));   // get next 16 bytes of input
    __ pxor  (xmm_result, xmm_temp);                                // xor with the current r vector

    __ pxor  (xmm_result, xmm_key0);                                // do the aes rounds
    for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
      __ aesenc(xmm_result, as_XMMRegister(rnum));
    }
    for (int key_offset = 0x60; key_offset <= 0x90; key_offset += 0x10) {
      aes_enc_key(xmm_result, xmm_temp, key, key_offset);
    }
    load_key(xmm_temp, key, 0xa0);
    __ aesenclast(xmm_result, xmm_temp);

    __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result);   // store into the next 16 bytes of output
    // no need to store r to memory until we exit
    __ addptr(pos, AESBlockSize);
    __ subptr(len_reg, AESBlockSize);
    __ jcc(Assembler::notEqual, L_loopTop_128);

    __ BIND(L_exit);
    __ movdqu(Address(rvec, 0), xmm_result);      // final value of r stored in rvec of CipherBlockChaining object

    handleSOERegisters(false /*restoring*/);
    __ movl(rax, 0);                              // return 0 (why?)
    __ leave();                                   // required for proper stackwalking of RuntimeStub frame
    __ ret(0);

    __ BIND(L_key_192_256);
    // here rax = len in ints of AESCrypt.KLE array (52=192, or 60=256)
    __ cmpl(rax, 52);
    __ jcc(Assembler::notEqual, L_key_256);

    // 192-bit code follows here (could be changed to use more xmm registers)
    __ movl(pos, 0);
    __ align(OptoLoopAlignment);
    __ BIND(L_loopTop_192);
    __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0));   // get next 16 bytes of input
    __ pxor  (xmm_result, xmm_temp);                                // xor with the current r vector

    __ pxor  (xmm_result, xmm_key0);                                // do the aes rounds
    for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
      __ aesenc(xmm_result, as_XMMRegister(rnum));
    }
    for (int key_offset = 0x60; key_offset <= 0xb0; key_offset += 0x10) {
      aes_enc_key(xmm_result, xmm_temp, key, key_offset);
    }
    load_key(xmm_temp, key, 0xc0);
    __ aesenclast(xmm_result, xmm_temp);

    __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result);   // store into the next 16 bytes of output
    // no need to store r to memory until we exit
    __ addptr(pos, AESBlockSize);
    __ subptr(len_reg, AESBlockSize);
    __ jcc(Assembler::notEqual, L_loopTop_192);
    __ jmp(L_exit);

    __ BIND(L_key_256);
    // 256-bit code follows here (could be changed to use more xmm registers)
    __ movl(pos, 0);
    __ align(OptoLoopAlignment);
    __ BIND(L_loopTop_256);
    __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0));   // get next 16 bytes of input
    __ pxor  (xmm_result, xmm_temp);                                // xor with the current r vector

    __ pxor  (xmm_result, xmm_key0);                                // do the aes rounds
    for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
      __ aesenc(xmm_result, as_XMMRegister(rnum));
    }
    for (int key_offset = 0x60; key_offset <= 0xd0; key_offset += 0x10) {
      aes_enc_key(xmm_result, xmm_temp, key, key_offset);
    }
    load_key(xmm_temp, key, 0xe0);
    __ aesenclast(xmm_result, xmm_temp);

    __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result);   // store into the next 16 bytes of output
    // no need to store r to memory until we exit
    __ addptr(pos, AESBlockSize);
    __ subptr(len_reg, AESBlockSize);
    __ jcc(Assembler::notEqual, L_loopTop_256);
    __ jmp(L_exit);

    return start;
  }


  // CBC AES Decryption.
  // In 32-bit stub, because of lack of registers we do not try to parallelize 4 blocks at a time.
  //
  // Arguments:
  //
  // Inputs:
  //   c_rarg0   - source byte array address
  //   c_rarg1   - destination byte array address
  //   c_rarg2   - K (key) in little endian int array
  //   c_rarg3   - r vector byte array address
  //   c_rarg4   - input length
  //

  address generate_cipherBlockChaining_decryptAESCrypt() {
    assert(UseAES, "need AES instructions and misaligned SSE support");
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_decryptAESCrypt");
    address start = __ pc();

    Label L_exit, L_key_192_256, L_key_256;
    Label L_singleBlock_loopTop_128;
    Label L_singleBlock_loopTop_192, L_singleBlock_loopTop_256;
    const Register from    = rsi;   // source array address
    const Register to      = rdx;   // destination array address
    const Register key     = rcx;   // key array address
    const Register rvec    = rdi;   // r byte array initialized from initvector array address
                                    // and left with the results of the last encryption block
    const Register len_reg = rbx;   // src len (must be multiple of blocksize 16)
    const Register pos     = rax;

    // xmm register assignments for the loops below
    const XMMRegister xmm_result = xmm0;
    const XMMRegister xmm_temp   = xmm1;
    // first 6 keys preloaded into xmm2-xmm7
    // NOTE(review): the extract is truncated mid-declaration here (original
    // line 2570 onward not visible).
    const int XMM_REG_NUM_KEY_FIRST =
2; 2591 // so we start from 0x10 here and hit 0x00 last 2592 const XMMRegister xmm_key_shuf_mask = xmm1; // used temporarily to swap key bytes up front 2593 __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr())); 2594 // load up xmm regs 2 thru 6 with first 5 keys 2595 for (int rnum = XMM_REG_NUM_KEY_FIRST, offset = 0x10; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) { 2596 load_key(as_XMMRegister(rnum), key, offset, xmm_key_shuf_mask); 2597 offset += 0x10; 2598 } 2599 2600 // inside here, use the rvec register to point to previous block cipher 2601 // with which we xor at the end of each newly decrypted block 2602 const Register prev_block_cipher_ptr = rvec; 2603 2604 // now split to different paths depending on the keylen (len in ints of AESCrypt.KLE array (52=192, or 60=256)) 2605 __ movl(rax, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); 2606 __ cmpl(rax, 44); 2607 __ jcc(Assembler::notEqual, L_key_192_256); 2608 2609 2610 // 128-bit code follows here, parallelized 2611 __ movl(pos, 0); 2612 __ align(OptoLoopAlignment); 2613 __ BIND(L_singleBlock_loopTop_128); 2614 __ cmpptr(len_reg, 0); // any blocks left?? 
2615 __ jcc(Assembler::equal, L_exit); 2616 __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input 2617 __ pxor (xmm_result, xmm_key_first); // do the aes dec rounds 2618 for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) { 2619 __ aesdec(xmm_result, as_XMMRegister(rnum)); 2620 } 2621 for (int key_offset = FIRST_NON_REG_KEY_offset; key_offset <= 0xa0; key_offset += 0x10) { // 128-bit runs up to key offset a0 2622 aes_dec_key(xmm_result, xmm_temp, key, key_offset); 2623 } 2624 load_key(xmm_temp, key, 0x00); // final key is stored in java expanded array at offset 0 2625 __ aesdeclast(xmm_result, xmm_temp); 2626 __ movdqu(xmm_temp, Address(prev_block_cipher_ptr, 0x00)); 2627 __ pxor (xmm_result, xmm_temp); // xor with the current r vector 2628 __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output 2629 // no need to store r to memory until we exit 2630 __ lea(prev_block_cipher_ptr, Address(from, pos, Address::times_1, 0)); // set up new ptr 2631 __ addptr(pos, AESBlockSize); 2632 __ subptr(len_reg, AESBlockSize); 2633 __ jmp(L_singleBlock_loopTop_128); 2634 2635 2636 __ BIND(L_exit); 2637 __ movdqu(xmm_temp, Address(prev_block_cipher_ptr, 0x00)); 2638 __ movptr(rvec , rvec_param); // restore this since used in loop 2639 __ movdqu(Address(rvec, 0), xmm_temp); // final value of r stored in rvec of CipherBlockChaining object 2640 handleSOERegisters(false /*restoring*/); 2641 __ movl(rax, 0); // return 0 (why?) 
2642 __ leave(); // required for proper stackwalking of RuntimeStub frame 2643 __ ret(0); 2644 2645 2646 __ BIND(L_key_192_256); 2647 // here rax = len in ints of AESCrypt.KLE array (52=192, or 60=256) 2648 __ cmpl(rax, 52); 2649 __ jcc(Assembler::notEqual, L_key_256); 2650 2651 // 192-bit code follows here (could be optimized to use parallelism) 2652 __ movl(pos, 0); 2653 __ align(OptoLoopAlignment); 2654 __ BIND(L_singleBlock_loopTop_192); 2655 __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input 2656 __ pxor (xmm_result, xmm_key_first); // do the aes dec rounds 2657 for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) { 2658 __ aesdec(xmm_result, as_XMMRegister(rnum)); 2659 } 2660 for (int key_offset = FIRST_NON_REG_KEY_offset; key_offset <= 0xc0; key_offset += 0x10) { // 192-bit runs up to key offset c0 2661 aes_dec_key(xmm_result, xmm_temp, key, key_offset); 2662 } 2663 load_key(xmm_temp, key, 0x00); // final key is stored in java expanded array at offset 0 2664 __ aesdeclast(xmm_result, xmm_temp); 2665 __ movdqu(xmm_temp, Address(prev_block_cipher_ptr, 0x00)); 2666 __ pxor (xmm_result, xmm_temp); // xor with the current r vector 2667 __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output 2668 // no need to store r to memory until we exit 2669 __ lea(prev_block_cipher_ptr, Address(from, pos, Address::times_1, 0)); // set up new ptr 2670 __ addptr(pos, AESBlockSize); 2671 __ subptr(len_reg, AESBlockSize); 2672 __ jcc(Assembler::notEqual,L_singleBlock_loopTop_192); 2673 __ jmp(L_exit); 2674 2675 __ BIND(L_key_256); 2676 // 256-bit code follows here (could be optimized to use parallelism) 2677 __ movl(pos, 0); 2678 __ align(OptoLoopAlignment); 2679 __ BIND(L_singleBlock_loopTop_256); 2680 __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input 2681 __ pxor (xmm_result, xmm_key_first); // do the 
aes dec rounds 2682 for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) { 2683 __ aesdec(xmm_result, as_XMMRegister(rnum)); 2684 } 2685 for (int key_offset = FIRST_NON_REG_KEY_offset; key_offset <= 0xe0; key_offset += 0x10) { // 256-bit runs up to key offset e0 2686 aes_dec_key(xmm_result, xmm_temp, key, key_offset); 2687 } 2688 load_key(xmm_temp, key, 0x00); // final key is stored in java expanded array at offset 0 2689 __ aesdeclast(xmm_result, xmm_temp); 2690 __ movdqu(xmm_temp, Address(prev_block_cipher_ptr, 0x00)); 2691 __ pxor (xmm_result, xmm_temp); // xor with the current r vector 2692 __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output 2693 // no need to store r to memory until we exit 2694 __ lea(prev_block_cipher_ptr, Address(from, pos, Address::times_1, 0)); // set up new ptr 2695 __ addptr(pos, AESBlockSize); 2696 __ subptr(len_reg, AESBlockSize); 2697 __ jcc(Assembler::notEqual,L_singleBlock_loopTop_256); |