src/cpu/x86/vm/stubGenerator_x86_32.cpp
Index Unified diffs Context diffs Sdiffs Wdiffs Patch New Old Previous File Next File 8004835 Sdiff src/cpu/x86/vm

src/cpu/x86/vm/stubGenerator_x86_32.cpp

Print this page




2157     load_key(xmmtmp, key, offset, xmm_shuf_mask);
2158     __ aesenc(xmmdst, xmmtmp);
2159   }
2160 
2161   // aesdec using specified key+offset
2162   // can optionally specify that the shuffle mask is already in an xmmregister
       // Emits one AES decryption round: loads the 128-bit round key at
       // key+offset into xmmtmp (optionally byte-shuffled via xmm_shuf_mask,
       // see load_key) and applies aesdec to xmmdst in place. xmmtmp is
       // clobbered.
2163   void aes_dec_key(XMMRegister xmmdst, XMMRegister xmmtmp, Register key, int offset, XMMRegister xmm_shuf_mask=NULL) {
2164     load_key(xmmtmp, key, offset, xmm_shuf_mask);
2165     __ aesdec(xmmdst, xmmtmp);
2166   }
2167 
2168 
2169   // Arguments:
2170   //
2171   // Inputs:
2172   //   c_rarg0   - source byte array address
2173   //   c_rarg1   - destination byte array address
2174   //   c_rarg2   - K (key) in little endian int array
2175   //
       // Stub: single-block AES encrypt (left-hand/old side of this diff).
       // Encrypts the 16 bytes at 'from' into 'to' with the Java-expanded key
       // at 'key' using AES-NI. 32-bit C calling convention: the three
       // pointer args are read from the caller's stack at rbp+8/+12/+16.
       // Returns the stub's entry address.
2176   address generate_aescrypt_encryptBlock() {
2177     assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support");
2178     __ align(CodeEntryAlignment);
2179     StubCodeMark mark(this, "StubRoutines", "aescrypt_encryptBlock");
2180     Label L_doLast;
2181     address start = __ pc();
2182
2183     const Register from        = rsi;      // source array address
2184     const Register to          = rdx;      // destination array address
2185     const Register key         = rcx;      // key array address
2186     const Register keylen      = rax;
2187     const Address  from_param(rbp, 8+0);
2188     const Address  to_param  (rbp, 8+4);
2189     const Address  key_param (rbp, 8+8);
2190
2191     const XMMRegister xmm_result = xmm0;
2192     const XMMRegister xmm_temp   = xmm1;
2193     const XMMRegister xmm_key_shuf_mask = xmm2;



2194
2195     __ enter(); // required for proper stackwalking of RuntimeStub frame
       // rsi is callee-saved in the 32-bit ABI and is used for 'from' below,
       // so it must be preserved (popped before ret).
2196     __ push(rsi);
2197     __ movptr(from , from_param);
2198     __ movptr(to   , to_param);
2199     __ movptr(key  , key_param);
2200

2201     __ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
2202     // keylen = # of 32-bit words, convert to 128-bit words
2203     __ shrl(keylen, 2);
2204     __ subl(keylen, 11);   // every key has at least 11 128-bit words, some have more
       // key length in ints is 44/52/60 (AES-128/192/256); after shr 2 and
       // sub 11, keylen holds 0, 2 or 4 = number of extra 128-bit round keys.
2205
2206     __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
2207     __ movdqu(xmm_result, Address(from, 0));  // get 16 bytes of input

2208
2209     // For encryption, the java expanded key ordering is just what we need
2210
2211     load_key(xmm_temp, key, 0x00, xmm_key_shuf_mask);
2212     __ pxor(xmm_result, xmm_temp);
2213     for (int offset = 0x10; offset <= 0x90; offset += 0x10) {
2214       aes_enc_key(xmm_result, xmm_temp, key, offset, xmm_key_shuf_mask);
2215     }
2216     load_key  (xmm_temp, key, 0xa0, xmm_key_shuf_mask);
       // keylen == 0 -> AES-128: rounds 1-9 were emitted above and 0xa0
       // holds the final round key, so jump straight to aesenclast.
2217     __ cmpl(keylen, 0);
2218     __ jcc(Assembler::equal, L_doLast);
2219     __ aesenc(xmm_result, xmm_temp);                   // only in 192 and 256 bit keys
2220     aes_enc_key(xmm_result, xmm_temp, key, 0xb0, xmm_key_shuf_mask);
2221     load_key(xmm_temp, key, 0xc0, xmm_key_shuf_mask);
       // after the sub, zero means AES-192 (its two extra rounds are done);
       // otherwise fall through to the AES-256 rounds.
2222     __ subl(keylen, 2);
2223     __ jcc(Assembler::equal, L_doLast);
2224     __ aesenc(xmm_result, xmm_temp);                   // only in 256 bit keys
2225     aes_enc_key(xmm_result, xmm_temp, key, 0xd0, xmm_key_shuf_mask);
2226     load_key(xmm_temp, key, 0xe0, xmm_key_shuf_mask);



























2227
       // on entry here xmm_temp holds the last round key for whichever key
       // size branched/fell through (0xa0 / 0xc0 / 0xe0).
2228     __ BIND(L_doLast);
2229     __ aesenclast(xmm_result, xmm_temp);

2230     __ movdqu(Address(to, 0), xmm_result);        // store the result
2231     __ xorptr(rax, rax); // return 0
2232     __ pop(rsi);
2233     __ leave(); // required for proper stackwalking of RuntimeStub frame
2234     __ ret(0);
2235
2236     return start;
2237   }
2238 
2239 
2240   // Arguments:
2241   //
2242   // Inputs:
2243   //   c_rarg0   - source byte array address
2244   //   c_rarg1   - destination byte array address
2245   //   c_rarg2   - K (key) in little endian int array
2246   //
       // Stub: single-block AES decrypt (left-hand/old side of this diff).
       // Decrypts the 16 bytes at 'from' into 'to' with the Java-expanded key
       // at 'key' using AES-NI. Same 32-bit stack-argument convention as the
       // encrypt stub above. Returns the stub's entry address.
2247   address generate_aescrypt_decryptBlock() {
2248     assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support");
2249     __ align(CodeEntryAlignment);
2250     StubCodeMark mark(this, "StubRoutines", "aescrypt_decryptBlock");
2251     Label L_doLast;
2252     address start = __ pc();
2253
2254     const Register from        = rsi;      // source array address
2255     const Register to          = rdx;      // destination array address
2256     const Register key         = rcx;      // key array address
2257     const Register keylen      = rax;
2258     const Address  from_param(rbp, 8+0);
2259     const Address  to_param  (rbp, 8+4);
2260     const Address  key_param (rbp, 8+8);
2261
2262     const XMMRegister xmm_result = xmm0;
2263     const XMMRegister xmm_temp   = xmm1;
2264     const XMMRegister xmm_key_shuf_mask = xmm2;



2265
2266     __ enter(); // required for proper stackwalking of RuntimeStub frame
       // rsi is callee-saved in the 32-bit ABI; preserved because it carries
       // 'from'.
2267     __ push(rsi);
2268     __ movptr(from , from_param);
2269     __ movptr(to   , to_param);
2270     __ movptr(key  , key_param);
2271

2272     __ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
2273     // keylen = # of 32-bit words, convert to 128-bit words
2274     __ shrl(keylen, 2);
2275     __ subl(keylen, 11);   // every key has at least 11 128-bit words, some have more
       // 44/52/60 ints -> keylen = 0/2/4 extra 128-bit round keys.
2276
2277     __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
2278     __ movdqu(xmm_result, Address(from, 0));

2279
2280     // for decryption java expanded key ordering is rotated one position from what we want
2281     // so we start from 0x10 here and hit 0x00 last
2282     // we don't know if the key is aligned, hence not using load-execute form
2283     load_key(xmm_temp, key, 0x10, xmm_key_shuf_mask);
2284     __ pxor  (xmm_result, xmm_temp);
2285     for (int offset = 0x20; offset <= 0xa0; offset += 0x10) {
2286       aes_dec_key(xmm_result, xmm_temp, key, offset, xmm_key_shuf_mask);
2287     }
       // keylen == 0 -> AES-128: all middle rounds done, go do aesdeclast.
2288     __ cmpl(keylen, 0);
2289     __ jcc(Assembler::equal, L_doLast);
2290     // only in 192 and 256 bit keys
2291     aes_dec_key(xmm_result, xmm_temp, key, 0xb0, xmm_key_shuf_mask);
2292     aes_dec_key(xmm_result, xmm_temp, key, 0xc0, xmm_key_shuf_mask);
       // after the sub, zero means AES-192; otherwise fall through to the
       // two AES-256-only rounds.
2293     __ subl(keylen, 2);
2294     __ jcc(Assembler::equal, L_doLast);
2295     // only in 256 bit keys
2296     aes_dec_key(xmm_result, xmm_temp, key, 0xd0, xmm_key_shuf_mask);
2297     aes_dec_key(xmm_result, xmm_temp, key, 0xe0, xmm_key_shuf_mask);


























2298
2299     __ BIND(L_doLast);
2300     // for decryption the aesdeclast operation is always on key+0x00
2301     load_key(xmm_temp, key, 0x00, xmm_key_shuf_mask);
2302     __ aesdeclast(xmm_result, xmm_temp);
2303


2304     __ movdqu(Address(to, 0), xmm_result);  // store the result
2305
2306     __ xorptr(rax, rax); // return 0
2307     __ pop(rsi);
2308     __ leave(); // required for proper stackwalking of RuntimeStub frame
2309     __ ret(0);
2310
2311     return start;
2312   }
2313 
2314   void handleSOERegisters(bool saving) {
       // Saves (saving == true) or restores (saving == false) the x86-32
       // callee-saved GP registers rbx/rsi/rdi used by the CBC stubs, in a
       // fixed area just below the caller-established rbp frame.
2315     const int saveFrameSizeInBytes = 4 * wordSize;
       // NOTE(review): 4 words are reserved but only 3 slots (-1..-3 *
       // wordSize) are used -- confirm whether the fourth word is deliberate
       // padding.
2316     const Address saved_rbx     (rbp, -3 * wordSize);
2317     const Address saved_rsi     (rbp, -2 * wordSize);
2318     const Address saved_rdi     (rbp, -1 * wordSize);
2319
2320     if (saving) {
2321       __ subptr(rsp, saveFrameSizeInBytes);
2322       __ movptr(saved_rsi, rsi);
2323       __ movptr(saved_rdi, rdi);
2324       __ movptr(saved_rbx, rbx);
2325     } else {
2326       // restoring
2327       __ movptr(rsi, saved_rsi);
2328       __ movptr(rdi, saved_rdi);
2329       __ movptr(rbx, saved_rbx);
2330     }
       // The restore path does not re-add saveFrameSizeInBytes to rsp: the
       // callers' trailing leave() resets rsp from rbp (see the stubs below).
2331   }
2332 
2333   // Arguments:
2334   //
2335   // Inputs:
2336   //   c_rarg0   - source byte array address
2337   //   c_rarg1   - destination byte array address
2338   //   c_rarg2   - K (key) in little endian int array
2339   //   c_rarg3   - r vector byte array address
2340   //   c_rarg4   - input length
2341   //
       // Stub: CBC-mode AES encryption over a buffer (left-hand/old side of
       // this diff). The first six round keys are preloaded into xmm2-xmm7;
       // the remainder are fetched from memory per block. Splits into
       // separate loops for 128/192/256-bit keys based on the key array's
       // int length (44/52/60).
2342   address generate_cipherBlockChaining_encryptAESCrypt() {
2343     assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support");
2344     __ align(CodeEntryAlignment);
2345     StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_encryptAESCrypt");
2346     address start = __ pc();
2347
2348     Label L_exit, L_key_192_256, L_key_256, L_loopTop_128, L_loopTop_192, L_loopTop_256;
2349     const Register from        = rsi;      // source array address
2350     const Register to          = rdx;      // destination array address
2351     const Register key         = rcx;      // key array address
2352     const Register rvec        = rdi;      // r byte array initialized from initvector array address
2353                                            // and left with the results of the last encryption block
2354     const Register len_reg     = rbx;      // src len (must be multiple of blocksize 16)
2355     const Register pos         = rax;
2356
2357     // xmm register assignments for the loops below
2358     const XMMRegister xmm_result = xmm0;
2359     const XMMRegister xmm_temp   = xmm1;
2360     // first 6 keys preloaded into xmm2-xmm7
2361     const int XMM_REG_NUM_KEY_FIRST = 2;
2362     const int XMM_REG_NUM_KEY_LAST  = 7;
2363     const XMMRegister xmm_key0   = as_XMMRegister(XMM_REG_NUM_KEY_FIRST);
       // NOTE(review): lines 2364-2375 are elided in this diff rendering;
       // they presumably declare the rbp-relative *_param Addresses and emit
       // __ enter() / handleSOERegisters(true) before the parameter loads
       // below -- the restore/leave at L_exit implies a matching save.
       // Confirm against the full file.


2376     __ movptr(key  , key_param);
2377     __ movptr(rvec , rvec_param);
2378     __ movptr(len_reg , len_param);
2379
2380     const XMMRegister xmm_key_shuf_mask = xmm_temp;  // used temporarily to swap key bytes up front
2381     __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
2382     // load up xmm regs 2 thru 7 with keys 0-5
2383     for (int rnum = XMM_REG_NUM_KEY_FIRST, offset = 0x00; rnum  <= XMM_REG_NUM_KEY_LAST; rnum++) {
2384       load_key(as_XMMRegister(rnum), key, offset, xmm_key_shuf_mask);
2385       offset += 0x10;
2386     }
2387
2388     __ movdqu(xmm_result, Address(rvec, 0x00));   // initialize xmm_result with r vec
2389
2390     // now split to different paths depending on the keylen (len in ints of AESCrypt.KLE array (52=192, or 60=256))
2391     __ movl(rax, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
2392     __ cmpl(rax, 44);
2393     __ jcc(Assembler::notEqual, L_key_192_256);
2394
2395     // 128 bit code follows here
2396     __ movptr(pos, 0);
2397     __ align(OptoLoopAlignment);
2398     __ BIND(L_loopTop_128);
2399     __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0));   // get next 16 bytes of input
2400     __ pxor  (xmm_result, xmm_temp);                                // xor with the current r vector
2401
2402     __ pxor  (xmm_result, xmm_key0);                                // do the aes rounds
2403     for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum  <= XMM_REG_NUM_KEY_LAST; rnum++) {
2404       __ aesenc(xmm_result, as_XMMRegister(rnum));
2405     }
2406     for (int key_offset = 0x60; key_offset <= 0x90; key_offset += 0x10) {
2407       aes_enc_key(xmm_result, xmm_temp, key, key_offset);
2408     }
2409     load_key(xmm_temp, key, 0xa0);
2410     __ aesenclast(xmm_result, xmm_temp);
2411
2412     __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result);     // store into the next 16 bytes of output
2413     // no need to store r to memory until we exit
2414     __ addptr(pos, AESBlockSize);
       // loop while len_reg != 0 (ZF comes from the subptr).
       // NOTE(review): the length test is at the bottom, so a zero-length
       // input would still encrypt one block -- confirm callers never pass 0.
2415     __ subptr(len_reg, AESBlockSize);
2416     __ jcc(Assembler::notEqual, L_loopTop_128);
2417
2418     __ BIND(L_exit);
2419     __ movdqu(Address(rvec, 0), xmm_result);     // final value of r stored in rvec of CipherBlockChaining object
2420
2421     handleSOERegisters(false /*restoring*/);
2422     __ movl(rax, 0);                             // return 0 (why?)
2423     __ leave();                                  // required for proper stackwalking of RuntimeStub frame
2424     __ ret(0);
2425
       // key was not 44 ints: rax still holds the length in ints; decide
       // between the 192- and 256-bit loops below.
2426   __ BIND(L_key_192_256);
2427   // here rax = len in ints of AESCrypt.KLE array (52=192, or 60=256)
2428     __ cmpl(rax, 52);
2429     __ jcc(Assembler::notEqual, L_key_256);
2430
2431     // 192-bit code follows here (could be changed to use more xmm registers)
2432     __ movptr(pos, 0);
2433   __ align(OptoLoopAlignment);
2434   __ BIND(L_loopTop_192);
2435     __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0));   // get next 16 bytes of input
2436     __ pxor  (xmm_result, xmm_temp);                                // xor with the current r vector
2437
2438     __ pxor  (xmm_result, xmm_key0);                                // do the aes rounds
2439     for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum  <= XMM_REG_NUM_KEY_LAST; rnum++) {
2440       __ aesenc(xmm_result, as_XMMRegister(rnum));
2441     }
2442     for (int key_offset = 0x60; key_offset <= 0xb0; key_offset += 0x10) {
2443       aes_enc_key(xmm_result, xmm_temp, key, key_offset);
2444     }
2445     load_key(xmm_temp, key, 0xc0);
2446     __ aesenclast(xmm_result, xmm_temp);
2447
2448     __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result);   // store into the next 16 bytes of output
2449     // no need to store r to memory until we exit
2450     __ addptr(pos, AESBlockSize);
2451     __ subptr(len_reg, AESBlockSize);
2452     __ jcc(Assembler::notEqual, L_loopTop_192);
2453     __ jmp(L_exit);
2454
2455   __ BIND(L_key_256);
2456     // 256-bit code follows here (could be changed to use more xmm registers)
2457     __ movptr(pos, 0);
2458   __ align(OptoLoopAlignment);
2459   __ BIND(L_loopTop_256);
2460     __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0));   // get next 16 bytes of input
2461     __ pxor  (xmm_result, xmm_temp);                                // xor with the current r vector
2462
2463     __ pxor  (xmm_result, xmm_key0);                                // do the aes rounds
2464     for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum  <= XMM_REG_NUM_KEY_LAST; rnum++) {
2465       __ aesenc(xmm_result, as_XMMRegister(rnum));
2466     }
2467     for (int key_offset = 0x60; key_offset <= 0xd0; key_offset += 0x10) {
2468       aes_enc_key(xmm_result, xmm_temp, key, key_offset);
2469     }
2470     load_key(xmm_temp, key, 0xe0);
2471     __ aesenclast(xmm_result, xmm_temp);
2472
2473     __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result);   // store into the next 16 bytes of output
2474     // no need to store r to memory until we exit
2475     __ addptr(pos, AESBlockSize);
2476     __ subptr(len_reg, AESBlockSize);
2477     __ jcc(Assembler::notEqual, L_loopTop_256);
2478     __ jmp(L_exit);
2479
2480     return start;
2481   }
2482 
2483 
2484   // CBC AES Decryption.
2485   // In 32-bit stub, because of lack of registers we do not try to parallelize 4 blocks at a time.
2486   //
2487   // Arguments:
2488   //
2489   // Inputs:
2490   //   c_rarg0   - source byte array address
2491   //   c_rarg1   - destination byte array address
2492   //   c_rarg2   - K (key) in little endian int array
2493   //   c_rarg3   - r vector byte array address
2494   //   c_rarg4   - input length
2495   //
2496 
2497   address generate_cipherBlockChaining_decryptAESCrypt() {
2498     assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support");
2499     __ align(CodeEntryAlignment);
2500     StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_decryptAESCrypt");
2501     address start = __ pc();
2502 
2503     Label L_exit, L_key_192_256, L_key_256;
2504     Label L_singleBlock_loopTop_128;
2505     Label L_singleBlock_loopTop_192, L_singleBlock_loopTop_256;
2506     const Register from        = rsi;      // source array address
2507     const Register to          = rdx;      // destination array address
2508     const Register key         = rcx;      // key array address
2509     const Register rvec        = rdi;      // r byte array initialized from initvector array address
2510                                            // and left with the results of the last encryption block
2511     const Register len_reg     = rbx;      // src len (must be multiple of blocksize 16)
2512     const Register pos         = rax;
2513 
2514     // xmm register assignments for the loops below
2515     const XMMRegister xmm_result = xmm0;
2516     const XMMRegister xmm_temp   = xmm1;
2517     // first 6 keys preloaded into xmm2-xmm7
2518     const int XMM_REG_NUM_KEY_FIRST = 2;


2539     // so we start from 0x10 here and hit 0x00 last
2540     const XMMRegister xmm_key_shuf_mask = xmm1;  // used temporarily to swap key bytes up front
2541     __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
2542     // load up xmm regs 2 thru 6 with first 5 keys
2543     for (int rnum = XMM_REG_NUM_KEY_FIRST, offset = 0x10; rnum  <= XMM_REG_NUM_KEY_LAST; rnum++) {
2544       load_key(as_XMMRegister(rnum), key, offset, xmm_key_shuf_mask);
2545       offset += 0x10;
2546     }
2547 
2548     // inside here, use the rvec register to point to previous block cipher
2549     // with which we xor at the end of each newly decrypted block
2550     const Register  prev_block_cipher_ptr = rvec;
2551 
2552     // now split to different paths depending on the keylen (len in ints of AESCrypt.KLE array (52=192, or 60=256))
2553     __ movl(rax, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
2554     __ cmpl(rax, 44);
2555     __ jcc(Assembler::notEqual, L_key_192_256);
2556 
2557 
2558     // 128-bit code follows here, parallelized
2559     __ movptr(pos, 0);
2560   __ align(OptoLoopAlignment);
2561   __ BIND(L_singleBlock_loopTop_128);
2562     __ cmpptr(len_reg, 0);           // any blocks left??
2563     __ jcc(Assembler::equal, L_exit);
2564     __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0));   // get next 16 bytes of cipher input
2565     __ pxor  (xmm_result, xmm_key_first);                             // do the aes dec rounds
2566     for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum  <= XMM_REG_NUM_KEY_LAST; rnum++) {
2567       __ aesdec(xmm_result, as_XMMRegister(rnum));
2568     }
2569     for (int key_offset = FIRST_NON_REG_KEY_offset; key_offset <= 0xa0; key_offset += 0x10) {   // 128-bit runs up to key offset a0
2570       aes_dec_key(xmm_result, xmm_temp, key, key_offset);
2571     }
2572     load_key(xmm_temp, key, 0x00);                                     // final key is stored in java expanded array at offset 0
2573     __ aesdeclast(xmm_result, xmm_temp);
2574     __ movdqu(xmm_temp, Address(prev_block_cipher_ptr, 0x00));
2575     __ pxor  (xmm_result, xmm_temp);                                  // xor with the current r vector
2576     __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result);     // store into the next 16 bytes of output
2577     // no need to store r to memory until we exit
2578     __ lea(prev_block_cipher_ptr, Address(from, pos, Address::times_1, 0));     // set up new ptr
2579     __ addptr(pos, AESBlockSize);
2580     __ subptr(len_reg, AESBlockSize);
2581     __ jmp(L_singleBlock_loopTop_128);
2582 
2583 
2584     __ BIND(L_exit);
2585     __ movdqu(xmm_temp, Address(prev_block_cipher_ptr, 0x00));
2586     __ movptr(rvec , rvec_param);                                     // restore this since used in loop
2587     __ movdqu(Address(rvec, 0), xmm_temp);                            // final value of r stored in rvec of CipherBlockChaining object
2588     handleSOERegisters(false /*restoring*/);
2589     __ movl(rax, 0);                                                  // return 0 (why?)
2590     __ leave();                                                       // required for proper stackwalking of RuntimeStub frame
2591     __ ret(0);
2592 
2593 
2594     __ BIND(L_key_192_256);
2595     // here rax = len in ints of AESCrypt.KLE array (52=192, or 60=256)
2596     __ cmpl(rax, 52);
2597     __ jcc(Assembler::notEqual, L_key_256);
2598 
2599     // 192-bit code follows here (could be optimized to use parallelism)
2600     __ movptr(pos, 0);
2601     __ align(OptoLoopAlignment);
2602     __ BIND(L_singleBlock_loopTop_192);
2603     __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0));   // get next 16 bytes of cipher input
2604     __ pxor  (xmm_result, xmm_key_first);                             // do the aes dec rounds
2605     for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
2606       __ aesdec(xmm_result, as_XMMRegister(rnum));
2607     }
2608     for (int key_offset = FIRST_NON_REG_KEY_offset; key_offset <= 0xc0; key_offset += 0x10) {   // 192-bit runs up to key offset c0
2609       aes_dec_key(xmm_result, xmm_temp, key, key_offset);
2610     }
2611     load_key(xmm_temp, key, 0x00);                                     // final key is stored in java expanded array at offset 0
2612     __ aesdeclast(xmm_result, xmm_temp);
2613     __ movdqu(xmm_temp, Address(prev_block_cipher_ptr, 0x00));
2614     __ pxor  (xmm_result, xmm_temp);                                  // xor with the current r vector
2615     __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result);     // store into the next 16 bytes of output
2616     // no need to store r to memory until we exit
2617     __ lea(prev_block_cipher_ptr, Address(from, pos, Address::times_1, 0));     // set up new ptr
2618     __ addptr(pos, AESBlockSize);
2619     __ subptr(len_reg, AESBlockSize);
2620     __ jcc(Assembler::notEqual,L_singleBlock_loopTop_192);
2621     __ jmp(L_exit);
2622 
2623     __ BIND(L_key_256);
2624     // 256-bit code follows here (could be optimized to use parallelism)
2625     __ movptr(pos, 0);
2626     __ align(OptoLoopAlignment);
2627     __ BIND(L_singleBlock_loopTop_256);
2628     __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0));   // get next 16 bytes of cipher input
2629     __ pxor  (xmm_result, xmm_key_first);                             // do the aes dec rounds
2630     for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
2631       __ aesdec(xmm_result, as_XMMRegister(rnum));
2632     }
2633     for (int key_offset = FIRST_NON_REG_KEY_offset; key_offset <= 0xe0; key_offset += 0x10) {   // 256-bit runs up to key offset e0
2634       aes_dec_key(xmm_result, xmm_temp, key, key_offset);
2635     }
2636     load_key(xmm_temp, key, 0x00);                                     // final key is stored in java expanded array at offset 0
2637     __ aesdeclast(xmm_result, xmm_temp);
2638     __ movdqu(xmm_temp, Address(prev_block_cipher_ptr, 0x00));
2639     __ pxor  (xmm_result, xmm_temp);                                  // xor with the current r vector
2640     __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result);     // store into the next 16 bytes of output
2641     // no need to store r to memory until we exit
2642     __ lea(prev_block_cipher_ptr, Address(from, pos, Address::times_1, 0));     // set up new ptr
2643     __ addptr(pos, AESBlockSize);
2644     __ subptr(len_reg, AESBlockSize);
2645     __ jcc(Assembler::notEqual,L_singleBlock_loopTop_256);




2157     load_key(xmmtmp, key, offset, xmm_shuf_mask);
2158     __ aesenc(xmmdst, xmmtmp);
2159   }
2160 
2161   // aesdec using specified key+offset
2162   // can optionally specify that the shuffle mask is already in an xmmregister
       // Emits one AES decryption round: loads the 128-bit round key at
       // key+offset into xmmtmp (optionally byte-shuffled via xmm_shuf_mask,
       // see load_key) and applies aesdec to xmmdst in place. xmmtmp is
       // clobbered.
2163   void aes_dec_key(XMMRegister xmmdst, XMMRegister xmmtmp, Register key, int offset, XMMRegister xmm_shuf_mask=NULL) {
2164     load_key(xmmtmp, key, offset, xmm_shuf_mask);
2165     __ aesdec(xmmdst, xmmtmp);
2166   }
2167 
2168 
2169   // Arguments:
2170   //
2171   // Inputs:
2172   //   c_rarg0   - source byte array address
2173   //   c_rarg1   - destination byte array address
2174   //   c_rarg2   - K (key) in little endian int array
2175   //
       // Stub: single-block AES encrypt (right-hand/new side of this diff).
       // Fully unrolled: round keys are staged four at a time in xmm2-xmm5,
       // and keylen is compared directly against 44/52 ints instead of being
       // normalized. Requires only plain AES-NI (assert no longer demands
       // UseAVX).
2176   address generate_aescrypt_encryptBlock() {
2177     assert(UseAES, "need AES instructions and misaligned SSE support");
2178     __ align(CodeEntryAlignment);
2179     StubCodeMark mark(this, "StubRoutines", "aescrypt_encryptBlock");
2180     Label L_doLast;
2181     address start = __ pc();
2182
2183     const Register from        = rdx;      // source array address
2184     const Register to          = rdx;      // destination array address
       // NOTE: 'from' and 'to' deliberately share rdx -- 'to' is only
       // loaded (at 2207) after the input block has already been read
       // through 'from'. This avoids touching callee-saved rsi entirely.
2185     const Register key         = rcx;      // key array address
2186     const Register keylen      = rax;
2187     const Address  from_param(rbp, 8+0);
2188     const Address  to_param  (rbp, 8+4);
2189     const Address  key_param (rbp, 8+8);
2190
2191     const XMMRegister xmm_result = xmm0;
2192     const XMMRegister xmm_key_shuf_mask = xmm1;
2193     const XMMRegister xmm_temp1  = xmm2;
2194     const XMMRegister xmm_temp2  = xmm3;
2195     const XMMRegister xmm_temp3  = xmm4;
2196     const XMMRegister xmm_temp4  = xmm5;
2197
2198     __ enter();   // required for proper stackwalking of RuntimeStub frame
2199     __ movptr(from, from_param);
2200     __ movptr(key, key_param);


2201
2202     // keylen could be only {11, 13, 15} * 4 = {44, 52, 60}
2203     __ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));



2204
2205     __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
2206     __ movdqu(xmm_result, Address(from, 0));  // get 16 bytes of input
2207     __ movptr(to, to_param);
2208
2209     // For encryption, the java expanded key ordering is just what we need
2210
2211     load_key(xmm_temp1, key, 0x00, xmm_key_shuf_mask);
2212     __ pxor(xmm_result, xmm_temp1);
2213
2214     load_key(xmm_temp1, key, 0x10, xmm_key_shuf_mask);
2215     load_key(xmm_temp2, key, 0x20, xmm_key_shuf_mask);
2216     load_key(xmm_temp3, key, 0x30, xmm_key_shuf_mask);
2217     load_key(xmm_temp4, key, 0x40, xmm_key_shuf_mask);
2218
2219     __ aesenc(xmm_result, xmm_temp1);
2220     __ aesenc(xmm_result, xmm_temp2);
2221     __ aesenc(xmm_result, xmm_temp3);
2222     __ aesenc(xmm_result, xmm_temp4);
2223
2224     load_key(xmm_temp1, key, 0x50, xmm_key_shuf_mask);
2225     load_key(xmm_temp2, key, 0x60, xmm_key_shuf_mask);
2226     load_key(xmm_temp3, key, 0x70, xmm_key_shuf_mask);
2227     load_key(xmm_temp4, key, 0x80, xmm_key_shuf_mask);
2228
2229     __ aesenc(xmm_result, xmm_temp1);
2230     __ aesenc(xmm_result, xmm_temp2);
2231     __ aesenc(xmm_result, xmm_temp3);
2232     __ aesenc(xmm_result, xmm_temp4);
2233
2234     load_key(xmm_temp1, key, 0x90, xmm_key_shuf_mask);
2235     load_key(xmm_temp2, key, 0xa0, xmm_key_shuf_mask);
2236
       // 44 ints -> AES-128: xmm_temp1/2 already hold the last two round
       // keys (0x90 penultimate, 0xa0 final).
2237     __ cmpl(keylen, 44);
2238     __ jccb(Assembler::equal, L_doLast);
2239
2240     __ aesenc(xmm_result, xmm_temp1);
2241     __ aesenc(xmm_result, xmm_temp2);
2242
2243     load_key(xmm_temp1, key, 0xb0, xmm_key_shuf_mask);
2244     load_key(xmm_temp2, key, 0xc0, xmm_key_shuf_mask);
2245
       // 52 ints -> AES-192: 0xb0/0xc0 are its last two round keys.
2246     __ cmpl(keylen, 52);
2247     __ jccb(Assembler::equal, L_doLast);
2248
2249     __ aesenc(xmm_result, xmm_temp1);
2250     __ aesenc(xmm_result, xmm_temp2);
2251
2252     load_key(xmm_temp1, key, 0xd0, xmm_key_shuf_mask);
2253     load_key(xmm_temp2, key, 0xe0, xmm_key_shuf_mask);
2254
       // at L_doLast xmm_temp1 holds the penultimate and xmm_temp2 the final
       // round key for whichever key size branched or fell through.
2255     __ BIND(L_doLast);
2256     __ aesenc(xmm_result, xmm_temp1);
2257     __ aesenclast(xmm_result, xmm_temp2);
2258     __ movdqu(Address(to, 0), xmm_result);        // store the result
2259     __ xorptr(rax, rax); // return 0

2260     __ leave(); // required for proper stackwalking of RuntimeStub frame
2261     __ ret(0);
2262
2263     return start;
2264   }
2265 
2266 
2267   // Arguments:
2268   //
2269   // Inputs:
2270   //   c_rarg0   - source byte array address
2271   //   c_rarg1   - destination byte array address
2272   //   c_rarg2   - K (key) in little endian int array
2273   //
       // Stub: single-block AES decrypt (right-hand/new side of this diff).
       // Java's expanded decrypt key is rotated one position, so middle
       // rounds start at offset 0x10 and the aesdeclast key is at 0x00.
       // Unrolled with round keys staged four at a time in xmm2-xmm5.
2274   address generate_aescrypt_decryptBlock() {
2275     assert(UseAES, "need AES instructions and misaligned SSE support");
2276     __ align(CodeEntryAlignment);
2277     StubCodeMark mark(this, "StubRoutines", "aescrypt_decryptBlock");
2278     Label L_doLast;
2279     address start = __ pc();
2280
2281     const Register from        = rdx;      // source array address
2282     const Register to          = rdx;      // destination array address
       // NOTE: 'from' and 'to' deliberately share rdx -- 'to' is only
       // loaded (at 2305) after the input block has been read via 'from'.
2283     const Register key         = rcx;      // key array address
2284     const Register keylen      = rax;
2285     const Address  from_param(rbp, 8+0);
2286     const Address  to_param  (rbp, 8+4);
2287     const Address  key_param (rbp, 8+8);
2288
2289     const XMMRegister xmm_result = xmm0;
2290     const XMMRegister xmm_key_shuf_mask = xmm1;
2291     const XMMRegister xmm_temp1  = xmm2;
2292     const XMMRegister xmm_temp2  = xmm3;
2293     const XMMRegister xmm_temp3  = xmm4;
2294     const XMMRegister xmm_temp4  = xmm5;
2295
2296     __ enter(); // required for proper stackwalking of RuntimeStub frame
2297     __ movptr(from, from_param);
2298     __ movptr(key, key_param);


2299
2300     // keylen could be only {11, 13, 15} * 4 = {44, 52, 60}
2301     __ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));



2302
2303     __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
2304     __ movdqu(xmm_result, Address(from, 0));
2305     __ movptr(to, to_param);
2306
2307     // for decryption java expanded key ordering is rotated one position from what we want
2308     // so we start from 0x10 here and hit 0x00 last
2309     // we don't know if the key is aligned, hence not using load-execute form
2310     load_key(xmm_temp1, key, 0x10, xmm_key_shuf_mask);
2311     load_key(xmm_temp2, key, 0x20, xmm_key_shuf_mask);
2312     load_key(xmm_temp3, key, 0x30, xmm_key_shuf_mask);
2313     load_key(xmm_temp4, key, 0x40, xmm_key_shuf_mask);
2314
2315     __ pxor  (xmm_result, xmm_temp1);
2316     __ aesdec(xmm_result, xmm_temp2);
2317     __ aesdec(xmm_result, xmm_temp3);
2318     __ aesdec(xmm_result, xmm_temp4);
2319
2320     load_key(xmm_temp1, key, 0x50, xmm_key_shuf_mask);
2321     load_key(xmm_temp2, key, 0x60, xmm_key_shuf_mask);
2322     load_key(xmm_temp3, key, 0x70, xmm_key_shuf_mask);
2323     load_key(xmm_temp4, key, 0x80, xmm_key_shuf_mask);
2324
2325     __ aesdec(xmm_result, xmm_temp1);
2326     __ aesdec(xmm_result, xmm_temp2);
2327     __ aesdec(xmm_result, xmm_temp3);
2328     __ aesdec(xmm_result, xmm_temp4);
2329
2330     load_key(xmm_temp1, key, 0x90, xmm_key_shuf_mask);
2331     load_key(xmm_temp2, key, 0xa0, xmm_key_shuf_mask);
2332     load_key(xmm_temp3, key, 0x00, xmm_key_shuf_mask);
       // the key+0x00 aesdeclast key is parked in xmm_temp3 here because
       // xmm_temp1/xmm_temp2 are recycled across the branches below and
       // xmm_temp3 stays untouched until after L_doLast.
2333
       // 44 ints -> AES-128: xmm_temp1/2 hold its last two aesdec keys.
2334     __ cmpl(keylen, 44);
2335     __ jccb(Assembler::equal, L_doLast);
2336
2337     __ aesdec(xmm_result, xmm_temp1);
2338     __ aesdec(xmm_result, xmm_temp2);
2339
2340     load_key(xmm_temp1, key, 0xb0, xmm_key_shuf_mask);
2341     load_key(xmm_temp2, key, 0xc0, xmm_key_shuf_mask);
2342
       // 52 ints -> AES-192; otherwise fall through for AES-256.
2343     __ cmpl(keylen, 52);
2344     __ jccb(Assembler::equal, L_doLast);
2345
2346     __ aesdec(xmm_result, xmm_temp1);
2347     __ aesdec(xmm_result, xmm_temp2);
2348
2349     load_key(xmm_temp1, key, 0xd0, xmm_key_shuf_mask);
2350     load_key(xmm_temp2, key, 0xe0, xmm_key_shuf_mask);
2351
2352     __ BIND(L_doLast);
2353     __ aesdec(xmm_result, xmm_temp1);
2354     __ aesdec(xmm_result, xmm_temp2);

2355
2356     // for decryption the aesdeclast operation is always on key+0x00
2357     __ aesdeclast(xmm_result, xmm_temp3);
2358     __ movdqu(Address(to, 0), xmm_result);  // store the result

2359     __ xorptr(rax, rax); // return 0

2360     __ leave(); // required for proper stackwalking of RuntimeStub frame
2361     __ ret(0);
2362
2363     return start;
2364   }
2365 
2366   void handleSOERegisters(bool saving) {
       // Saves (saving == true) or restores (saving == false) the x86-32
       // callee-saved GP registers rbx/rsi/rdi used by the CBC stubs, in a
       // fixed area just below the caller-established rbp frame.
2367     const int saveFrameSizeInBytes = 4 * wordSize;
       // NOTE(review): 4 words are reserved but only 3 slots (-1..-3 *
       // wordSize) are used -- confirm whether the fourth word is deliberate
       // padding.
2368     const Address saved_rbx     (rbp, -3 * wordSize);
2369     const Address saved_rsi     (rbp, -2 * wordSize);
2370     const Address saved_rdi     (rbp, -1 * wordSize);
2371
2372     if (saving) {
2373       __ subptr(rsp, saveFrameSizeInBytes);
2374       __ movptr(saved_rsi, rsi);
2375       __ movptr(saved_rdi, rdi);
2376       __ movptr(saved_rbx, rbx);
2377     } else {
2378       // restoring
2379       __ movptr(rsi, saved_rsi);
2380       __ movptr(rdi, saved_rdi);
2381       __ movptr(rbx, saved_rbx);
2382     }
       // The restore path does not re-add saveFrameSizeInBytes to rsp: the
       // callers' trailing leave() resets rsp from rbp.
2383   }
2384 
2385   // Arguments:
2386   //
2387   // Inputs:
2388   //   c_rarg0   - source byte array address
2389   //   c_rarg1   - destination byte array address
2390   //   c_rarg2   - K (key) in little endian int array
2391   //   c_rarg3   - r vector byte array address
2392   //   c_rarg4   - input length
2393   //
  // Generates the CBC-mode AES encryption stub for 32-bit x86.  Arguments
  // arrive on the caller's stack (see Inputs above); the final cipher block
  // is written back to the r-vector so chaining can continue across calls.
2394   address generate_cipherBlockChaining_encryptAESCrypt() {
2395     assert(UseAES, "need AES instructions and misaligned SSE support");
2396     __ align(CodeEntryAlignment);
2397     StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_encryptAESCrypt");
2398     address start = __ pc();
2399 
2400     Label L_exit, L_key_192_256, L_key_256, L_loopTop_128, L_loopTop_192, L_loopTop_256;
2401     const Register from        = rsi;      // source array address
2402     const Register to          = rdx;      // destination array address
2403     const Register key         = rcx;      // key array address
2404     const Register rvec        = rdi;      // r byte array initialized from initvector array address
2405                                            // and left with the results of the last encryption block
2406     const Register len_reg     = rbx;      // src len (must be multiple of blocksize 16)
2407     const Register pos         = rax;
2408 
2409     // xmm register assignments for the loops below
2410     const XMMRegister xmm_result = xmm0;
2411     const XMMRegister xmm_temp   = xmm1;
2412     // first 6 keys preloaded into xmm2-xmm7
2413     const int XMM_REG_NUM_KEY_FIRST = 2;
2414     const int XMM_REG_NUM_KEY_LAST  = 7;
2415     const XMMRegister xmm_key0   = as_XMMRegister(XMM_REG_NUM_KEY_FIRST);

     // NOTE(review): original lines 2416-2427 are elided in this diff view
     // (stack-arg Address declarations, enter()/handleSOERegisters(true),
     // and the from/to parameter loads) — consult the full file.
2428     __ movptr(key  , key_param);
2429     __ movptr(rvec , rvec_param);
2430     __ movptr(len_reg , len_param);
2431 
2432     const XMMRegister xmm_key_shuf_mask = xmm_temp;  // used temporarily to swap key bytes up front
2433     __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
2434     // load up xmm regs 2 thru 7 with keys 0-5
2435     for (int rnum = XMM_REG_NUM_KEY_FIRST, offset = 0x00; rnum  <= XMM_REG_NUM_KEY_LAST; rnum++) {
2436       load_key(as_XMMRegister(rnum), key, offset, xmm_key_shuf_mask);
2437       offset += 0x10;
2438     }
2439 
2440     __ movdqu(xmm_result, Address(rvec, 0x00));   // initialize xmm_result with r vec
2441 
2442     // now split to different paths depending on the keylen (len in ints of AESCrypt.KLE array (52=192, or 60=256))
2443     __ movl(rax, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
2444     __ cmpl(rax, 44);
2445     __ jcc(Assembler::notEqual, L_key_192_256);
2446 
2447     // 128 bit code follows here
2448     __ movl(pos, 0);
2449     __ align(OptoLoopAlignment);
2450     __ BIND(L_loopTop_128);
2451     __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0));   // get next 16 bytes of input
2452     __ pxor  (xmm_result, xmm_temp);                                // xor with the current r vector
2453 
     // round 0 xor uses key 0 (xmm2); rounds 1-5 come from the preloaded
     // xmm3..xmm7; later rounds reload their keys from memory each block.
2454     __ pxor  (xmm_result, xmm_key0);                                // do the aes rounds
2455     for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum  <= XMM_REG_NUM_KEY_LAST; rnum++) {
2456       __ aesenc(xmm_result, as_XMMRegister(rnum));
2457     }
2458     for (int key_offset = 0x60; key_offset <= 0x90; key_offset += 0x10) {
2459       aes_enc_key(xmm_result, xmm_temp, key, key_offset);
2460     }
2461     load_key(xmm_temp, key, 0xa0);
2462     __ aesenclast(xmm_result, xmm_temp);
2463 
2464     __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result);     // store into the next 16 bytes of output
2465     // no need to store r to memory until we exit
2466     __ addptr(pos, AESBlockSize);
     // subptr sets ZF: loop until len_reg counts down to exactly zero.
     // Assumes len is a positive multiple of AESBlockSize — TODO confirm
     // the Java caller guarantees this (a zero length would not exit here).
2467     __ subptr(len_reg, AESBlockSize);
2468     __ jcc(Assembler::notEqual, L_loopTop_128);
2469 
2470     __ BIND(L_exit);
2471     __ movdqu(Address(rvec, 0), xmm_result);     // final value of r stored in rvec of CipherBlockChaining object
2472 
2473     handleSOERegisters(false /*restoring*/);
2474     __ movl(rax, 0);                             // return 0 (why?)
2475     __ leave();                                  // required for proper stackwalking of RuntimeStub frame
2476     __ ret(0);
2477 
2478     __ BIND(L_key_192_256);
2479     // here rax = len in ints of AESCrypt.KLE array (52=192, or 60=256)
2480     __ cmpl(rax, 52);
2481     __ jcc(Assembler::notEqual, L_key_256);
2482 
     // same per-block pattern as the 128-bit loop, extended to 12 rounds
     // (memory-loaded keys run up to offset 0xb0, last round key at 0xc0)
2483     // 192-bit code follows here (could be changed to use more xmm registers)
2484     __ movl(pos, 0);
2485     __ align(OptoLoopAlignment);
2486     __ BIND(L_loopTop_192);
2487     __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0));   // get next 16 bytes of input
2488     __ pxor  (xmm_result, xmm_temp);                                // xor with the current r vector
2489 
2490     __ pxor  (xmm_result, xmm_key0);                                // do the aes rounds
2491     for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum  <= XMM_REG_NUM_KEY_LAST; rnum++) {
2492       __ aesenc(xmm_result, as_XMMRegister(rnum));
2493     }
2494     for (int key_offset = 0x60; key_offset <= 0xb0; key_offset += 0x10) {
2495       aes_enc_key(xmm_result, xmm_temp, key, key_offset);
2496     }
2497     load_key(xmm_temp, key, 0xc0);
2498     __ aesenclast(xmm_result, xmm_temp);
2499 
2500     __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result);   // store into the next 16 bytes of output
2501     // no need to store r to memory until we exit
2502     __ addptr(pos, AESBlockSize);
2503     __ subptr(len_reg, AESBlockSize);
2504     __ jcc(Assembler::notEqual, L_loopTop_192);
2505     __ jmp(L_exit);
2506 
2507     __ BIND(L_key_256);
     // same per-block pattern again, 14 rounds (memory-loaded keys up to
     // offset 0xd0, last round key at 0xe0)
2508     // 256-bit code follows here (could be changed to use more xmm registers)
2509     __ movl(pos, 0);
2510     __ align(OptoLoopAlignment);
2511     __ BIND(L_loopTop_256);
2512     __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0));   // get next 16 bytes of input
2513     __ pxor  (xmm_result, xmm_temp);                                // xor with the current r vector
2514 
2515     __ pxor  (xmm_result, xmm_key0);                                // do the aes rounds
2516     for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum  <= XMM_REG_NUM_KEY_LAST; rnum++) {
2517       __ aesenc(xmm_result, as_XMMRegister(rnum));
2518     }
2519     for (int key_offset = 0x60; key_offset <= 0xd0; key_offset += 0x10) {
2520       aes_enc_key(xmm_result, xmm_temp, key, key_offset);
2521     }
2522     load_key(xmm_temp, key, 0xe0);
2523     __ aesenclast(xmm_result, xmm_temp);
2524 
2525     __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result);   // store into the next 16 bytes of output
2526     // no need to store r to memory until we exit
2527     __ addptr(pos, AESBlockSize);
2528     __ subptr(len_reg, AESBlockSize);
2529     __ jcc(Assembler::notEqual, L_loopTop_256);
2530     __ jmp(L_exit);
2531 
2532     return start;
2533   }
2534 
2535 
2536   // CBC AES Decryption.
2537   // In 32-bit stub, because of lack of registers we do not try to parallelize 4 blocks at a time.
2538   //
2539   // Arguments:
2540   //
2541   // Inputs:
2542   //   c_rarg0   - source byte array address
2543   //   c_rarg1   - destination byte array address
2544   //   c_rarg2   - K (key) in little endian int array
2545   //   c_rarg3   - r vector byte array address
2546   //   c_rarg4   - input length
2547   //
2548 
2549   address generate_cipherBlockChaining_decryptAESCrypt() {
2550     assert(UseAES, "need AES instructions and misaligned SSE support");
2551     __ align(CodeEntryAlignment);
2552     StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_decryptAESCrypt");
2553     address start = __ pc();
2554 
2555     Label L_exit, L_key_192_256, L_key_256;
2556     Label L_singleBlock_loopTop_128;
2557     Label L_singleBlock_loopTop_192, L_singleBlock_loopTop_256;
2558     const Register from        = rsi;      // source array address
2559     const Register to          = rdx;      // destination array address
2560     const Register key         = rcx;      // key array address
2561     const Register rvec        = rdi;      // r byte array initialized from initvector array address
2562                                            // and left with the results of the last encryption block
2563     const Register len_reg     = rbx;      // src len (must be multiple of blocksize 16)
2564     const Register pos         = rax;
2565 
2566     // xmm register assignments for the loops below
2567     const XMMRegister xmm_result = xmm0;
2568     const XMMRegister xmm_temp   = xmm1;
2569     // first 6 keys preloaded into xmm2-xmm7
2570     const int XMM_REG_NUM_KEY_FIRST = 2;


2591     // so we start from 0x10 here and hit 0x00 last
2592     const XMMRegister xmm_key_shuf_mask = xmm1;  // used temporarily to swap key bytes up front
2593     __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
2594     // load up xmm regs 2 thru 6 with first 5 keys
2595     for (int rnum = XMM_REG_NUM_KEY_FIRST, offset = 0x10; rnum  <= XMM_REG_NUM_KEY_LAST; rnum++) {
2596       load_key(as_XMMRegister(rnum), key, offset, xmm_key_shuf_mask);
2597       offset += 0x10;
2598     }
2599 
2600     // inside here, use the rvec register to point to previous block cipher
2601     // with which we xor at the end of each newly decrypted block
2602     const Register  prev_block_cipher_ptr = rvec;
2603 
2604     // now split to different paths depending on the keylen (len in ints of AESCrypt.KLE array (52=192, or 60=256))
2605     __ movl(rax, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
2606     __ cmpl(rax, 44);
2607     __ jcc(Assembler::notEqual, L_key_192_256);
2608 
2609 
2610     // 128-bit code follows here, parallelized
2611     __ movl(pos, 0);
2612     __ align(OptoLoopAlignment);
2613     __ BIND(L_singleBlock_loopTop_128);
2614     __ cmpptr(len_reg, 0);           // any blocks left??
2615     __ jcc(Assembler::equal, L_exit);
2616     __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0));   // get next 16 bytes of cipher input
2617     __ pxor  (xmm_result, xmm_key_first);                             // do the aes dec rounds
2618     for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum  <= XMM_REG_NUM_KEY_LAST; rnum++) {
2619       __ aesdec(xmm_result, as_XMMRegister(rnum));
2620     }
2621     for (int key_offset = FIRST_NON_REG_KEY_offset; key_offset <= 0xa0; key_offset += 0x10) {   // 128-bit runs up to key offset a0
2622       aes_dec_key(xmm_result, xmm_temp, key, key_offset);
2623     }
2624     load_key(xmm_temp, key, 0x00);                                     // final key is stored in java expanded array at offset 0
2625     __ aesdeclast(xmm_result, xmm_temp);
2626     __ movdqu(xmm_temp, Address(prev_block_cipher_ptr, 0x00));
2627     __ pxor  (xmm_result, xmm_temp);                                  // xor with the current r vector
2628     __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result);     // store into the next 16 bytes of output
2629     // no need to store r to memory until we exit
2630     __ lea(prev_block_cipher_ptr, Address(from, pos, Address::times_1, 0));     // set up new ptr
2631     __ addptr(pos, AESBlockSize);
2632     __ subptr(len_reg, AESBlockSize);
2633     __ jmp(L_singleBlock_loopTop_128);
2634 
2635 
2636     __ BIND(L_exit);
2637     __ movdqu(xmm_temp, Address(prev_block_cipher_ptr, 0x00));
2638     __ movptr(rvec , rvec_param);                                     // restore this since used in loop
2639     __ movdqu(Address(rvec, 0), xmm_temp);                            // final value of r stored in rvec of CipherBlockChaining object
2640     handleSOERegisters(false /*restoring*/);
2641     __ movl(rax, 0);                                                  // return 0 (why?)
2642     __ leave();                                                       // required for proper stackwalking of RuntimeStub frame
2643     __ ret(0);
2644 
2645 
2646     __ BIND(L_key_192_256);
2647     // here rax = len in ints of AESCrypt.KLE array (52=192, or 60=256)
2648     __ cmpl(rax, 52);
2649     __ jcc(Assembler::notEqual, L_key_256);
2650 
2651     // 192-bit code follows here (could be optimized to use parallelism)
2652     __ movl(pos, 0);
2653     __ align(OptoLoopAlignment);
2654     __ BIND(L_singleBlock_loopTop_192);
2655     __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0));   // get next 16 bytes of cipher input
2656     __ pxor  (xmm_result, xmm_key_first);                             // do the aes dec rounds
2657     for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
2658       __ aesdec(xmm_result, as_XMMRegister(rnum));
2659     }
2660     for (int key_offset = FIRST_NON_REG_KEY_offset; key_offset <= 0xc0; key_offset += 0x10) {   // 192-bit runs up to key offset c0
2661       aes_dec_key(xmm_result, xmm_temp, key, key_offset);
2662     }
2663     load_key(xmm_temp, key, 0x00);                                     // final key is stored in java expanded array at offset 0
2664     __ aesdeclast(xmm_result, xmm_temp);
2665     __ movdqu(xmm_temp, Address(prev_block_cipher_ptr, 0x00));
2666     __ pxor  (xmm_result, xmm_temp);                                  // xor with the current r vector
2667     __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result);     // store into the next 16 bytes of output
2668     // no need to store r to memory until we exit
2669     __ lea(prev_block_cipher_ptr, Address(from, pos, Address::times_1, 0));     // set up new ptr
2670     __ addptr(pos, AESBlockSize);
2671     __ subptr(len_reg, AESBlockSize);
2672     __ jcc(Assembler::notEqual,L_singleBlock_loopTop_192);
2673     __ jmp(L_exit);
2674 
2675     __ BIND(L_key_256);
2676     // 256-bit code follows here (could be optimized to use parallelism)
2677     __ movl(pos, 0);
2678     __ align(OptoLoopAlignment);
2679     __ BIND(L_singleBlock_loopTop_256);
2680     __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0));   // get next 16 bytes of cipher input
2681     __ pxor  (xmm_result, xmm_key_first);                             // do the aes dec rounds
2682     for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
2683       __ aesdec(xmm_result, as_XMMRegister(rnum));
2684     }
2685     for (int key_offset = FIRST_NON_REG_KEY_offset; key_offset <= 0xe0; key_offset += 0x10) {   // 256-bit runs up to key offset e0
2686       aes_dec_key(xmm_result, xmm_temp, key, key_offset);
2687     }
2688     load_key(xmm_temp, key, 0x00);                                     // final key is stored in java expanded array at offset 0
2689     __ aesdeclast(xmm_result, xmm_temp);
2690     __ movdqu(xmm_temp, Address(prev_block_cipher_ptr, 0x00));
2691     __ pxor  (xmm_result, xmm_temp);                                  // xor with the current r vector
2692     __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result);     // store into the next 16 bytes of output
2693     // no need to store r to memory until we exit
2694     __ lea(prev_block_cipher_ptr, Address(from, pos, Address::times_1, 0));     // set up new ptr
2695     __ addptr(pos, AESBlockSize);
2696     __ subptr(len_reg, AESBlockSize);
2697     __ jcc(Assembler::notEqual,L_singleBlock_loopTop_256);


src/cpu/x86/vm/stubGenerator_x86_32.cpp
Index Unified diffs Context diffs Sdiffs Wdiffs Patch New Old Previous File Next File