< prev index next >

src/cpu/ppc/vm/stubGenerator_ppc.cpp

Print this page




2207     address start = __ function_entry();
2208 
2209     gen_write_ref_array_pre_barrier(R3_ARG1, R4_ARG2, R5_ARG3, dest_uninitialized, R9_ARG7);
2210 
2211     // save some arguments, disjoint_long_copy_core destroys them.
2212     // needed for post barrier
2213     __ mr(R9_ARG7, R4_ARG2);
2214     __ mr(R10_ARG8, R5_ARG3);
2215 
2216     if (UseCompressedOops) {
2217       generate_disjoint_int_copy_core(aligned);
2218     } else {
2219       generate_disjoint_long_copy_core(aligned);
2220     }
2221 
2222     gen_write_ref_array_post_barrier(R9_ARG7, R10_ARG8, R11_scratch1, /*branchToEnd*/ false);
2223 
2224     return start;
2225   }
2226 
2227   // Arguments for generated stub (little endian only):
2228   //   R3_ARG1   - source byte array address
2229   //   R4_ARG2   - destination byte array address
2230   //   R5_ARG3   - round key array
2231   address generate_aescrypt_encryptBlock() {
2232     assert(UseAES, "need AES instructions and misaligned SSE support");
2233     StubCodeMark mark(this, "StubRoutines", "aescrypt_encryptBlock");
2234 
2235     address start = __ function_entry();
2236 
2237     Label L_doLast;
2238 
2239     Register from           = R3_ARG1;  // source array address
2240     Register to             = R4_ARG2;  // destination array address
2241     Register key            = R5_ARG3;  // round key array
2242 
2243     Register keylen         = R8;
2244     Register temp           = R9;
2245     Register keypos         = R10;
2246     Register hex            = R11;
2247     Register fifteen        = R12;
2248 
2249     VectorRegister vRet     = VR0;
2250 
2251     VectorRegister vKey1    = VR1;
2252     VectorRegister vKey2    = VR2;
2253     VectorRegister vKey3    = VR3;
2254     VectorRegister vKey4    = VR4;
2255 
2256     VectorRegister fromPerm = VR5;
2257     VectorRegister keyPerm  = VR6;
2258     VectorRegister toPerm   = VR7;
2259     VectorRegister fSplt    = VR8;
2260 
2261     VectorRegister vTmp1    = VR9;
2262     VectorRegister vTmp2    = VR10;
2263     VectorRegister vTmp3    = VR11;
2264     VectorRegister vTmp4    = VR12;
2265 
2266     VectorRegister vLow     = VR13;
2267     VectorRegister vHigh    = VR14;
2268 
2269     __ li              (hex, 16);
2270     __ li              (fifteen, 15);
2271     __ vspltisb        (fSplt, 0x0f);
2272 
2273     // load unaligned from[0-15] to vRet
2274     __ lvx             (vRet, from);
2275     __ lvx             (vTmp1, fifteen, from);
2276     __ lvsl            (fromPerm, from);


2277     __ vxor            (fromPerm, fromPerm, fSplt);

2278     __ vperm           (vRet, vRet, vTmp1, fromPerm);
2279 
2280     // load keylen (44 or 52 or 60)
2281     __ lwz             (keylen, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT), key);
2282 
2283     // to load keys
2284     __ lvsr            (keyPerm, key);
2285     __ vxor            (vTmp2, vTmp2, vTmp2);
2286     __ vspltisb        (vTmp2, -16);
2287     __ vrld            (keyPerm, keyPerm, vTmp2);
2288     __ vrld            (keyPerm, keyPerm, vTmp2);
2289     __ vsldoi          (keyPerm, keyPerm, keyPerm, 8);

2290 
2291     // load the 1st round key to vKey1
2292     __ li              (keypos, 0);

2293     __ lvx             (vKey1, keypos, key);
2294     __ addi            (keypos, keypos, 16);
2295     __ lvx             (vTmp1, keypos, key);
2296     __ vperm           (vKey1, vTmp1, vKey1, keyPerm);
2297 
2298     // 1st round
2299     __ vxor (vRet, vRet, vKey1);
2300 
2301     // load the 2nd round key to vKey1
2302     __ addi            (keypos, keypos, 16);
2303     __ lvx             (vTmp2, keypos, key);
2304     __ vperm           (vKey1, vTmp2, vTmp1, keyPerm);
2305 
2306     // load the 3rd round key to vKey2
2307     __ addi            (keypos, keypos, 16);
2308     __ lvx             (vTmp1, keypos, key);
2309     __ vperm           (vKey2, vTmp1, vTmp2, keyPerm);
2310 
2311     // load the 4th round key to vKey3
2312     __ addi            (keypos, keypos, 16);
2313     __ lvx             (vTmp2, keypos, key);
2314     __ vperm           (vKey3, vTmp2, vTmp1, keyPerm);
2315 
2316     // load the 5th round key to vKey4
2317     __ addi            (keypos, keypos, 16);
2318     __ lvx             (vTmp1, keypos, key);
2319     __ vperm           (vKey4, vTmp1, vTmp2, keyPerm);
2320 
2321     // 2nd - 5th rounds
2322     __ vcipher (vRet, vRet, vKey1);
2323     __ vcipher (vRet, vRet, vKey2);
2324     __ vcipher (vRet, vRet, vKey3);
2325     __ vcipher (vRet, vRet, vKey4);
2326 
2327     // load the 6th round key to vKey1
2328     __ addi            (keypos, keypos, 16);
2329     __ lvx             (vTmp2, keypos, key);
2330     __ vperm           (vKey1, vTmp2, vTmp1, keyPerm);
2331 
2332     // load the 7th round key to vKey2
2333     __ addi            (keypos, keypos, 16);
2334     __ lvx             (vTmp1, keypos, key);
2335     __ vperm           (vKey2, vTmp1, vTmp2, keyPerm);
2336 
2337     // load the 8th round key to vKey3
2338     __ addi            (keypos, keypos, 16);
2339     __ lvx             (vTmp2, keypos, key);
2340     __ vperm           (vKey3, vTmp2, vTmp1, keyPerm);
2341 
2342     // load the 9th round key to vKey4
2343     __ addi            (keypos, keypos, 16);
2344     __ lvx             (vTmp1, keypos, key);
2345     __ vperm           (vKey4, vTmp1, vTmp2, keyPerm);
2346 
2347     // 6th - 9th rounds
2348     __ vcipher (vRet, vRet, vKey1);
2349     __ vcipher (vRet, vRet, vKey2);
2350     __ vcipher (vRet, vRet, vKey3);
2351     __ vcipher (vRet, vRet, vKey4);
2352 
2353     // load the 10th round key to vKey1
2354     __ addi            (keypos, keypos, 16);
2355     __ lvx             (vTmp2, keypos, key);
2356     __ vperm           (vKey1, vTmp2, vTmp1, keyPerm);
2357 
2358     // load the 11th round key to vKey2
2359     __ addi            (keypos, keypos, 16);
2360     __ lvx             (vTmp1, keypos, key);
2361     __ vperm           (vKey2, vTmp1, vTmp2, keyPerm);
2362 
2363     // if all round keys are loaded, skip next 4 rounds
2364     __ cmpwi           (CCR0, keylen, 44);
2365     __ beq             (CCR0, L_doLast);
2366 
2367     // 10th - 11th rounds
2368     __ vcipher (vRet, vRet, vKey1);
2369     __ vcipher (vRet, vRet, vKey2);
2370 
2371     // load the 12th round key to vKey1
2372     __ addi            (keypos, keypos, 16);
2373     __ lvx             (vTmp2, keypos, key);
2374     __ vperm           (vKey1, vTmp2, vTmp1, keyPerm);
2375 
2376     // load the 13th round key to vKey2
2377     __ addi            (keypos, keypos, 16);
2378     __ lvx             (vTmp1, keypos, key);
2379     __ vperm           (vKey2, vTmp1, vTmp2, keyPerm);
2380 
2381     // if all round keys are loaded, skip next 2 rounds
2382     __ cmpwi           (CCR0, keylen, 52);
2383     __ beq             (CCR0, L_doLast);
2384 
2385     // 12th - 13th rounds
2386     __ vcipher (vRet, vRet, vKey1);
2387     __ vcipher (vRet, vRet, vKey2);
2388 
2389     // load the 14th round key to vKey1
2390     __ addi            (keypos, keypos, 16);
2391     __ lvx             (vTmp2, keypos, key);
2392     __ vperm           (vKey1, vTmp2, vTmp1, keyPerm);
2393 
2394     // load the 15th round key to vKey2
2395     __ addi            (keypos, keypos, 16);
2396     __ lvx             (vTmp1, keypos, key);
2397     __ vperm           (vKey2, vTmp1, vTmp2, keyPerm);
2398 
2399     __ bind(L_doLast);
2400 
2401     // last two rounds
2402     __ vcipher (vRet, vRet, vKey1);
2403     __ vcipherlast (vRet, vRet, vKey2);
2404 
2405     __ neg             (temp, to);
2406     __ lvsr            (toPerm, temp);
2407     __ vspltisb        (vTmp2, -1);
2408     __ vxor            (vTmp1, vTmp1, vTmp1);
2409     __ vperm           (vTmp2, vTmp2, vTmp1, toPerm);
2410     __ vxor            (toPerm, toPerm, fSplt);


2411     __ lvx             (vTmp1, to);
2412     __ vperm           (vRet, vRet, vRet, toPerm);
2413     __ vsel            (vTmp1, vTmp1, vRet, vTmp2);
2414     __ lvx             (vTmp4, fifteen, to);








2415     __ stvx            (vTmp1, to);
2416     __ vsel            (vRet, vRet, vTmp4, vTmp2);
2417     __ stvx            (vRet, fifteen, to);
2418 
2419     __ blr();
2420      return start;
2421   }
2422 
2423   // Arguments for generated stub (little endian only):
2424   //   R3_ARG1   - source byte array address
2425   //   R4_ARG2   - destination byte array address
2426   //   R5_ARG3   - K (key) in little endian int array
2427   address generate_aescrypt_decryptBlock() {
2428     assert(UseAES, "need AES instructions and misaligned SSE support");
2429     StubCodeMark mark(this, "StubRoutines", "aescrypt_decryptBlock");
2430 
2431     address start = __ function_entry();
2432 
2433     Label L_doLast;
2434     Label L_do44;
2435     Label L_do52;
2436     Label L_do60;
2437 
2438     Register from           = R3_ARG1;  // source array address
2439     Register to             = R4_ARG2;  // destination array address
2440     Register key            = R5_ARG3;  // round key array
2441 
2442     Register keylen         = R8;
2443     Register temp           = R9;
2444     Register keypos         = R10;
2445     Register hex            = R11;
2446     Register fifteen        = R12;
2447 
2448     VectorRegister vRet     = VR0;
2449 
2450     VectorRegister vKey1    = VR1;
2451     VectorRegister vKey2    = VR2;
2452     VectorRegister vKey3    = VR3;
2453     VectorRegister vKey4    = VR4;
2454     VectorRegister vKey5    = VR5;
2455 
2456     VectorRegister fromPerm = VR6;
2457     VectorRegister keyPerm  = VR7;
2458     VectorRegister toPerm   = VR8;
2459     VectorRegister fSplt    = VR9;
2460 
2461     VectorRegister vTmp1    = VR10;
2462     VectorRegister vTmp2    = VR11;
2463     VectorRegister vTmp3    = VR12;
2464     VectorRegister vTmp4    = VR13;
2465 
2466     VectorRegister vLow     = VR14;
2467     VectorRegister vHigh    = VR15;
2468 
2469     __ li              (hex, 16);
2470     __ li              (fifteen, 15);
2471     __ vspltisb        (fSplt, 0x0f);
2472 
2473     // load unaligned from[0-15] to vRet
2474     __ lvx             (vRet, from);
2475     __ lvx             (vTmp1, fifteen, from);
2476     __ lvsl            (fromPerm, from);


2477     __ vxor            (fromPerm, fromPerm, fSplt);

2478     __ vperm           (vRet, vRet, vTmp1, fromPerm); // align [and byte swap in LE]
2479 
2480     // load keylen (44 or 52 or 60)
2481     __ lwz             (keylen, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT), key);
2482 
2483     // to load keys
2484     __ lvsr            (keyPerm, key);

2485     __ vxor            (vTmp2, vTmp2, vTmp2);
2486     __ vspltisb        (vTmp2, -16);
2487     __ vrld            (keyPerm, keyPerm, vTmp2);
2488     __ vrld            (keyPerm, keyPerm, vTmp2);
2489     __ vsldoi          (keyPerm, keyPerm, keyPerm, 8);

2490 
2491     __ cmpwi           (CCR0, keylen, 44);
2492     __ beq             (CCR0, L_do44);
2493 
2494     __ cmpwi           (CCR0, keylen, 52);
2495     __ beq             (CCR0, L_do52);
2496 
2497     // load the 15th round key to vKey1
2498     __ li              (keypos, 240);
2499     __ lvx             (vTmp1, keypos, key);
2500     __ addi            (keypos, keypos, -16);
2501     __ lvx             (vTmp2, keypos, key);
2502     __ vperm           (vKey1, vTmp1, vTmp2, keyPerm);
2503 
2504     // load the 14th round key to vKey2
2505     __ addi            (keypos, keypos, -16);
2506     __ lvx             (vTmp1, keypos, key);
2507     __ vperm           (vKey2, vTmp2, vTmp1, keyPerm);
2508 
2509     // load the 13th round key to vKey3
2510     __ addi            (keypos, keypos, -16);
2511     __ lvx             (vTmp2, keypos, key);
2512     __ vperm           (vKey3, vTmp1, vTmp2, keyPerm);
2513 
2514     // load the 12th round key to vKey4
2515     __ addi            (keypos, keypos, -16);
2516     __ lvx             (vTmp1, keypos, key);
2517     __ vperm           (vKey4, vTmp2, vTmp1, keyPerm);
2518 
2519     // load the 11th round key to vKey5
2520     __ addi            (keypos, keypos, -16);
2521     __ lvx             (vTmp2, keypos, key);
2522     __ vperm           (vKey5, vTmp1, vTmp2, keyPerm);
2523 
2524     // 1st - 5th rounds
2525     __ vxor            (vRet, vRet, vKey1);
2526     __ vncipher        (vRet, vRet, vKey2);
2527     __ vncipher        (vRet, vRet, vKey3);
2528     __ vncipher        (vRet, vRet, vKey4);
2529     __ vncipher        (vRet, vRet, vKey5);
2530 
2531     __ b               (L_doLast);
2532 
2533     __ bind            (L_do52);
2534 
2535     // load the 13th round key to vKey1
2536     __ li              (keypos, 208);
2537     __ lvx             (vTmp1, keypos, key);
2538     __ addi            (keypos, keypos, -16);
2539     __ lvx             (vTmp2, keypos, key);
2540     __ vperm           (vKey1, vTmp1, vTmp2, keyPerm);
2541 
2542     // load the 12th round key to vKey2
2543     __ addi            (keypos, keypos, -16);
2544     __ lvx             (vTmp1, keypos, key);
2545     __ vperm           (vKey2, vTmp2, vTmp1, keyPerm);
2546 
2547     // load the 11th round key to vKey3
2548     __ addi            (keypos, keypos, -16);
2549     __ lvx             (vTmp2, keypos, key);
2550     __ vperm           (vKey3, vTmp1, vTmp2, keyPerm);
2551 
2552     // 1st - 3rd rounds
2553     __ vxor            (vRet, vRet, vKey1);
2554     __ vncipher        (vRet, vRet, vKey2);
2555     __ vncipher        (vRet, vRet, vKey3);
2556 
2557     __ b               (L_doLast);
2558 
2559     __ bind            (L_do44);
2560 
2561     // load the 11th round key to vKey1
2562     __ li              (keypos, 176);


2563     __ lvx             (vTmp1, keypos, key);
2564     __ addi            (keypos, keypos, -16);
2565     __ lvx             (vTmp2, keypos, key);
2566     __ vperm           (vKey1, vTmp1, vTmp2, keyPerm);
2567 
2568     // 1st round
2569     __ vxor            (vRet, vRet, vKey1);
2570 
2571     __ bind            (L_doLast);
2572 
2573     // load the 10th round key to vKey1
2574     __ addi            (keypos, keypos, -16);
2575     __ lvx             (vTmp1, keypos, key);
2576     __ vperm           (vKey1, vTmp2, vTmp1, keyPerm);
2577 
2578     // load the 9th round key to vKey2
2579     __ addi            (keypos, keypos, -16);
2580     __ lvx             (vTmp2, keypos, key);
2581     __ vperm           (vKey2, vTmp1, vTmp2, keyPerm);
2582 
2583     // load the 8th round key to vKey3
2584     __ addi            (keypos, keypos, -16);
2585     __ lvx             (vTmp1, keypos, key);
2586     __ vperm           (vKey3, vTmp2, vTmp1, keyPerm);
2587 
2588     // load the 7th round key to vKey4
2589     __ addi            (keypos, keypos, -16);
2590     __ lvx             (vTmp2, keypos, key);
2591     __ vperm           (vKey4, vTmp1, vTmp2, keyPerm);





2592 
2593     // load the 6th round key to vKey5
2594     __ addi            (keypos, keypos, -16);
2595     __ lvx             (vTmp1, keypos, key);
2596     __ vperm           (vKey5, vTmp2, vTmp1, keyPerm);
2597 
2598     // last 10th - 6th rounds
2599     __ vncipher        (vRet, vRet, vKey1);
2600     __ vncipher        (vRet, vRet, vKey2);
2601     __ vncipher        (vRet, vRet, vKey3);
2602     __ vncipher        (vRet, vRet, vKey4);
2603     __ vncipher        (vRet, vRet, vKey5);
2604 
2605     // load the 5th round key to vKey1
2606     __ addi            (keypos, keypos, -16);
2607     __ lvx             (vTmp2, keypos, key);
2608     __ vperm           (vKey1, vTmp1, vTmp2, keyPerm);
2609 
2610     // load the 4th round key to vKey2
2611     __ addi            (keypos, keypos, -16);
2612     __ lvx             (vTmp1, keypos, key);
2613     __ vperm           (vKey2, vTmp2, vTmp1, keyPerm);
2614 
2615     // load the 3rd round key to vKey3
2616     __ addi            (keypos, keypos, -16);
2617     __ lvx             (vTmp2, keypos, key);
2618     __ vperm           (vKey3, vTmp1, vTmp2, keyPerm);
2619 
2620     // load the 2nd round key to vKey4
2621     __ addi            (keypos, keypos, -16);
2622     __ lvx             (vTmp1, keypos, key);
2623     __ vperm           (vKey4, vTmp2, vTmp1, keyPerm);
2624 
2625     // load the 1st round key to vKey5
2626     __ addi            (keypos, keypos, -16);
2627     __ lvx             (vTmp2, keypos, key);
2628     __ vperm           (vKey5, vTmp1, vTmp2, keyPerm);
2629 
2630     // last 5th - 1st rounds
2631     __ vncipher        (vRet, vRet, vKey1);
2632     __ vncipher        (vRet, vRet, vKey2);
2633     __ vncipher        (vRet, vRet, vKey3);
2634     __ vncipher        (vRet, vRet, vKey4);
2635     __ vncipherlast    (vRet, vRet, vKey5);
2636 
2637     __ neg             (temp, to);
2638     __ lvsr            (toPerm, temp);
2639     __ vspltisb        (vTmp2, -1);
2640     __ vxor            (vTmp1, vTmp1, vTmp1);
2641     __ vperm           (vTmp2, vTmp2, vTmp1, toPerm);
2642     __ vxor            (toPerm, toPerm, fSplt);


2643     __ lvx             (vTmp1, to);
2644     __ vperm           (vRet, vRet, vRet, toPerm);
2645     __ vsel            (vTmp1, vTmp1, vRet, vTmp2);
2646     __ lvx             (vTmp4, fifteen, to);








2647     __ stvx            (vTmp1, to);
2648     __ vsel            (vRet, vRet, vTmp4, vTmp2);
2649     __ stvx            (vRet, fifteen, to);
2650 
2651     __ blr();
2652      return start;
2653   }
2654 
2655   address generate_sha256_implCompress(bool multi_block, const char *name) {
2656     assert(UseSHA, "need SHA instructions");
2657     StubCodeMark mark(this, "StubRoutines", name);
2658     address start = __ function_entry();
2659 
2660     __ sha256 (multi_block);
2661 
2662     __ blr();
2663     return start;
2664   }
2665 
2666   address generate_sha512_implCompress(bool multi_block, const char *name) {
2667     assert(UseSHA, "need SHA instructions");
2668     StubCodeMark mark(this, "StubRoutines", name);
2669     address start = __ function_entry();




2207     address start = __ function_entry();
2208 
2209     gen_write_ref_array_pre_barrier(R3_ARG1, R4_ARG2, R5_ARG3, dest_uninitialized, R9_ARG7);
2210 
2211     // save some arguments, disjoint_long_copy_core destroys them.
2212     // needed for post barrier
2213     __ mr(R9_ARG7, R4_ARG2);
2214     __ mr(R10_ARG8, R5_ARG3);
2215 
2216     if (UseCompressedOops) {
2217       generate_disjoint_int_copy_core(aligned);
2218     } else {
2219       generate_disjoint_long_copy_core(aligned);
2220     }
2221 
2222     gen_write_ref_array_post_barrier(R9_ARG7, R10_ARG8, R11_scratch1, /*branchToEnd*/ false);
2223 
2224     return start;
2225   }
2226 
2227   // Arguments for generated stub:
2228   //   R3_ARG1   - source byte array address
2229   //   R4_ARG2   - destination byte array address
2230   //   R5_ARG3   - round key array
2231   address generate_aescrypt_encryptBlock() {
2232     assert(UseAES, "need AES instructions and misaligned SSE support");
2233     StubCodeMark mark(this, "StubRoutines", "aescrypt_encryptBlock");
2234 
2235     address start = __ function_entry();
2236 
2237     Label L_doLast;
2238 
2239     Register from           = R3_ARG1;  // source array address
2240     Register to             = R4_ARG2;  // destination array address
2241     Register key            = R5_ARG3;  // round key array
2242 
2243     Register keylen         = R8;
2244     Register temp           = R9;
2245     Register keypos         = R10;

2246     Register fifteen        = R12;
2247 
2248     VectorRegister vRet     = VR0;
2249 
2250     VectorRegister vKey1    = VR1;
2251     VectorRegister vKey2    = VR2;
2252     VectorRegister vKey3    = VR3;
2253     VectorRegister vKey4    = VR4;
2254 
2255     VectorRegister fromPerm = VR5;
2256     VectorRegister keyPerm  = VR6;
2257     VectorRegister toPerm   = VR7;
2258     VectorRegister fSplt    = VR8;
2259 
2260     VectorRegister vTmp1    = VR9;
2261     VectorRegister vTmp2    = VR10;
2262     VectorRegister vTmp3    = VR11;
2263     VectorRegister vTmp4    = VR12;
2264 




2265     __ li              (fifteen, 15);

2266 
2267     // load unaligned from[0-15] to vRet
2268     __ lvx             (vRet, from);
2269     __ lvx             (vTmp1, fifteen, from);
2270     __ lvsl            (fromPerm, from);
2271 #ifdef VM_LITTLE_ENDIAN
2272     __ vspltisb        (fSplt, 0x0f);
2273     __ vxor            (fromPerm, fromPerm, fSplt);
2274 #endif
2275     __ vperm           (vRet, vRet, vTmp1, fromPerm);
2276 
2277     // load keylen (44 or 52 or 60)
2278     __ lwz             (keylen, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT), key);
2279 
2280     // to load keys
2281     __ load_perm       (keyPerm, key);
2282 #ifdef VM_LITTLE_ENDIAN
2283     __ vspltisb        (vTmp2, -16);
2284     __ vrld            (keyPerm, keyPerm, vTmp2);
2285     __ vrld            (keyPerm, keyPerm, vTmp2);
2286     __ vsldoi          (keyPerm, keyPerm, keyPerm, 8);
2287 #endif
2288 
2289     // load the 1st round key to vTmp1
2290     __ lvx             (vTmp1, key);
2291     __ li              (keypos, 16);
2292     __ lvx             (vKey1, keypos, key);
2293     __ vec_perm        (vTmp1, vKey1, keyPerm);


2294 
2295     // 1st round
2296     __ vxor            (vRet, vRet, vTmp1);
2297 
2298     // load the 2nd round key to vKey1
2299     __ li              (keypos, 32);
2300     __ lvx             (vKey2, keypos, key);
2301     __ vec_perm        (vKey1, vKey2, keyPerm);
2302 
2303     // load the 3rd round key to vKey2
2304     __ li              (keypos, 48);
2305     __ lvx             (vKey3, keypos, key);
2306     __ vec_perm        (vKey2, vKey3, keyPerm);
2307 
2308     // load the 4th round key to vKey3
2309     __ li              (keypos, 64);
2310     __ lvx             (vKey4, keypos, key);
2311     __ vec_perm        (vKey3, vKey4, keyPerm);
2312 
2313     // load the 5th round key to vKey4
2314     __ li              (keypos, 80);
2315     __ lvx             (vTmp1, keypos, key);
2316     __ vec_perm        (vKey4, vTmp1, keyPerm);
2317 
2318     // 2nd - 5th rounds
2319     __ vcipher         (vRet, vRet, vKey1);
2320     __ vcipher         (vRet, vRet, vKey2);
2321     __ vcipher         (vRet, vRet, vKey3);
2322     __ vcipher         (vRet, vRet, vKey4);
2323 
2324     // load the 6th round key to vKey1
2325     __ li              (keypos, 96);
2326     __ lvx             (vKey2, keypos, key);
2327     __ vec_perm        (vKey1, vTmp1, vKey2, keyPerm);
2328 
2329     // load the 7th round key to vKey2
2330     __ li              (keypos, 112);
2331     __ lvx             (vKey3, keypos, key);
2332     __ vec_perm        (vKey2, vKey3, keyPerm);
2333 
2334     // load the 8th round key to vKey3
2335     __ li              (keypos, 128);
2336     __ lvx             (vKey4, keypos, key);
2337     __ vec_perm        (vKey3, vKey4, keyPerm);
2338 
2339     // load the 9th round key to vKey4
2340     __ li              (keypos, 144);
2341     __ lvx             (vTmp1, keypos, key);
2342     __ vec_perm        (vKey4, vTmp1, keyPerm);
2343 
2344     // 6th - 9th rounds
2345     __ vcipher         (vRet, vRet, vKey1);
2346     __ vcipher         (vRet, vRet, vKey2);
2347     __ vcipher         (vRet, vRet, vKey3);
2348     __ vcipher         (vRet, vRet, vKey4);
2349 
2350     // load the 10th round key to vKey1
2351     __ li              (keypos, 160);
2352     __ lvx             (vKey2, keypos, key);
2353     __ vec_perm        (vKey1, vTmp1, vKey2, keyPerm);
2354 
2355     // load the 11th round key to vKey2
2356     __ li              (keypos, 176);
2357     __ lvx             (vTmp1, keypos, key);
2358     __ vec_perm        (vKey2, vTmp1, keyPerm);
2359 
2360     // if all round keys are loaded, skip next 4 rounds
2361     __ cmpwi           (CCR0, keylen, 44);
2362     __ beq             (CCR0, L_doLast);
2363 
2364     // 10th - 11th rounds
2365     __ vcipher         (vRet, vRet, vKey1);
2366     __ vcipher         (vRet, vRet, vKey2);
2367 
2368     // load the 12th round key to vKey1
2369     __ li              (keypos, 192);
2370     __ lvx             (vKey2, keypos, key);
2371     __ vec_perm        (vKey1, vTmp1, vKey2, keyPerm);
2372 
2373     // load the 13th round key to vKey2
2374     __ li              (keypos, 208);
2375     __ lvx             (vTmp1, keypos, key);
2376     __ vec_perm        (vKey2, vTmp1, keyPerm);
2377 
2378     // if all round keys are loaded, skip next 2 rounds
2379     __ cmpwi           (CCR0, keylen, 52);
2380     __ beq             (CCR0, L_doLast);
2381 
2382     // 12th - 13th rounds
2383     __ vcipher         (vRet, vRet, vKey1);
2384     __ vcipher         (vRet, vRet, vKey2);
2385 
2386     // load the 14th round key to vKey1
2387     __ li              (keypos, 224);
2388     __ lvx             (vKey2, keypos, key);
2389     __ vec_perm        (vKey1, vTmp1, vKey2, keyPerm);
2390 
2391     // load the 15th round key to vKey2
2392     __ li              (keypos, 240);
2393     __ lvx             (vTmp1, keypos, key);
2394     __ vec_perm        (vKey2, vTmp1, keyPerm);
2395 
2396     __ bind(L_doLast);
2397 
2398     // last two rounds
2399     __ vcipher         (vRet, vRet, vKey1);
2400     __ vcipherlast     (vRet, vRet, vKey2);
2401 
2402     // store result (unaligned)
2403 #ifdef VM_LITTLE_ENDIAN
2404     __ lvsl            (toPerm, to);
2405 #else
2406     __ lvsr            (toPerm, to);
2407 #endif
2408     __ vspltisb        (vTmp3, -1);
2409     __ vspltisb        (vTmp4, 0);
2410     __ lvx             (vTmp1, to);
2411     __ lvx             (vTmp2, fifteen, to);
2412 #ifdef VM_LITTLE_ENDIAN
2413     __ vperm           (vTmp3, vTmp3, vTmp4, toPerm); // generate select mask
2414     __ vxor            (toPerm, toPerm, fSplt);       // swap bytes
2415 #else
2416     __ vperm           (vTmp3, vTmp4, vTmp3, toPerm); // generate select mask
2417 #endif
2418     __ vperm           (vTmp4, vRet, vRet, toPerm);   // rotate data
2419     __ vsel            (vTmp2, vTmp4, vTmp2, vTmp3);
2420     __ vsel            (vTmp1, vTmp1, vTmp4, vTmp3);
2421     __ stvx            (vTmp2, fifteen, to);          // store this one first (may alias)
2422     __ stvx            (vTmp1, to);


2423 
2424     __ blr();
2425      return start;
2426   }
2427 
2428   // Arguments for generated stub:
2429   //   R3_ARG1   - source byte array address
2430   //   R4_ARG2   - destination byte array address
2431   //   R5_ARG3   - K (key) in little endian int array
2432   address generate_aescrypt_decryptBlock() {
2433     assert(UseAES, "need AES instructions and misaligned SSE support");
2434     StubCodeMark mark(this, "StubRoutines", "aescrypt_decryptBlock");
2435 
2436     address start = __ function_entry();
2437 
2438     Label L_doLast;
2439     Label L_do44;
2440     Label L_do52;
2441     Label L_do60;
2442 
2443     Register from           = R3_ARG1;  // source array address
2444     Register to             = R4_ARG2;  // destination array address
2445     Register key            = R5_ARG3;  // round key array
2446 
2447     Register keylen         = R8;
2448     Register temp           = R9;
2449     Register keypos         = R10;

2450     Register fifteen        = R12;
2451 
2452     VectorRegister vRet     = VR0;
2453 
2454     VectorRegister vKey1    = VR1;
2455     VectorRegister vKey2    = VR2;
2456     VectorRegister vKey3    = VR3;
2457     VectorRegister vKey4    = VR4;
2458     VectorRegister vKey5    = VR5;
2459 
2460     VectorRegister fromPerm = VR6;
2461     VectorRegister keyPerm  = VR7;
2462     VectorRegister toPerm   = VR8;
2463     VectorRegister fSplt    = VR9;
2464 
2465     VectorRegister vTmp1    = VR10;
2466     VectorRegister vTmp2    = VR11;
2467     VectorRegister vTmp3    = VR12;
2468     VectorRegister vTmp4    = VR13;
2469 




2470     __ li              (fifteen, 15);

2471 
2472     // load unaligned from[0-15] to vRet
2473     __ lvx             (vRet, from);
2474     __ lvx             (vTmp1, fifteen, from);
2475     __ lvsl            (fromPerm, from);
2476 #ifdef VM_LITTLE_ENDIAN
2477     __ vspltisb        (fSplt, 0x0f);
2478     __ vxor            (fromPerm, fromPerm, fSplt);
2479 #endif
2480     __ vperm           (vRet, vRet, vTmp1, fromPerm); // align [and byte swap in LE]
2481 
2482     // load keylen (44 or 52 or 60)
2483     __ lwz             (keylen, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT), key);
2484 
2485     // to load keys
2486     __ load_perm       (keyPerm, key);
2487 #ifdef VM_LITTLE_ENDIAN
2488     __ vxor            (vTmp2, vTmp2, vTmp2);
2489     __ vspltisb        (vTmp2, -16);
2490     __ vrld            (keyPerm, keyPerm, vTmp2);
2491     __ vrld            (keyPerm, keyPerm, vTmp2);
2492     __ vsldoi          (keyPerm, keyPerm, keyPerm, 8);
2493 #endif
2494 
2495     __ cmpwi           (CCR0, keylen, 44);
2496     __ beq             (CCR0, L_do44);
2497 
2498     __ cmpwi           (CCR0, keylen, 52);
2499     __ beq             (CCR0, L_do52);
2500 
2501     // load the 15th round key to vKey1
2502     __ li              (keypos, 240);
2503     __ lvx             (vKey1, keypos, key);
2504     __ li              (keypos, 224);
2505     __ lvx             (vKey2, keypos, key);
2506     __ vec_perm        (vKey1, vKey2, vKey1, keyPerm);
2507 
2508     // load the 14th round key to vKey2
2509     __ li              (keypos, 208);
2510     __ lvx             (vKey3, keypos, key);
2511     __ vec_perm        (vKey2, vKey3, vKey2, keyPerm);
2512 
2513     // load the 13th round key to vKey3
2514     __ li              (keypos, 192);
2515     __ lvx             (vKey4, keypos, key);
2516     __ vec_perm        (vKey3, vKey4, vKey3, keyPerm);
2517 
2518     // load the 12th round key to vKey4
2519     __ li              (keypos, 176);
2520     __ lvx             (vKey5, keypos, key);
2521     __ vec_perm        (vKey4, vKey5, vKey4, keyPerm);
2522 
2523     // load the 11th round key to vKey5
2524     __ li              (keypos, 160);
2525     __ lvx             (vTmp1, keypos, key);
2526     __ vec_perm        (vKey5, vTmp1, vKey5, keyPerm);
2527 
2528     // 1st - 5th rounds
2529     __ vxor            (vRet, vRet, vKey1);
2530     __ vncipher        (vRet, vRet, vKey2);
2531     __ vncipher        (vRet, vRet, vKey3);
2532     __ vncipher        (vRet, vRet, vKey4);
2533     __ vncipher        (vRet, vRet, vKey5);
2534 
2535     __ b               (L_doLast);
2536 
2537     __ bind            (L_do52);
2538 
2539     // load the 13th round key to vKey1
2540     __ li              (keypos, 208);
2541     __ lvx             (vKey1, keypos, key);
2542     __ li              (keypos, 192);
2543     __ lvx             (vKey2, keypos, key);
2544     __ vec_perm        (vKey1, vKey2, vKey1, keyPerm);
2545 
2546     // load the 12th round key to vKey2
2547     __ li              (keypos, 176);
2548     __ lvx             (vKey3, keypos, key);
2549     __ vec_perm        (vKey2, vKey3, vKey2, keyPerm);
2550 
2551     // load the 11th round key to vKey3
2552     __ li              (keypos, 160);
2553     __ lvx             (vTmp1, keypos, key);
2554     __ vec_perm        (vKey3, vTmp1, vKey3, keyPerm);
2555 
2556     // 1st - 3rd rounds
2557     __ vxor            (vRet, vRet, vKey1);
2558     __ vncipher        (vRet, vRet, vKey2);
2559     __ vncipher        (vRet, vRet, vKey3);
2560 
2561     __ b               (L_doLast);
2562 
2563     __ bind            (L_do44);
2564 
2565     // load the 11th round key to vKey1
2566     __ li              (keypos, 176);
2567     __ lvx             (vKey1, keypos, key);
2568     __ li              (keypos, 160);
2569     __ lvx             (vTmp1, keypos, key);
2570     __ vec_perm        (vKey1, vTmp1, vKey1, keyPerm);


2571 
2572     // 1st round
2573     __ vxor            (vRet, vRet, vKey1);
2574 
2575     __ bind            (L_doLast);
2576 
2577     // load the 10th round key to vKey1
2578     __ li              (keypos, 144);
2579     __ lvx             (vKey2, keypos, key);
2580     __ vec_perm        (vKey1, vKey2, vTmp1, keyPerm);
2581 
2582     // load the 9th round key to vKey2
2583     __ li              (keypos, 128);
2584     __ lvx             (vKey3, keypos, key);
2585     __ vec_perm        (vKey2, vKey3, vKey2, keyPerm);





2586 
2587     // load the 8th round key to vKey3
2588     __ li              (keypos, 112);
2589     __ lvx             (vKey4, keypos, key);
2590     __ vec_perm        (vKey3, vKey4, vKey3, keyPerm);
2591 
2592     // load the 7th round key to vKey4
2593     __ li              (keypos, 96);
2594     __ lvx             (vKey5, keypos, key);
2595     __ vec_perm        (vKey4, vKey5, vKey4, keyPerm);
2596 
2597     // load the 6th round key to vKey5
2598     __ li              (keypos, 80);
2599     __ lvx             (vTmp1, keypos, key);
2600     __ vec_perm        (vKey5, vTmp1, vKey5, keyPerm);
2601 
2602     // last 10th - 6th rounds
2603     __ vncipher        (vRet, vRet, vKey1);
2604     __ vncipher        (vRet, vRet, vKey2);
2605     __ vncipher        (vRet, vRet, vKey3);
2606     __ vncipher        (vRet, vRet, vKey4);
2607     __ vncipher        (vRet, vRet, vKey5);
2608 
2609     // load the 5th round key to vKey1
2610     __ li              (keypos, 64);
2611     __ lvx             (vKey2, keypos, key);
2612     __ vec_perm        (vKey1, vKey2, vTmp1, keyPerm);
2613 
2614     // load the 4th round key to vKey2
2615     __ li              (keypos, 48);
2616     __ lvx             (vKey3, keypos, key);
2617     __ vec_perm        (vKey2, vKey3, vKey2, keyPerm);
2618 
2619     // load the 3rd round key to vKey3
2620     __ li              (keypos, 32);
2621     __ lvx             (vKey4, keypos, key);
2622     __ vec_perm        (vKey3, vKey4, vKey3, keyPerm);
2623 
2624     // load the 2nd round key to vKey4
2625     __ li              (keypos, 16);
2626     __ lvx             (vKey5, keypos, key);
2627     __ vec_perm        (vKey4, vKey5, vKey4, keyPerm);
2628 
2629     // load the 1st round key to vKey5
2630     __ lvx             (vTmp1, key);
2631     __ vec_perm        (vKey5, vTmp1, vKey5, keyPerm);

2632 
2633     // last 5th - 1th rounds
2634     __ vncipher        (vRet, vRet, vKey1);
2635     __ vncipher        (vRet, vRet, vKey2);
2636     __ vncipher        (vRet, vRet, vKey3);
2637     __ vncipher        (vRet, vRet, vKey4);
2638     __ vncipherlast    (vRet, vRet, vKey5);
2639 
2640     // store result (unaligned)
2641 #ifdef VM_LITTLE_ENDIAN
2642     __ lvsl            (toPerm, to);
2643 #else
2644     __ lvsr            (toPerm, to);
2645 #endif
2646     __ vspltisb        (vTmp3, -1);
2647     __ vspltisb        (vTmp4, 0);
2648     __ lvx             (vTmp1, to);
2649     __ lvx             (vTmp2, fifteen, to);
2650 #ifdef VM_LITTLE_ENDIAN
2651     __ vperm           (vTmp3, vTmp3, vTmp4, toPerm); // generate select mask
2652     __ vxor            (toPerm, toPerm, fSplt);       // swap bytes
2653 #else
2654     __ vperm           (vTmp3, vTmp4, vTmp3, toPerm); // generate select mask
2655 #endif
2656     __ vperm           (vTmp4, vRet, vRet, toPerm);   // rotate data
2657     __ vsel            (vTmp2, vTmp4, vTmp2, vTmp3);
2658     __ vsel            (vTmp1, vTmp1, vTmp4, vTmp3);
2659     __ stvx            (vTmp2, fifteen, to);          // store this one first (may alias)
2660     __ stvx            (vTmp1, to);


2661 
2662     __ blr();
2663      return start;
2664   }
2665 
2666   address generate_sha256_implCompress(bool multi_block, const char *name) {
2667     assert(UseSHA, "need SHA instructions");
2668     StubCodeMark mark(this, "StubRoutines", name);
2669     address start = __ function_entry();
2670 
2671     __ sha256 (multi_block);
2672 
2673     __ blr();
2674     return start;
2675   }
2676 
2677   address generate_sha512_implCompress(bool multi_block, const char *name) {
2678     assert(UseSHA, "need SHA instructions");
2679     StubCodeMark mark(this, "StubRoutines", name);
2680     address start = __ function_entry();


< prev index next >