< prev index next >

src/cpu/ppc/vm/stubGenerator_ppc.cpp

Print this page
rev 10506 : 8152172: PPC64: Support AES intrinsics
Reviewed-by: kvn, mdoerr, simonis
Contributed-by: Hiroshi H Horii <HORII@jp.ibm.com>


2400                           temp, L_disjoint_plain_copy);
2401 
2402       // Fetch destination element klass from the ObjArrayKlass header.
2403       int ek_offset = in_bytes(ObjArrayKlass::element_klass_offset());
2404 
2405       // The checkcast_copy loop needs two extra arguments:
2406       __ ld(R7_ARG5, ek_offset, dst_klass);   // dest elem klass
2407       __ lwz(R6_ARG4, sco_offset, R7_ARG5);   // sco of elem klass
2408       __ b(entry_checkcast_arraycopy);
2409     }
2410 
2411     __ bind(L_disjoint_plain_copy);
2412     __ b(entry_disjoint_oop_arraycopy);
2413 
2414   __ bind(L_failed);
2415     __ li(R3_RET, -1); // return -1
2416     __ blr();
2417     return start;
2418   }
2419 











































































































































































































































































































































































































































2420 
2421   void generate_arraycopy_stubs() {
2422     // Note: the disjoint stubs must be generated first, some of
2423     // the conjoint stubs use them.
2424 
2425     // non-aligned disjoint versions
2426     StubRoutines::_jbyte_disjoint_arraycopy       = generate_disjoint_byte_copy(false, "jbyte_disjoint_arraycopy");
2427     StubRoutines::_jshort_disjoint_arraycopy      = generate_disjoint_short_copy(false, "jshort_disjoint_arraycopy");
2428     StubRoutines::_jint_disjoint_arraycopy        = generate_disjoint_int_copy(false, "jint_disjoint_arraycopy");
2429     StubRoutines::_jlong_disjoint_arraycopy       = generate_disjoint_long_copy(false, "jlong_disjoint_arraycopy");
2430     StubRoutines::_oop_disjoint_arraycopy         = generate_disjoint_oop_copy(false, "oop_disjoint_arraycopy", false);
2431     StubRoutines::_oop_disjoint_arraycopy_uninit  = generate_disjoint_oop_copy(false, "oop_disjoint_arraycopy_uninit", true);
2432 
2433     // aligned disjoint versions
2434     StubRoutines::_arrayof_jbyte_disjoint_arraycopy      = generate_disjoint_byte_copy(true, "arrayof_jbyte_disjoint_arraycopy");
2435     StubRoutines::_arrayof_jshort_disjoint_arraycopy     = generate_disjoint_short_copy(true, "arrayof_jshort_disjoint_arraycopy");
2436     StubRoutines::_arrayof_jint_disjoint_arraycopy       = generate_disjoint_int_copy(true, "arrayof_jint_disjoint_arraycopy");
2437     StubRoutines::_arrayof_jlong_disjoint_arraycopy      = generate_disjoint_long_copy(true, "arrayof_jlong_disjoint_arraycopy");
2438     StubRoutines::_arrayof_oop_disjoint_arraycopy        = generate_disjoint_oop_copy(true, "arrayof_oop_disjoint_arraycopy", false);
2439     StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit = generate_disjoint_oop_copy(true, "oop_disjoint_arraycopy_uninit", true);


2676   }
2677 
2678   void generate_all() {
2679     // Generates all stubs and initializes the entry points
2680 
2681     // These entry points require SharedInfo::stack0 to be set up in
2682     // non-core builds
2683     StubRoutines::_throw_AbstractMethodError_entry         = generate_throw_exception("AbstractMethodError throw_exception",          CAST_FROM_FN_PTR(address, SharedRuntime::throw_AbstractMethodError),  false);
2684     // Handle IncompatibleClassChangeError in itable stubs.
2685     StubRoutines::_throw_IncompatibleClassChangeError_entry= generate_throw_exception("IncompatibleClassChangeError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_IncompatibleClassChangeError),  false);
2686     StubRoutines::_throw_NullPointerException_at_call_entry= generate_throw_exception("NullPointerException at call throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_NullPointerException_at_call), false);
2687 
2688     StubRoutines::_handler_for_unsafe_access_entry         = generate_handler_for_unsafe_access();
2689 
2690     // support for verify_oop (must happen after universe_init)
2691     StubRoutines::_verify_oop_subroutine_entry             = generate_verify_oop();
2692 
2693     // arraycopy stubs used by compilers
2694     generate_arraycopy_stubs();
2695 
2696     if (UseAESIntrinsics) {
         // AES intrinsics are not implemented on PPC64 at this revision;
         // fail hard if the flag was forced on so silent misconfiguration
         // cannot occur.
2697       guarantee(!UseAESIntrinsics, "not yet implemented.");
2698     }
2699 
2700     // Safefetch stubs.
2701     generate_safefetch("SafeFetch32", sizeof(int),     &StubRoutines::_safefetch32_entry,
2702                                                        &StubRoutines::_safefetch32_fault_pc,
2703                                                        &StubRoutines::_safefetch32_continuation_pc);
2704     generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry,
2705                                                        &StubRoutines::_safefetchN_fault_pc,
2706                                                        &StubRoutines::_safefetchN_continuation_pc);
2707 
2708 #ifdef COMPILER2
2709     if (UseMultiplyToLenIntrinsic) {
2710       StubRoutines::_multiplyToLen = generate_multiplyToLen();
2711     }
2712 #endif
2713 
2714     if (UseMontgomeryMultiplyIntrinsic) {
2715       StubRoutines::_montgomeryMultiply
2716         = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_multiply);
2717     }
2718     if (UseMontgomerySquareIntrinsic) {
2719       StubRoutines::_montgomerySquare
2720         = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_square);
2721     }






2722   }
2723 
2724  public:
       // Emits either the full stub set (all == true) or only the initial
       // stubs needed early in VM startup (all == false) into 'code'.
2725   StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) {
2726     // replace the standard masm with a special one:
2727     _masm = new MacroAssembler(code);
2728     if (all) {
2729       generate_all();
2730     } else {
2731       generate_initial();
2732     }
2733   }
2734 };
2735 
     // VM entry point: constructing the generator emits the stubs into 'code'.
2736 void StubGenerator_generate(CodeBuffer* code, bool all) {
2737   StubGenerator g(code, all);
2738 }


2400                           temp, L_disjoint_plain_copy);
2401 
2402       // Fetch destination element klass from the ObjArrayKlass header.
2403       int ek_offset = in_bytes(ObjArrayKlass::element_klass_offset());
2404 
2405       // The checkcast_copy loop needs two extra arguments:
2406       __ ld(R7_ARG5, ek_offset, dst_klass);   // dest elem klass
2407       __ lwz(R6_ARG4, sco_offset, R7_ARG5);   // sco of elem klass
2408       __ b(entry_checkcast_arraycopy);
2409     }
2410 
2411     __ bind(L_disjoint_plain_copy);
2412     __ b(entry_disjoint_oop_arraycopy);
2413 
2414   __ bind(L_failed);
2415     __ li(R3_RET, -1); // return -1
2416     __ blr();
2417     return start;
2418   }
2419 
2420   // Arguments for generated stub (little endian only):
2421   //   R3_ARG1   - source byte array address
2422   //   R4_ARG2   - destination byte array address
2423   //   R5_ARG3   - round key array
       //
       // Encrypts exactly one 16-byte block: reads from[0..15], applies the
       // expanded key schedule (round count selected by the key array length
       // 44/52/60 ints), and writes the result to to[0..15].  Unaligned
       // source/destination are handled with lvsl/lvsr permutes and a masked
       // merge at the store, so bytes outside to[0..15] are not disturbed.
2424   address generate_aescrypt_encryptBlock() {
         // NOTE(review): assert text mentions "SSE" (x86 wording, presumably
         // copied from the x86 stub) — misleading on PPC; confirm before
         // changing the string.
2425     assert(UseAES, "need AES instructions and misaligned SSE support");
2426     StubCodeMark mark(this, "StubRoutines", "aescrypt_encryptBlock");
2427 
2428     address start = __ function_entry();
2429 
2430     Label L_doLast;
2431 
2432     Register from           = R3_ARG1;  // source array address
2433     Register to             = R4_ARG2;  // destination array address
2434     Register key            = R5_ARG3;  // round key array
2435 
2436     Register keylen         = R8;
2437     Register temp           = R9;
2438     Register keypos         = R10;
2439     Register hex            = R11;      // NOTE(review): set to 16 below but never read afterwards in this stub
2440     Register fifteen        = R12;
2441 
2442     VectorRegister vRet     = VR0;
2443 
2444     VectorRegister vKey1    = VR1;
2445     VectorRegister vKey2    = VR2;
2446     VectorRegister vKey3    = VR3;
2447     VectorRegister vKey4    = VR4;
2448 
2449     VectorRegister fromPerm = VR5;
2450     VectorRegister keyPerm  = VR6;
2451     VectorRegister toPerm   = VR7;
2452     VectorRegister fSplt    = VR8;
2453 
2454     VectorRegister vTmp1    = VR9;
2455     VectorRegister vTmp2    = VR10;
2456     VectorRegister vTmp3    = VR11;     // NOTE(review): unused in this stub
2457     VectorRegister vTmp4    = VR12;
2458 
2459     VectorRegister vLow     = VR13;     // NOTE(review): unused in this stub
2460     VectorRegister vHigh    = VR14;     // NOTE(review): unused in this stub
2461 
2462     __ li              (hex, 16);
2463     __ li              (fifteen, 15);
2464     __ vspltisb        (fSplt, 0x0f);
2465 
2466     // load unaligned from[0-15] to vRet
2467     __ lvx             (vRet, from);
2468     __ lvx             (vTmp1, fifteen, from);
2469     __ lvsl            (fromPerm, from);
2470     __ vxor            (fromPerm, fromPerm, fSplt);
2471     __ vperm           (vRet, vRet, vTmp1, fromPerm);
2472 
2473     // load keylen (44 or 52 or 60)
2474     __ lwz             (keylen, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT), key);
2475 
2476     // to load keys: build keyPerm, the permute control used to realign
         // each round key as it is loaded from the (int-element) key array.
2477     __ lvsr            (keyPerm, key);
2478     __ vxor            (vTmp2, vTmp2, vTmp2);   // NOTE(review): redundant — vTmp2 is overwritten by the vspltisb below
2479     __ vspltisb        (vTmp2, -16);
2480     __ vrld            (keyPerm, keyPerm, vTmp2);
2481     __ vrld            (keyPerm, keyPerm, vTmp2);
         // NOTE(review): vsldoi takes a 4-bit unsigned shift immediate; the
         // -8 here relies on truncation/masking — verify intended value.
2482     __ vsldoi          (keyPerm, keyPerm, keyPerm, -8);
2483 
2484     // load the 1st round key to vKey1
2485     __ li              (keypos, 0);
2486     __ lvx             (vKey1, keypos, key);
2487     __ addi            (keypos, keypos, 16);
2488     __ lvx             (vTmp1, keypos, key);
2489     __ vperm           (vKey1, vTmp1, vKey1, keyPerm);
2490 
2491     // 1st round
2492     __ vxor (vRet, vRet, vKey1);
2493 
2494     // load the 2nd round key to vKey1
2495     __ addi            (keypos, keypos, 16);
2496     __ lvx             (vTmp2, keypos, key);
2497     __ vperm           (vKey1, vTmp2, vTmp1, keyPerm);
2498 
2499     // load the 3rd round key to vKey2
2500     __ addi            (keypos, keypos, 16);
2501     __ lvx             (vTmp1, keypos, key);
2502     __ vperm           (vKey2, vTmp1, vTmp2, keyPerm);
2503 
2504     // load the 4th round key to vKey3
2505     __ addi            (keypos, keypos, 16);
2506     __ lvx             (vTmp2, keypos, key);
2507     __ vperm           (vKey3, vTmp2, vTmp1, keyPerm);
2508 
2509     // load the 5th round key to vKey4
2510     __ addi            (keypos, keypos, 16);
2511     __ lvx             (vTmp1, keypos, key);
2512     __ vperm           (vKey4, vTmp1, vTmp2, keyPerm);
2513 
2514     // 2nd - 5th rounds
2515     __ vcipher (vRet, vRet, vKey1);
2516     __ vcipher (vRet, vRet, vKey2);
2517     __ vcipher (vRet, vRet, vKey3);
2518     __ vcipher (vRet, vRet, vKey4);
2519 
2520     // load the 6th round key to vKey1
2521     __ addi            (keypos, keypos, 16);
2522     __ lvx             (vTmp2, keypos, key);
2523     __ vperm           (vKey1, vTmp2, vTmp1, keyPerm);
2524 
2525     // load the 7th round key to vKey2
2526     __ addi            (keypos, keypos, 16);
2527     __ lvx             (vTmp1, keypos, key);
2528     __ vperm           (vKey2, vTmp1, vTmp2, keyPerm);
2529 
2530     // load the 8th round key to vKey3
2531     __ addi            (keypos, keypos, 16);
2532     __ lvx             (vTmp2, keypos, key);
2533     __ vperm           (vKey3, vTmp2, vTmp1, keyPerm);
2534 
2535     // load the 9th round key to vKey4
2536     __ addi            (keypos, keypos, 16);
2537     __ lvx             (vTmp1, keypos, key);
2538     __ vperm           (vKey4, vTmp1, vTmp2, keyPerm);
2539 
2540     // 6th - 9th rounds
2541     __ vcipher (vRet, vRet, vKey1);
2542     __ vcipher (vRet, vRet, vKey2);
2543     __ vcipher (vRet, vRet, vKey3);
2544     __ vcipher (vRet, vRet, vKey4);
2545 
2546     // load the 10th round key to vKey1
2547     __ addi            (keypos, keypos, 16);
2548     __ lvx             (vTmp2, keypos, key);
2549     __ vperm           (vKey1, vTmp2, vTmp1, keyPerm);
2550 
2551     // load the 11th round key to vKey2
2552     __ addi            (keypos, keypos, 16);
2553     __ lvx             (vTmp1, keypos, key);
2554     __ vperm           (vKey2, vTmp1, vTmp2, keyPerm);
2555 
2556     // if all round keys are loaded, skip next 4 rounds
2557     __ cmpwi           (CCR0, keylen, 44);
2558     __ beq             (CCR0, L_doLast);
2559 
2560     // 10th - 11th rounds
2561     __ vcipher (vRet, vRet, vKey1);
2562     __ vcipher (vRet, vRet, vKey2);
2563 
2564     // load the 12th round key to vKey1
2565     __ addi            (keypos, keypos, 16);
2566     __ lvx             (vTmp2, keypos, key);
2567     __ vperm           (vKey1, vTmp2, vTmp1, keyPerm);
2568 
2569     // load the 13th round key to vKey2
2570     __ addi            (keypos, keypos, 16);
2571     __ lvx             (vTmp1, keypos, key);
2572     __ vperm           (vKey2, vTmp1, vTmp2, keyPerm);
2573 
2574     // if all round keys are loaded, skip next 2 rounds
2575     __ cmpwi           (CCR0, keylen, 52);
2576     __ beq             (CCR0, L_doLast);
2577 
2578     // 12th - 13th rounds
2579     __ vcipher (vRet, vRet, vKey1);
2580     __ vcipher (vRet, vRet, vKey2);
2581 
2582     // load the 14th round key to vKey1
2583     __ addi            (keypos, keypos, 16);
2584     __ lvx             (vTmp2, keypos, key);
2585     __ vperm           (vKey1, vTmp2, vTmp1, keyPerm);
2586 
2587     // load the 15th round key to vKey2
2588     __ addi            (keypos, keypos, 16);
2589     __ lvx             (vTmp1, keypos, key);
2590     __ vperm           (vKey2, vTmp1, vTmp2, keyPerm);
2591 
2592     __ bind(L_doLast);
2593 
2594     // last two rounds
2595     __ vcipher (vRet, vRet, vKey1);
2596     __ vcipherlast (vRet, vRet, vKey2);
2597 
         // Merge vRet into the (possibly unaligned) destination: build a byte
         // mask from the destination alignment and vsel so only to[0..15] is
         // modified.
2598     __ neg             (temp, to);
2599     __ lvsr            (toPerm, temp);
2600     __ vspltisb        (vTmp2, -1);
2601     __ vxor            (vTmp1, vTmp1, vTmp1);
2602     __ vperm           (vTmp2, vTmp2, vTmp1, toPerm);
2603     __ vxor            (toPerm, toPerm, fSplt);
2604     __ lvx             (vTmp1, to);
2605     __ vperm           (vRet, vRet, vRet, toPerm);
2606     __ vsel            (vTmp1, vTmp1, vRet, vTmp2);
2607     __ lvx             (vTmp4, fifteen, to);
2608     __ stvx            (vTmp1, to);
2609     __ vsel            (vRet, vRet, vTmp4, vTmp2);
2610     __ stvx            (vRet, fifteen, to);
2611 
2612     __ blr();
2613      return start;
2614   }
2615 
2616   // Arguments for generated stub (little endian only):
2617   //   R3_ARG1   - source byte array address
2618   //   R4_ARG2   - destination byte array address
2619   //   R5_ARG3   - K (key) in little endian int array
       //
       // Decrypts exactly one 16-byte block by applying vncipher with the
       // round keys in reverse order (key array length 44/52/60 ints selects
       // the starting offset via L_do44/L_do52/fall-through).  Unaligned
       // source/destination are handled the same way as in the encrypt stub.
2620   address generate_aescrypt_decryptBlock() {
         // NOTE(review): assert text mentions "SSE" (x86 wording) — misleading
         // on PPC; confirm before changing the string.
2621     assert(UseAES, "need AES instructions and misaligned SSE support");
2622     StubCodeMark mark(this, "StubRoutines", "aescrypt_decryptBlock");
2623 
2624     address start = __ function_entry();
2625 
2626     Label L_doLast;
2627     Label L_do44;
2628     Label L_do52;
2629     Label L_do60;   // NOTE(review): never bound or branched to in this stub (the keylen==60 case falls through) — consider removing
2630 
2631     Register from           = R3_ARG1;  // source array address
2632     Register to             = R4_ARG2;  // destination array address
2633     Register key            = R5_ARG3;  // round key array
2634 
2635     Register keylen         = R8;
2636     Register temp           = R9;
2637     Register keypos         = R10;
2638     Register hex            = R11;      // NOTE(review): set to 16 below but never read afterwards in this stub
2639     Register fifteen        = R12;
2640 
2641     VectorRegister vRet     = VR0;
2642 
2643     VectorRegister vKey1    = VR1;
2644     VectorRegister vKey2    = VR2;
2645     VectorRegister vKey3    = VR3;
2646     VectorRegister vKey4    = VR4;
2647     VectorRegister vKey5    = VR5;
2648 
2649     VectorRegister fromPerm = VR6;
2650     VectorRegister keyPerm  = VR7;
2651     VectorRegister toPerm   = VR8;
2652     VectorRegister fSplt    = VR9;
2653 
2654     VectorRegister vTmp1    = VR10;
2655     VectorRegister vTmp2    = VR11;
2656     VectorRegister vTmp3    = VR12;     // NOTE(review): unused in this stub
2657     VectorRegister vTmp4    = VR13;
2658 
2659     VectorRegister vLow     = VR14;     // NOTE(review): unused in this stub
2660     VectorRegister vHigh    = VR15;     // NOTE(review): unused in this stub
2661 
2662     __ li              (hex, 16);
2663     __ li              (fifteen, 15);
2664     __ vspltisb        (fSplt, 0x0f);
2665 
2666     // load unaligned from[0-15] to vRet
2667     __ lvx             (vRet, from);
2668     __ lvx             (vTmp1, fifteen, from);
2669     __ lvsl            (fromPerm, from);
2670     __ vxor            (fromPerm, fromPerm, fSplt);
2671     __ vperm           (vRet, vRet, vTmp1, fromPerm); // align [and byte swap in LE]
2672 
2673     // load keylen (44 or 52 or 60)
2674     __ lwz             (keylen, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT), key);
2675 
2676     // to load keys: build keyPerm, the permute control used to realign
         // each round key as it is loaded from the (int-element) key array.
2677     __ lvsr            (keyPerm, key);
2678     __ vxor            (vTmp2, vTmp2, vTmp2);   // NOTE(review): redundant — vTmp2 is overwritten by the vspltisb below
2679     __ vspltisb        (vTmp2, -16);
2680     __ vrld            (keyPerm, keyPerm, vTmp2);
2681     __ vrld            (keyPerm, keyPerm, vTmp2);
         // NOTE(review): vsldoi takes a 4-bit unsigned shift immediate; the
         // -8 here relies on truncation/masking — verify intended value.
2682     __ vsldoi          (keyPerm, keyPerm, keyPerm, -8);
2683 
2684     __ cmpwi           (CCR0, keylen, 44);
2685     __ beq             (CCR0, L_do44);
2686 
2687     __ cmpwi           (CCR0, keylen, 52);
2688     __ beq             (CCR0, L_do52);
2689 
2690     // keylen == 60: load the 15th round key to vKey1
2691     __ li              (keypos, 240);
2692     __ lvx             (vTmp1, keypos, key);
2693     __ addi            (keypos, keypos, -16);
2694     __ lvx             (vTmp2, keypos, key);
2695     __ vperm           (vKey1, vTmp1, vTmp2, keyPerm);
2696 
2697     // load the 14th round key to vKey2
2698     __ addi            (keypos, keypos, -16);
2699     __ lvx             (vTmp1, keypos, key);
2700     __ vperm           (vKey2, vTmp2, vTmp1, keyPerm);
2701 
2702     // load the 13th round key to vKey3
2703     __ addi            (keypos, keypos, -16);
2704     __ lvx             (vTmp2, keypos, key);
2705     __ vperm           (vKey3, vTmp1, vTmp2, keyPerm);
2706 
2707     // load the 12th round key to vKey4
2708     __ addi            (keypos, keypos, -16);
2709     __ lvx             (vTmp1, keypos, key);
2710     __ vperm           (vKey4, vTmp2, vTmp1, keyPerm);
2711 
2712     // load the 11th round key to vKey5
2713     __ addi            (keypos, keypos, -16);
2714     __ lvx             (vTmp2, keypos, key);
2715     __ vperm           (vKey5, vTmp1, vTmp2, keyPerm);
2716 
2717     // 1st - 5th rounds
2718     __ vxor            (vRet, vRet, vKey1);
2719     __ vncipher        (vRet, vRet, vKey2);
2720     __ vncipher        (vRet, vRet, vKey3);
2721     __ vncipher        (vRet, vRet, vKey4);
2722     __ vncipher        (vRet, vRet, vKey5);
2723 
2724     __ b               (L_doLast);
2725 
2726     __ bind            (L_do52);
2727 
2728     // keylen == 52: load the 13th round key to vKey1
2729     __ li              (keypos, 208);
2730     __ lvx             (vTmp1, keypos, key);
2731     __ addi            (keypos, keypos, -16);
2732     __ lvx             (vTmp2, keypos, key);
2733     __ vperm           (vKey1, vTmp1, vTmp2, keyPerm);
2734 
2735     // load the 12th round key to vKey2
2736     __ addi            (keypos, keypos, -16);
2737     __ lvx             (vTmp1, keypos, key);
2738     __ vperm           (vKey2, vTmp2, vTmp1, keyPerm);
2739 
2740     // load the 11th round key to vKey3
2741     __ addi            (keypos, keypos, -16);
2742     __ lvx             (vTmp2, keypos, key);
2743     __ vperm           (vKey3, vTmp1, vTmp2, keyPerm);
2744 
2745     // 1st - 3rd rounds
2746     __ vxor            (vRet, vRet, vKey1);
2747     __ vncipher        (vRet, vRet, vKey2);
2748     __ vncipher        (vRet, vRet, vKey3);
2749 
2750     __ b               (L_doLast);
2751 
2752     __ bind            (L_do44);
2753 
2754     // keylen == 44: load the 11th round key to vKey1
2755     __ li              (keypos, 176);
2756     __ lvx             (vTmp1, keypos, key);
2757     __ addi            (keypos, keypos, -16);
2758     __ lvx             (vTmp2, keypos, key);
2759     __ vperm           (vKey1, vTmp1, vTmp2, keyPerm);
2760 
2761     // 1st round
2762     __ vxor            (vRet, vRet, vKey1);
2763 
2764     __ bind            (L_doLast);
2765 
2766     // load the 10th round key to vKey1
2767     __ addi            (keypos, keypos, -16);
2768     __ lvx             (vTmp1, keypos, key);
2769     __ vperm           (vKey1, vTmp2, vTmp1, keyPerm);
2770 
2771     // load the 9th round key to vKey2
2772     __ addi            (keypos, keypos, -16);
2773     __ lvx             (vTmp2, keypos, key);
2774     __ vperm           (vKey2, vTmp1, vTmp2, keyPerm);
2775 
2776     // load the 8th round key to vKey3
2777     __ addi            (keypos, keypos, -16);
2778     __ lvx             (vTmp1, keypos, key);
2779     __ vperm           (vKey3, vTmp2, vTmp1, keyPerm);
2780 
2781     // load the 7th round key to vKey4
2782     __ addi            (keypos, keypos, -16);
2783     __ lvx             (vTmp2, keypos, key);
2784     __ vperm           (vKey4, vTmp1, vTmp2, keyPerm);
2785 
2786     // load the 6th round key to vKey5
2787     __ addi            (keypos, keypos, -16);
2788     __ lvx             (vTmp1, keypos, key);
2789     __ vperm           (vKey5, vTmp2, vTmp1, keyPerm);
2790 
2791     // last 10th - 6th rounds
2792     __ vncipher        (vRet, vRet, vKey1);
2793     __ vncipher        (vRet, vRet, vKey2);
2794     __ vncipher        (vRet, vRet, vKey3);
2795     __ vncipher        (vRet, vRet, vKey4);
2796     __ vncipher        (vRet, vRet, vKey5);
2797 
2798     // load the 5th round key to vKey1
2799     __ addi            (keypos, keypos, -16);
2800     __ lvx             (vTmp2, keypos, key);
2801     __ vperm           (vKey1, vTmp1, vTmp2, keyPerm);
2802 
2803     // load the 4th round key to vKey2
2804     __ addi            (keypos, keypos, -16);
2805     __ lvx             (vTmp1, keypos, key);
2806     __ vperm           (vKey2, vTmp2, vTmp1, keyPerm);
2807 
2808     // load the 3rd round key to vKey3
2809     __ addi            (keypos, keypos, -16);
2810     __ lvx             (vTmp2, keypos, key);
2811     __ vperm           (vKey3, vTmp1, vTmp2, keyPerm);
2812 
2813     // load the 2nd round key to vKey4
2814     __ addi            (keypos, keypos, -16);
2815     __ lvx             (vTmp1, keypos, key);
2816     __ vperm           (vKey4, vTmp2, vTmp1, keyPerm);
2817 
2818     // load the 1st round key to vKey5
2819     __ addi            (keypos, keypos, -16);
2820     __ lvx             (vTmp2, keypos, key);
2821     __ vperm           (vKey5, vTmp1, vTmp2, keyPerm);
2822 
2823     // last 5th - 1st rounds
2824     __ vncipher        (vRet, vRet, vKey1);
2825     __ vncipher        (vRet, vRet, vKey2);
2826     __ vncipher        (vRet, vRet, vKey3);
2827     __ vncipher        (vRet, vRet, vKey4);
2828     __ vncipherlast    (vRet, vRet, vKey5);
2829 
         // Merge vRet into the (possibly unaligned) destination: build a byte
         // mask from the destination alignment and vsel so only to[0..15] is
         // modified.
2830     __ neg             (temp, to);
2831     __ lvsr            (toPerm, temp);
2832     __ vspltisb        (vTmp2, -1);
2833     __ vxor            (vTmp1, vTmp1, vTmp1);
2834     __ vperm           (vTmp2, vTmp2, vTmp1, toPerm);
2835     __ vxor            (toPerm, toPerm, fSplt);
2836     __ lvx             (vTmp1, to);
2837     __ vperm           (vRet, vRet, vRet, toPerm);
2838     __ vsel            (vTmp1, vTmp1, vRet, vTmp2);
2839     __ lvx             (vTmp4, fifteen, to);
2840     __ stvx            (vTmp1, to);
2841     __ vsel            (vRet, vRet, vTmp4, vTmp2);
2842     __ stvx            (vRet, fifteen, to);
2843 
2844     __ blr();
2845      return start;
2846   }
2847 
2848   void generate_arraycopy_stubs() {
2849     // Note: the disjoint stubs must be generated first, some of
2850     // the conjoint stubs use them.
2851 
2852     // non-aligned disjoint versions
2853     StubRoutines::_jbyte_disjoint_arraycopy       = generate_disjoint_byte_copy(false, "jbyte_disjoint_arraycopy");
2854     StubRoutines::_jshort_disjoint_arraycopy      = generate_disjoint_short_copy(false, "jshort_disjoint_arraycopy");
2855     StubRoutines::_jint_disjoint_arraycopy        = generate_disjoint_int_copy(false, "jint_disjoint_arraycopy");
2856     StubRoutines::_jlong_disjoint_arraycopy       = generate_disjoint_long_copy(false, "jlong_disjoint_arraycopy");
2857     StubRoutines::_oop_disjoint_arraycopy         = generate_disjoint_oop_copy(false, "oop_disjoint_arraycopy", false);
2858     StubRoutines::_oop_disjoint_arraycopy_uninit  = generate_disjoint_oop_copy(false, "oop_disjoint_arraycopy_uninit", true);
2859 
2860     // aligned disjoint versions
2861     StubRoutines::_arrayof_jbyte_disjoint_arraycopy      = generate_disjoint_byte_copy(true, "arrayof_jbyte_disjoint_arraycopy");
2862     StubRoutines::_arrayof_jshort_disjoint_arraycopy     = generate_disjoint_short_copy(true, "arrayof_jshort_disjoint_arraycopy");
2863     StubRoutines::_arrayof_jint_disjoint_arraycopy       = generate_disjoint_int_copy(true, "arrayof_jint_disjoint_arraycopy");
2864     StubRoutines::_arrayof_jlong_disjoint_arraycopy      = generate_disjoint_long_copy(true, "arrayof_jlong_disjoint_arraycopy");
2865     StubRoutines::_arrayof_oop_disjoint_arraycopy        = generate_disjoint_oop_copy(true, "arrayof_oop_disjoint_arraycopy", false);
2866     StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit = generate_disjoint_oop_copy(true, "oop_disjoint_arraycopy_uninit", true);


3103   }
3104 
3105   void generate_all() {
3106     // Generates all stubs and initializes the entry points
3107 
3108     // These entry points require SharedInfo::stack0 to be set up in
3109     // non-core builds
3110     StubRoutines::_throw_AbstractMethodError_entry         = generate_throw_exception("AbstractMethodError throw_exception",          CAST_FROM_FN_PTR(address, SharedRuntime::throw_AbstractMethodError),  false);
3111     // Handle IncompatibleClassChangeError in itable stubs.
3112     StubRoutines::_throw_IncompatibleClassChangeError_entry= generate_throw_exception("IncompatibleClassChangeError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_IncompatibleClassChangeError),  false);
3113     StubRoutines::_throw_NullPointerException_at_call_entry= generate_throw_exception("NullPointerException at call throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_NullPointerException_at_call), false);
3114 
3115     StubRoutines::_handler_for_unsafe_access_entry         = generate_handler_for_unsafe_access();
3116 
3117     // support for verify_oop (must happen after universe_init)
3118     StubRoutines::_verify_oop_subroutine_entry             = generate_verify_oop();
3119 
3120     // arraycopy stubs used by compilers
3121     generate_arraycopy_stubs();
3122 




3123     // Safefetch stubs.
3124     generate_safefetch("SafeFetch32", sizeof(int),     &StubRoutines::_safefetch32_entry,
3125                                                        &StubRoutines::_safefetch32_fault_pc,
3126                                                        &StubRoutines::_safefetch32_continuation_pc);
3127     generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry,
3128                                                        &StubRoutines::_safefetchN_fault_pc,
3129                                                        &StubRoutines::_safefetchN_continuation_pc);
3130 
3131 #ifdef COMPILER2
3132     if (UseMultiplyToLenIntrinsic) {
3133       StubRoutines::_multiplyToLen = generate_multiplyToLen();
3134     }
3135 #endif
3136 
3137     if (UseMontgomeryMultiplyIntrinsic) {
3138       StubRoutines::_montgomeryMultiply
3139         = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_multiply);
3140     }
3141     if (UseMontgomerySquareIntrinsic) {
3142       StubRoutines::_montgomerySquare
3143         = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_square);
3144     }
3145 
         // AES single-block encrypt/decrypt stubs (JDK-8152172), generated
         // only when the UseAES flag is on for this platform.
3146     if (UseAES) {
3147       StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock();
3148       StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock();
3149     }
3150 
3151   }
3152 
3153  public:
       // Emits either the full stub set (all == true) or only the initial
       // stubs needed early in VM startup (all == false) into 'code'.
3154   StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) {
3155     // replace the standard masm with a special one:
3156     _masm = new MacroAssembler(code);
3157     if (all) {
3158       generate_all();
3159     } else {
3160       generate_initial();
3161     }
3162   }
3163 };
3164 
     // VM entry point: constructing the generator emits the stubs into 'code'.
3165 void StubGenerator_generate(CodeBuffer* code, bool all) {
3166   StubGenerator g(code, all);
3167 }
< prev index next >