# HG changeset patch # User mdoerr # Date 1458301817 -3600 # Fri Mar 18 12:50:17 2016 +0100 # Node ID f382639abc3efb58b143135374c5cd65403afc7f # Parent 15928d255046313227fd4cedcf47a0ad84088665 8152172: PPC64: Support AES intrinsics Reviewed-by: kvn, mdoerr, simonis Contributed-by: horii@jp.ibm.com diff --git a/src/cpu/ppc/vm/assembler_ppc.hpp b/src/cpu/ppc/vm/assembler_ppc.hpp --- a/src/cpu/ppc/vm/assembler_ppc.hpp +++ b/src/cpu/ppc/vm/assembler_ppc.hpp @@ -589,6 +589,7 @@ VNOR_OPCODE = (4u << OPCODE_SHIFT | 1284u ), VOR_OPCODE = (4u << OPCODE_SHIFT | 1156u ), VXOR_OPCODE = (4u << OPCODE_SHIFT | 1220u ), + VRLD_OPCODE = (4u << OPCODE_SHIFT | 196u ), VRLB_OPCODE = (4u << OPCODE_SHIFT | 4u ), VRLW_OPCODE = (4u << OPCODE_SHIFT | 132u ), VRLH_OPCODE = (4u << OPCODE_SHIFT | 68u ), @@ -1918,6 +1919,7 @@ inline void vnor( VectorRegister d, VectorRegister a, VectorRegister b); inline void vor( VectorRegister d, VectorRegister a, VectorRegister b); inline void vxor( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vrld( VectorRegister d, VectorRegister a, VectorRegister b); inline void vrlb( VectorRegister d, VectorRegister a, VectorRegister b); inline void vrlw( VectorRegister d, VectorRegister a, VectorRegister b); inline void vrlh( VectorRegister d, VectorRegister a, VectorRegister b); diff --git a/src/cpu/ppc/vm/assembler_ppc.inline.hpp b/src/cpu/ppc/vm/assembler_ppc.inline.hpp --- a/src/cpu/ppc/vm/assembler_ppc.inline.hpp +++ b/src/cpu/ppc/vm/assembler_ppc.inline.hpp @@ -739,6 +739,7 @@ inline void Assembler::vnor( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VNOR_OPCODE | vrt(d) | vra(a) | vrb(b)); } inline void Assembler::vor( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VOR_OPCODE | vrt(d) | vra(a) | vrb(b)); } inline void Assembler::vxor( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VXOR_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vrld( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VRLD_OPCODE | vrt(d) | vra(a) | vrb(b)); } inline void Assembler::vrlb( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VRLB_OPCODE | vrt(d) | vra(a) | vrb(b)); } inline void Assembler::vrlw( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VRLW_OPCODE | vrt(d) | vra(a) | vrb(b)); } inline void Assembler::vrlh( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VRLH_OPCODE | vrt(d) | vra(a) | vrb(b)); } diff --git a/src/cpu/ppc/vm/stubGenerator_ppc.cpp b/src/cpu/ppc/vm/stubGenerator_ppc.cpp --- a/src/cpu/ppc/vm/stubGenerator_ppc.cpp +++ b/src/cpu/ppc/vm/stubGenerator_ppc.cpp @@ -1961,6 +1961,434 @@ return start; } + // Arguments for generated stub (little endian only): + // R3_ARG1 - source byte array address + // R4_ARG2 - destination byte array address + // R5_ARG3 - round key array + address generate_aescrypt_encryptBlock() { + assert(UseAES, "need AES instructions and misaligned SSE support"); + StubCodeMark mark(this, "StubRoutines", "aescrypt_encryptBlock"); + + address start = __ function_entry(); + + Label L_doLast; + + Register from = R3_ARG1; // source array address + Register to = R4_ARG2; // destination array address + Register key = R5_ARG3; // round key array + + Register keylen = R8; + Register temp = R9; + Register keypos = R10; + Register hex = R11; + Register fifteen = R12; + + VectorRegister vRet = VR0; + + VectorRegister vKey1 = VR1; + VectorRegister vKey2 = VR2; + VectorRegister vKey3 = VR3; + VectorRegister vKey4 = VR4; + + VectorRegister fromPerm = VR5; + VectorRegister keyPerm = VR6; + VectorRegister toPerm = VR7; + VectorRegister fSplt = VR8; + + VectorRegister vTmp1 = VR9; + VectorRegister vTmp2 = VR10; + VectorRegister vTmp3 = VR11; + VectorRegister vTmp4 = VR12; + + VectorRegister vLow = VR13; + VectorRegister vHigh = VR14; + + __ li (hex, 16); + __ li (fifteen, 15); + __ vspltisb (fSplt, 0x0f); + + // load unaligned from[0-15] to vsRet + __ lvx (vRet, from); + __ lvx (vTmp1, fifteen, from); + __ lvsl (fromPerm, from); + __ vxor (fromPerm, fromPerm, fSplt); + __ vperm (vRet, vRet, vTmp1, fromPerm); + + // load keylen (44 or 52 or 60) + __ lwz (keylen, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT), key); + + // to load keys + __ lvsr (keyPerm, key); + __ vxor (vTmp2, vTmp2, vTmp2); + __ vspltisb (vTmp2, -16); + __ vrld (keyPerm, keyPerm, vTmp2); + __ vrld (keyPerm, keyPerm, vTmp2); + __ vsldoi (keyPerm, keyPerm, keyPerm, -8); + + // load the 1st round key to vKey1 + __ li (keypos, 0); + __ lvx (vKey1, keypos, key); + __ addi (keypos, keypos, 16); + __ lvx (vTmp1, keypos, key); + __ vperm (vKey1, vTmp1, vKey1, keyPerm); + + // 1st round + __ vxor (vRet, vRet, vKey1); + + // load the 2nd round key to vKey1 + __ addi (keypos, keypos, 16); + __ lvx (vTmp2, keypos, key); + __ vperm (vKey1, vTmp2, vTmp1, keyPerm); + + // load the 3rd round key to vKey2 + __ addi (keypos, keypos, 16); + __ lvx (vTmp1, keypos, key); + __ vperm (vKey2, vTmp1, vTmp2, keyPerm); + + // load the 4th round key to vKey3 + __ addi (keypos, keypos, 16); + __ lvx (vTmp2, keypos, key); + __ vperm (vKey3, vTmp2, vTmp1, keyPerm); + + // load the 5th round key to vKey4 + __ addi (keypos, keypos, 16); + __ lvx (vTmp1, keypos, key); + __ vperm (vKey4, vTmp1, vTmp2, keyPerm); + + // 2nd - 5th rounds + __ vcipher (vRet, vRet, vKey1); + __ vcipher (vRet, vRet, vKey2); + __ vcipher (vRet, vRet, vKey3); + __ vcipher (vRet, vRet, vKey4); + + // load the 6th round key to vKey1 + __ addi (keypos, keypos, 16); + __ lvx (vTmp2, keypos, key); + __ vperm (vKey1, vTmp2, vTmp1, keyPerm); + + // load the 7th round key to vKey2 + __ addi (keypos, keypos, 16); + __ lvx (vTmp1, keypos, key); + __ vperm (vKey2, vTmp1, vTmp2, keyPerm); + + // load the 8th round key to vKey3 + __ addi (keypos, keypos, 16); + __ lvx (vTmp2, keypos, key); + __ vperm (vKey3, vTmp2, vTmp1, keyPerm); + + // load the 9th round key to vKey4 + __ addi (keypos, keypos, 16); + __ lvx (vTmp1, keypos, key); + __ vperm (vKey4, vTmp1, vTmp2, keyPerm); + + // 6th - 9th rounds + __ vcipher (vRet, vRet, vKey1); + __ vcipher (vRet, vRet, vKey2); + __ vcipher (vRet, vRet, vKey3); + __ vcipher (vRet, vRet, vKey4); + + // load the 10th round key to vKey1 + __ addi (keypos, keypos, 16); + __ lvx (vTmp2, keypos, key); + __ vperm (vKey1, vTmp2, vTmp1, keyPerm); + + // load the 11th round key to vKey2 + __ addi (keypos, keypos, 16); + __ lvx (vTmp1, keypos, key); + __ vperm (vKey2, vTmp1, vTmp2, keyPerm); + + // if all round keys are loaded, skip next 4 rounds + __ cmpwi (CCR0, keylen, 44); + __ beq (CCR0, L_doLast); + + // 10th - 11th rounds + __ vcipher (vRet, vRet, vKey1); + __ vcipher (vRet, vRet, vKey2); + + // load the 12th round key to vKey1 + __ addi (keypos, keypos, 16); + __ lvx (vTmp2, keypos, key); + __ vperm (vKey1, vTmp2, vTmp1, keyPerm); + + // load the 13th round key to vKey2 + __ addi (keypos, keypos, 16); + __ lvx (vTmp1, keypos, key); + __ vperm (vKey2, vTmp1, vTmp2, keyPerm); + + // if all round keys are loaded, skip next 2 rounds + __ cmpwi (CCR0, keylen, 52); + __ beq (CCR0, L_doLast); + + // 12th - 13th rounds + __ vcipher (vRet, vRet, vKey1); + __ vcipher (vRet, vRet, vKey2); + + // load the 14th round key to vKey1 + __ addi (keypos, keypos, 16); + __ lvx (vTmp2, keypos, key); + __ vperm (vKey1, vTmp2, vTmp1, keyPerm); + + // load the 15th round key to vKey2 + __ addi (keypos, keypos, 16); + __ lvx (vTmp1, keypos, key); + __ vperm (vKey2, vTmp1, vTmp2, keyPerm); + + __ bind(L_doLast); + + // last two rounds + __ vcipher (vRet, vRet, vKey1); + __ vcipherlast (vRet, vRet, vKey2); + + __ neg (temp, to); + __ lvsr (toPerm, temp); + __ vspltisb (vTmp2, -1); + __ vxor (vTmp1, vTmp1, vTmp1); + __ vperm (vTmp2, vTmp2, vTmp1, toPerm); + __ vxor (toPerm, toPerm, fSplt); + __ lvx (vTmp1, to); + __ vperm (vRet, vRet, vRet, toPerm); + __ vsel (vTmp1, vTmp1, vRet, vTmp2); + __ lvx (vTmp4, fifteen, to); + __ stvx (vTmp1, to); + __ vsel (vRet, vRet, vTmp4, vTmp2); + __ stvx (vRet, fifteen, to); + + __ blr(); + return start; + } + + // Arguments for generated stub (little endian only): + // R3_ARG1 - source byte array address + // R4_ARG2 - destination byte array address + // R5_ARG3 - K (key) in little endian int array + address generate_aescrypt_decryptBlock() { + assert(UseAES, "need AES instructions and misaligned SSE support"); + StubCodeMark mark(this, "StubRoutines", "aescrypt_decryptBlock"); + + address start = __ function_entry(); + + Label L_doLast; + Label L_do44; + Label L_do52; + Label L_do60; + + Register from = R3_ARG1; // source array address + Register to = R4_ARG2; // destination array address + Register key = R5_ARG3; // round key array + + Register keylen = R8; + Register temp = R9; + Register keypos = R10; + Register hex = R11; + Register fifteen = R12; + + VectorRegister vRet = VR0; + + VectorRegister vKey1 = VR1; + VectorRegister vKey2 = VR2; + VectorRegister vKey3 = VR3; + VectorRegister vKey4 = VR4; + VectorRegister vKey5 = VR5; + + VectorRegister fromPerm = VR6; + VectorRegister keyPerm = VR7; + VectorRegister toPerm = VR8; + VectorRegister fSplt = VR9; + + VectorRegister vTmp1 = VR10; + VectorRegister vTmp2 = VR11; + VectorRegister vTmp3 = VR12; + VectorRegister vTmp4 = VR13; + + VectorRegister vLow = VR14; + VectorRegister vHigh = VR15; + + __ li (hex, 16); + __ li (fifteen, 15); + __ vspltisb (fSplt, 0x0f); + + // load unaligned from[0-15] to vsRet + __ lvx (vRet, from); + __ lvx (vTmp1, fifteen, from); + __ lvsl (fromPerm, from); + __ vxor (fromPerm, fromPerm, fSplt); + __ vperm (vRet, vRet, vTmp1, fromPerm); // align [and byte swap in LE] + + // load keylen (44 or 52 or 60) + __ lwz (keylen, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT), key); + + // to load keys + __ lvsr (keyPerm, key); + __ vxor (vTmp2, vTmp2, vTmp2); + __ vspltisb (vTmp2, -16); + __ vrld (keyPerm, keyPerm, vTmp2); + __ vrld (keyPerm, keyPerm, vTmp2); + __ vsldoi (keyPerm, keyPerm, keyPerm, -8); + + __ cmpwi (CCR0, keylen, 44); + __ beq (CCR0, L_do44); + + __ cmpwi (CCR0, keylen, 52); + __ beq (CCR0, L_do52); + + // load the 15th round key to vKey11 + __ li (keypos, 240); + __ lvx (vTmp1, keypos, key); + __ addi (keypos, keypos, -16); + __ lvx (vTmp2, keypos, key); + __ vperm (vKey1, vTmp1, vTmp2, keyPerm); + + // load the 14th round key to vKey10 + __ addi (keypos, keypos, -16); + __ lvx (vTmp1, keypos, key); + __ vperm (vKey2, vTmp2, vTmp1, keyPerm); + + // load the 13th round key to vKey10 + __ addi (keypos, keypos, -16); + __ lvx (vTmp2, keypos, key); + __ vperm (vKey3, vTmp1, vTmp2, keyPerm); + + // load the 12th round key to vKey10 + __ addi (keypos, keypos, -16); + __ lvx (vTmp1, keypos, key); + __ vperm (vKey4, vTmp2, vTmp1, keyPerm); + + // load the 11th round key to vKey10 + __ addi (keypos, keypos, -16); + __ lvx (vTmp2, keypos, key); + __ vperm (vKey5, vTmp1, vTmp2, keyPerm); + + // 1st - 5th rounds + __ vxor (vRet, vRet, vKey1); + __ vncipher (vRet, vRet, vKey2); + __ vncipher (vRet, vRet, vKey3); + __ vncipher (vRet, vRet, vKey4); + __ vncipher (vRet, vRet, vKey5); + + __ b (L_doLast); + + __ bind (L_do52); + + // load the 13th round key to vKey11 + __ li (keypos, 208); + __ lvx (vTmp1, keypos, key); + __ addi (keypos, keypos, -16); + __ lvx (vTmp2, keypos, key); + __ vperm (vKey1, vTmp1, vTmp2, keyPerm); + + // load the 12th round key to vKey10 + __ addi (keypos, keypos, -16); + __ lvx (vTmp1, keypos, key); + __ vperm (vKey2, vTmp2, vTmp1, keyPerm); + + // load the 11th round key to vKey10 + __ addi (keypos, keypos, -16); + __ lvx (vTmp2, keypos, key); + __ vperm (vKey3, vTmp1, vTmp2, keyPerm); + + // 1st - 3rd rounds + __ vxor (vRet, vRet, vKey1); + __ vncipher (vRet, vRet, vKey2); + __ vncipher (vRet, vRet, vKey3); + + __ b (L_doLast); + + __ bind (L_do44); + + // load the 11th round key to vKey11 + __ li (keypos, 176); + __ lvx (vTmp1, keypos, key); + __ addi (keypos, keypos, -16); + __ lvx (vTmp2, keypos, key); + __ vperm (vKey1, vTmp1, vTmp2, keyPerm); + + // 1st round + __ vxor (vRet, vRet, vKey1); + + __ bind (L_doLast); + + // load the 10th round key to vKey10 + __ addi (keypos, keypos, -16); + __ lvx (vTmp1, keypos, key); + __ vperm (vKey1, vTmp2, vTmp1, keyPerm); + + // load the 9th round key to vKey10 + __ addi (keypos, keypos, -16); + __ lvx (vTmp2, keypos, key); + __ vperm (vKey2, vTmp1, vTmp2, keyPerm); + + // load the 8th round key to vKey10 + __ addi (keypos, keypos, -16); + __ lvx (vTmp1, keypos, key); + __ vperm (vKey3, vTmp2, vTmp1, keyPerm); + + // load the 7th round key to vKey10 + __ addi (keypos, keypos, -16); + __ lvx (vTmp2, keypos, key); + __ vperm (vKey4, vTmp1, vTmp2, keyPerm); + + // load the 6th round key to vKey10 + __ addi (keypos, keypos, -16); + __ lvx (vTmp1, keypos, key); + __ vperm (vKey5, vTmp2, vTmp1, keyPerm); + + // last 10th - 6th rounds + __ vncipher (vRet, vRet, vKey1); + __ vncipher (vRet, vRet, vKey2); + __ vncipher (vRet, vRet, vKey3); + __ vncipher (vRet, vRet, vKey4); + __ vncipher (vRet, vRet, vKey5); + + // load the 5th round key to vKey10 + __ addi (keypos, keypos, -16); + __ lvx (vTmp2, keypos, key); + __ vperm (vKey1, vTmp1, vTmp2, keyPerm); + + // load the 4th round key to vKey10 + __ addi (keypos, keypos, -16); + __ lvx (vTmp1, keypos, key); + __ vperm (vKey2, vTmp2, vTmp1, keyPerm); + + // load the 3rd round key to vKey10 + __ addi (keypos, keypos, -16); + __ lvx (vTmp2, keypos, key); + __ vperm (vKey3, vTmp1, vTmp2, keyPerm); + + // load the 2nd round key to vKey10 + __ addi (keypos, keypos, -16); + __ lvx (vTmp1, keypos, key); + __ vperm (vKey4, vTmp2, vTmp1, keyPerm); + + // load the 1st round key to vKey10 + __ addi (keypos, keypos, -16); + __ lvx (vTmp2, keypos, key); + __ vperm (vKey5, vTmp1, vTmp2, keyPerm); + + // last 5th - 1th rounds + __ vncipher (vRet, vRet, vKey1); + __ vncipher (vRet, vRet, vKey2); + __ vncipher (vRet, vRet, vKey3); + __ vncipher (vRet, vRet, vKey4); + __ vncipherlast (vRet, vRet, vKey5); + + __ neg (temp, to); + __ lvsr (toPerm, temp); + __ vspltisb (vTmp2, -1); + __ vxor (vTmp1, vTmp1, vTmp1); + __ vperm (vTmp2, vTmp2, vTmp1, toPerm); + __ vxor (toPerm, toPerm, fSplt); + __ lvx (vTmp1, to); + __ vperm (vRet, vRet, vRet, toPerm); + __ vsel (vTmp1, vTmp1, vRet, vTmp2); + __ lvx (vTmp4, fifteen, to); + __ stvx (vTmp1, to); + __ vsel (vRet, vRet, vTmp4, vTmp2); + __ stvx (vRet, fifteen, to); + + __ blr(); + return start; + } + void generate_arraycopy_stubs() { // Note: the disjoint stubs must be generated first, some of // the conjoint stubs use them. @@ -2083,10 +2511,6 @@ // arraycopy stubs used by compilers generate_arraycopy_stubs(); - if (UseAESIntrinsics) { - guarantee(!UseAESIntrinsics, "not yet implemented."); - } - // Safefetch stubs. generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry, &StubRoutines::_safefetch32_fault_pc, @@ -2094,6 +2518,12 @@ generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry, &StubRoutines::_safefetchN_fault_pc, &StubRoutines::_safefetchN_continuation_pc); + + if (UseAESIntrinsics) { + StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock(); + StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock(); + } + } public: diff --git a/src/cpu/ppc/vm/vm_version_ppc.cpp b/src/cpu/ppc/vm/vm_version_ppc.cpp --- a/src/cpu/ppc/vm/vm_version_ppc.cpp +++ b/src/cpu/ppc/vm/vm_version_ppc.cpp @@ -102,7 +102,7 @@ // Create and print feature-string. char buf[(num_features+1) * 16]; // Max 16 chars per feature. jio_snprintf(buf, sizeof(buf), - "ppc64%s%s%s%s%s%s%s%s", + "ppc64%s%s%s%s%s%s%s%s%s", (has_fsqrt() ? " fsqrt" : ""), (has_isel() ? " isel" : ""), (has_lxarxeh() ? " lxarxeh" : ""), @@ -111,7 +111,8 @@ (has_popcntb() ? " popcntb" : ""), (has_popcntw() ? " popcntw" : ""), (has_fcfids() ? " fcfids" : ""), - (has_vand() ? " vand" : "") + (has_vand() ? " vand" : ""), + (has_vcipher() ? " aes" : "") // Make sure number of %s matches num_features! ); _features_str = strdup(buf); @@ -156,6 +157,28 @@ } // The AES intrinsic stubs require AES instruction support. +#if defined(VM_LITTLE_ENDIAN) + if (has_vcipher()) { + if (FLAG_IS_DEFAULT(UseAES)) { + UseAES = true; + } + } else if (UseAES) { + if (!FLAG_IS_DEFAULT(UseAES)) + warning("AES instructions are not available on this CPU"); + FLAG_SET_DEFAULT(UseAES, false); + } + + if (UseAES && has_vcipher()) { + if (FLAG_IS_DEFAULT(UseAESIntrinsics)) { + UseAESIntrinsics = true; + } + } else if (UseAESIntrinsics) { + if (!FLAG_IS_DEFAULT(UseAESIntrinsics)) + warning("AES intrinsics are not available on this CPU"); + FLAG_SET_DEFAULT(UseAESIntrinsics, false); + } + +#else if (UseAES) { warning("AES instructions are not available on this CPU"); FLAG_SET_DEFAULT(UseAES, false); @@ -165,6 +188,7 @@ warning("AES intrinsics are not available on this CPU"); FLAG_SET_DEFAULT(UseAESIntrinsics, false); } +#endif if (UseSHA) { warning("SHA instructions are not available on this CPU"); @@ -452,6 +476,7 @@ a->popcntw(R7, R5); // code[7] -> popcntw a->fcfids(F3, F4); // code[8] -> fcfids a->vand(VR0, VR0, VR0); // code[9] -> vand + a->vcipher(VR0, VR1, VR2); // code[10] -> vcipher a->blr(); // Emit function to set one cache line to zero. Emit function descriptor and get pointer to it. @@ -495,6 +520,7 @@ if (code[feature_cntr++]) features |= popcntw_m; if (code[feature_cntr++]) features |= fcfids_m; if (code[feature_cntr++]) features |= vand_m; + if (code[feature_cntr++]) features |= vcipher_m; // Print the detection code. if (PrintAssembly) { diff --git a/src/cpu/ppc/vm/vm_version_ppc.hpp b/src/cpu/ppc/vm/vm_version_ppc.hpp --- a/src/cpu/ppc/vm/vm_version_ppc.hpp +++ b/src/cpu/ppc/vm/vm_version_ppc.hpp @@ -42,6 +42,7 @@ fcfids, vand, dcba, + vcipher, num_features // last entry to count features }; enum Feature_Flag_Set { @@ -56,6 +57,7 @@ fcfids_m = (1 << fcfids ), vand_m = (1 << vand ), dcba_m = (1 << dcba ), + vcipher_m = (1 << vcipher), all_features_m = -1 }; static int _features; @@ -83,6 +85,7 @@ static bool has_fcfids() { return (_features & fcfids_m) != 0; } static bool has_vand() { return (_features & vand_m) != 0; } static bool has_dcba() { return (_features & dcba_m) != 0; } + static bool has_vcipher() { return (_features & vcipher_m) != 0; } static const char* cpu_features() { return _features_str; } diff --git a/src/share/vm/opto/graphKit.cpp b/src/share/vm/opto/graphKit.cpp --- a/src/share/vm/opto/graphKit.cpp +++ b/src/share/vm/opto/graphKit.cpp @@ -1683,6 +1683,9 @@ const Type* elemtype = arytype->elem(); BasicType elembt = elemtype->array_element_basic_type(); Node* adr = array_element_address(ary, idx, elembt, arytype->size()); + if (elembt == T_NARROWOOP) { + elembt = T_OBJECT; // To satisfy switch in LoadNode::make() + } Node* ld = make_load(ctl, adr, elemtype, elembt, arytype, MemNode::unordered); return ld; } diff --git a/src/share/vm/opto/library_call.cpp b/src/share/vm/opto/library_call.cpp --- a/src/share/vm/opto/library_call.cpp +++ b/src/share/vm/opto/library_call.cpp @@ -6459,7 +6459,20 @@ //------------------------------get_key_start_from_aescrypt_object----------------------- Node * LibraryCallKit::get_key_start_from_aescrypt_object(Node *aescrypt_object) { +#ifdef PPC64 + // MixColumns for decryption can be reduced by preprocessing MixColumns with round keys. + // Intel's extention is based on this optimization and AESCrypt generates round keys by preprocessing MixColumns. + // However, ppc64 vncipher processes MixColumns and requires the same round keys with encryption. + // The ppc64 stubs of encryption and decryption use the same round keys (sessionK[0]). + Node* objSessionK = load_field_from_object(aescrypt_object, "sessionK", "[[I", /*is_exact*/ false); + assert (objSessionK != NULL, "wrong version of com.sun.crypto.provider.AESCrypt"); + if (objSessionK == NULL) { + return (Node *) NULL; + } + Node* objAESCryptKey = load_array_element(control(), objSessionK, intcon(0), TypeAryPtr::OOPS); +#else Node* objAESCryptKey = load_field_from_object(aescrypt_object, "K", "[I", /*is_exact*/ false); +#endif // PPC64 assert (objAESCryptKey != NULL, "wrong version of com.sun.crypto.provider.AESCrypt"); if (objAESCryptKey == NULL) return (Node *) NULL;