< prev index next >
src/cpu/ppc/vm/stubGenerator_ppc.cpp
Print this page
*** 1959,1969 ****
gen_write_ref_array_post_barrier(R9_ARG7, R10_ARG8, R11_scratch1, /*branchToEnd*/ false);
return start;
}
! // Arguments for generated stub (little endian only):
// R3_ARG1 - source byte array address
// R4_ARG2 - destination byte array address
// R5_ARG3 - round key array
address generate_aescrypt_encryptBlock() {
assert(UseAES, "need AES instructions and misaligned SSE support");
--- 1959,1969 ----
gen_write_ref_array_post_barrier(R9_ARG7, R10_ARG8, R11_scratch1, /*branchToEnd*/ false);
return start;
}
! // Arguments for generated stub:
// R3_ARG1 - source byte array address
// R4_ARG2 - destination byte array address
// R5_ARG3 - round key array
address generate_aescrypt_encryptBlock() {
assert(UseAES, "need AES instructions and misaligned SSE support");
*** 1978,1988 ****
Register key = R5_ARG3; // round key array
Register keylen = R8;
Register temp = R9;
Register keypos = R10;
- Register hex = R11;
Register fifteen = R12;
VectorRegister vRet = VR0;
VectorRegister vKey1 = VR1;
--- 1978,1987 ----
*** 1998,2165 ****
VectorRegister vTmp1 = VR9;
VectorRegister vTmp2 = VR10;
VectorRegister vTmp3 = VR11;
VectorRegister vTmp4 = VR12;
- VectorRegister vLow = VR13;
- VectorRegister vHigh = VR14;
-
- __ li (hex, 16);
__ li (fifteen, 15);
- __ vspltisb (fSplt, 0x0f);
// load unaligned from[0-15] to vsRet
__ lvx (vRet, from);
__ lvx (vTmp1, fifteen, from);
__ lvsl (fromPerm, from);
__ vxor (fromPerm, fromPerm, fSplt);
__ vperm (vRet, vRet, vTmp1, fromPerm);
// load keylen (44 or 52 or 60)
__ lwz (keylen, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT), key);
// to load keys
! __ lvsr (keyPerm, key);
! __ vxor (vTmp2, vTmp2, vTmp2);
__ vspltisb (vTmp2, -16);
__ vrld (keyPerm, keyPerm, vTmp2);
__ vrld (keyPerm, keyPerm, vTmp2);
! __ vsldoi (keyPerm, keyPerm, keyPerm, -8);
! // load the 1st round key to vKey1
! __ li (keypos, 0);
__ lvx (vKey1, keypos, key);
! __ addi (keypos, keypos, 16);
! __ lvx (vTmp1, keypos, key);
! __ vperm (vKey1, vTmp1, vKey1, keyPerm);
// 1st round
! __ vxor (vRet, vRet, vKey1);
// load the 2nd round key to vKey1
! __ addi (keypos, keypos, 16);
! __ lvx (vTmp2, keypos, key);
! __ vperm (vKey1, vTmp2, vTmp1, keyPerm);
// load the 3rd round key to vKey2
! __ addi (keypos, keypos, 16);
! __ lvx (vTmp1, keypos, key);
! __ vperm (vKey2, vTmp1, vTmp2, keyPerm);
// load the 4th round key to vKey3
! __ addi (keypos, keypos, 16);
! __ lvx (vTmp2, keypos, key);
! __ vperm (vKey3, vTmp2, vTmp1, keyPerm);
// load the 5th round key to vKey4
! __ addi (keypos, keypos, 16);
__ lvx (vTmp1, keypos, key);
! __ vperm (vKey4, vTmp1, vTmp2, keyPerm);
// 2nd - 5th rounds
__ vcipher (vRet, vRet, vKey1);
__ vcipher (vRet, vRet, vKey2);
__ vcipher (vRet, vRet, vKey3);
__ vcipher (vRet, vRet, vKey4);
// load the 6th round key to vKey1
! __ addi (keypos, keypos, 16);
! __ lvx (vTmp2, keypos, key);
! __ vperm (vKey1, vTmp2, vTmp1, keyPerm);
// load the 7th round key to vKey2
! __ addi (keypos, keypos, 16);
! __ lvx (vTmp1, keypos, key);
! __ vperm (vKey2, vTmp1, vTmp2, keyPerm);
// load the 8th round key to vKey3
! __ addi (keypos, keypos, 16);
! __ lvx (vTmp2, keypos, key);
! __ vperm (vKey3, vTmp2, vTmp1, keyPerm);
// load the 9th round key to vKey4
! __ addi (keypos, keypos, 16);
__ lvx (vTmp1, keypos, key);
! __ vperm (vKey4, vTmp1, vTmp2, keyPerm);
// 6th - 9th rounds
__ vcipher (vRet, vRet, vKey1);
__ vcipher (vRet, vRet, vKey2);
__ vcipher (vRet, vRet, vKey3);
__ vcipher (vRet, vRet, vKey4);
// load the 10th round key to vKey1
! __ addi (keypos, keypos, 16);
! __ lvx (vTmp2, keypos, key);
! __ vperm (vKey1, vTmp2, vTmp1, keyPerm);
// load the 11th round key to vKey2
! __ addi (keypos, keypos, 16);
__ lvx (vTmp1, keypos, key);
! __ vperm (vKey2, vTmp1, vTmp2, keyPerm);
// if all round keys are loaded, skip next 4 rounds
__ cmpwi (CCR0, keylen, 44);
__ beq (CCR0, L_doLast);
// 10th - 11th rounds
__ vcipher (vRet, vRet, vKey1);
__ vcipher (vRet, vRet, vKey2);
// load the 12th round key to vKey1
! __ addi (keypos, keypos, 16);
! __ lvx (vTmp2, keypos, key);
! __ vperm (vKey1, vTmp2, vTmp1, keyPerm);
// load the 13th round key to vKey2
! __ addi (keypos, keypos, 16);
__ lvx (vTmp1, keypos, key);
! __ vperm (vKey2, vTmp1, vTmp2, keyPerm);
// if all round keys are loaded, skip next 2 rounds
__ cmpwi (CCR0, keylen, 52);
__ beq (CCR0, L_doLast);
// 12th - 13th rounds
__ vcipher (vRet, vRet, vKey1);
__ vcipher (vRet, vRet, vKey2);
// load the 14th round key to vKey1
! __ addi (keypos, keypos, 16);
! __ lvx (vTmp2, keypos, key);
! __ vperm (vKey1, vTmp2, vTmp1, keyPerm);
// load the 15th round key to vKey2
! __ addi (keypos, keypos, 16);
__ lvx (vTmp1, keypos, key);
! __ vperm (vKey2, vTmp1, vTmp2, keyPerm);
__ bind(L_doLast);
// last two rounds
__ vcipher (vRet, vRet, vKey1);
__ vcipherlast (vRet, vRet, vKey2);
! __ neg (temp, to);
! __ lvsr (toPerm, temp);
! __ vspltisb (vTmp2, -1);
! __ vxor (vTmp1, vTmp1, vTmp1);
! __ vperm (vTmp2, vTmp2, vTmp1, toPerm);
! __ vxor (toPerm, toPerm, fSplt);
__ lvx (vTmp1, to);
! __ vperm (vRet, vRet, vRet, toPerm);
! __ vsel (vTmp1, vTmp1, vRet, vTmp2);
! __ lvx (vTmp4, fifteen, to);
__ stvx (vTmp1, to);
- __ vsel (vRet, vRet, vTmp4, vTmp2);
- __ stvx (vRet, fifteen, to);
__ blr();
return start;
}
! // Arguments for generated stub (little endian only):
// R3_ARG1 - source byte array address
// R4_ARG2 - destination byte array address
// R5_ARG3 - K (key) in little endian int array
address generate_aescrypt_decryptBlock() {
assert(UseAES, "need AES instructions and misaligned SSE support");
--- 1997,2170 ----
VectorRegister vTmp1 = VR9;
VectorRegister vTmp2 = VR10;
VectorRegister vTmp3 = VR11;
VectorRegister vTmp4 = VR12;
__ li (fifteen, 15);
// load unaligned from[0-15] to vRet
__ lvx (vRet, from);
__ lvx (vTmp1, fifteen, from);
__ lvsl (fromPerm, from);
+ #ifdef VM_LITTLE_ENDIAN
+ __ vspltisb (fSplt, 0x0f);
__ vxor (fromPerm, fromPerm, fSplt);
+ #endif
__ vperm (vRet, vRet, vTmp1, fromPerm);
// load keylen (44 or 52 or 60)
__ lwz (keylen, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT), key);
// to load keys
! __ load_perm (keyPerm, key);
! #ifdef VM_LITTLE_ENDIAN
__ vspltisb (vTmp2, -16);
__ vrld (keyPerm, keyPerm, vTmp2);
__ vrld (keyPerm, keyPerm, vTmp2);
! __ vsldoi (keyPerm, keyPerm, keyPerm, 8);
! #endif
! // load the 1st round key to vTmp1
! __ lvx (vTmp1, key);
! __ li (keypos, 16);
__ lvx (vKey1, keypos, key);
! __ vec_perm (vTmp1, vKey1, keyPerm);
// 1st round
! __ vxor (vRet, vRet, vTmp1);
// load the 2nd round key to vKey1
! __ li (keypos, 32);
! __ lvx (vKey2, keypos, key);
! __ vec_perm (vKey1, vKey2, keyPerm);
// load the 3rd round key to vKey2
! __ li (keypos, 48);
! __ lvx (vKey3, keypos, key);
! __ vec_perm (vKey2, vKey3, keyPerm);
// load the 4th round key to vKey3
! __ li (keypos, 64);
! __ lvx (vKey4, keypos, key);
! __ vec_perm (vKey3, vKey4, keyPerm);
// load the 5th round key to vKey4
! __ li (keypos, 80);
__ lvx (vTmp1, keypos, key);
! __ vec_perm (vKey4, vTmp1, keyPerm);
// 2nd - 5th rounds
__ vcipher (vRet, vRet, vKey1);
__ vcipher (vRet, vRet, vKey2);
__ vcipher (vRet, vRet, vKey3);
__ vcipher (vRet, vRet, vKey4);
// load the 6th round key to vKey1
! __ li (keypos, 96);
! __ lvx (vKey2, keypos, key);
! __ vec_perm (vKey1, vTmp1, vKey2, keyPerm);
// load the 7th round key to vKey2
! __ li (keypos, 112);
! __ lvx (vKey3, keypos, key);
! __ vec_perm (vKey2, vKey3, keyPerm);
// load the 8th round key to vKey3
! __ li (keypos, 128);
! __ lvx (vKey4, keypos, key);
! __ vec_perm (vKey3, vKey4, keyPerm);
// load the 9th round key to vKey4
! __ li (keypos, 144);
__ lvx (vTmp1, keypos, key);
! __ vec_perm (vKey4, vTmp1, keyPerm);
// 6th - 9th rounds
__ vcipher (vRet, vRet, vKey1);
__ vcipher (vRet, vRet, vKey2);
__ vcipher (vRet, vRet, vKey3);
__ vcipher (vRet, vRet, vKey4);
// load the 10th round key to vKey1
! __ li (keypos, 160);
! __ lvx (vKey2, keypos, key);
! __ vec_perm (vKey1, vTmp1, vKey2, keyPerm);
// load the 11th round key to vKey2
! __ li (keypos, 176);
__ lvx (vTmp1, keypos, key);
! __ vec_perm (vKey2, vTmp1, keyPerm);
// if all round keys are loaded, skip next 4 rounds
__ cmpwi (CCR0, keylen, 44);
__ beq (CCR0, L_doLast);
// 10th - 11th rounds
__ vcipher (vRet, vRet, vKey1);
__ vcipher (vRet, vRet, vKey2);
// load the 12th round key to vKey1
! __ li (keypos, 192);
! __ lvx (vKey2, keypos, key);
! __ vec_perm (vKey1, vTmp1, vKey2, keyPerm);
// load the 13th round key to vKey2
! __ li (keypos, 208);
__ lvx (vTmp1, keypos, key);
! __ vec_perm (vKey2, vTmp1, keyPerm);
// if all round keys are loaded, skip next 2 rounds
__ cmpwi (CCR0, keylen, 52);
__ beq (CCR0, L_doLast);
// 12th - 13th rounds
__ vcipher (vRet, vRet, vKey1);
__ vcipher (vRet, vRet, vKey2);
// load the 14th round key to vKey1
! __ li (keypos, 224);
! __ lvx (vKey2, keypos, key);
! __ vec_perm (vKey1, vTmp1, vKey2, keyPerm);
// load the 15th round key to vKey2
! __ li (keypos, 240);
__ lvx (vTmp1, keypos, key);
! __ vec_perm (vKey2, vTmp1, keyPerm);
__ bind(L_doLast);
// last two rounds
__ vcipher (vRet, vRet, vKey1);
__ vcipherlast (vRet, vRet, vKey2);
! // store result (unaligned)
! #ifdef VM_LITTLE_ENDIAN
! __ lvsl (toPerm, to);
! #else
! __ lvsr (toPerm, to);
! #endif
! __ vspltisb (vTmp3, -1);
! __ vspltisb (vTmp4, 0);
__ lvx (vTmp1, to);
! __ lvx (vTmp2, fifteen, to);
! #ifdef VM_LITTLE_ENDIAN
! __ vperm (vTmp3, vTmp3, vTmp4, toPerm); // generate select mask
! __ vxor (toPerm, toPerm, fSplt); // swap bytes
! #else
! __ vperm (vTmp3, vTmp4, vTmp3, toPerm); // generate select mask
! #endif
! __ vperm (vTmp4, vRet, vRet, toPerm); // rotate data
! __ vsel (vTmp2, vTmp4, vTmp2, vTmp3);
! __ vsel (vTmp1, vTmp1, vTmp4, vTmp3);
! __ stvx (vTmp2, fifteen, to); // store this one first (may alias)
__ stvx (vTmp1, to);
__ blr();
return start;
}
! // Arguments for generated stub:
// R3_ARG1 - source byte array address
// R4_ARG2 - destination byte array address
// R5_ARG3 - K (key) in little endian int array
address generate_aescrypt_decryptBlock() {
assert(UseAES, "need AES instructions and misaligned SSE support");
*** 2177,2187 ****
Register key = R5_ARG3; // round key array
Register keylen = R8;
Register temp = R9;
Register keypos = R10;
- Register hex = R11;
Register fifteen = R12;
VectorRegister vRet = VR0;
VectorRegister vKey1 = VR1;
--- 2182,2191 ----
*** 2198,2264 ****
VectorRegister vTmp1 = VR10;
VectorRegister vTmp2 = VR11;
VectorRegister vTmp3 = VR12;
VectorRegister vTmp4 = VR13;
- VectorRegister vLow = VR14;
- VectorRegister vHigh = VR15;
-
- __ li (hex, 16);
__ li (fifteen, 15);
- __ vspltisb (fSplt, 0x0f);
// load unaligned from[0-15] to vsRet
__ lvx (vRet, from);
__ lvx (vTmp1, fifteen, from);
__ lvsl (fromPerm, from);
__ vxor (fromPerm, fromPerm, fSplt);
__ vperm (vRet, vRet, vTmp1, fromPerm); // align [and byte swap in LE]
// load keylen (44 or 52 or 60)
__ lwz (keylen, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT), key);
// to load keys
! __ lvsr (keyPerm, key);
__ vxor (vTmp2, vTmp2, vTmp2);
__ vspltisb (vTmp2, -16);
__ vrld (keyPerm, keyPerm, vTmp2);
__ vrld (keyPerm, keyPerm, vTmp2);
! __ vsldoi (keyPerm, keyPerm, keyPerm, -8);
__ cmpwi (CCR0, keylen, 44);
__ beq (CCR0, L_do44);
__ cmpwi (CCR0, keylen, 52);
__ beq (CCR0, L_do52);
! // load the 15th round key to vKey11
__ li (keypos, 240);
! __ lvx (vTmp1, keypos, key);
! __ addi (keypos, keypos, -16);
! __ lvx (vTmp2, keypos, key);
! __ vperm (vKey1, vTmp1, vTmp2, keyPerm);
! // load the 14th round key to vKey10
! __ addi (keypos, keypos, -16);
! __ lvx (vTmp1, keypos, key);
! __ vperm (vKey2, vTmp2, vTmp1, keyPerm);
! // load the 13th round key to vKey10
! __ addi (keypos, keypos, -16);
! __ lvx (vTmp2, keypos, key);
! __ vperm (vKey3, vTmp1, vTmp2, keyPerm);
! // load the 12th round key to vKey10
! __ addi (keypos, keypos, -16);
! __ lvx (vTmp1, keypos, key);
! __ vperm (vKey4, vTmp2, vTmp1, keyPerm);
! // load the 11th round key to vKey10
! __ addi (keypos, keypos, -16);
! __ lvx (vTmp2, keypos, key);
! __ vperm (vKey5, vTmp1, vTmp2, keyPerm);
// 1st - 5th rounds
__ vxor (vRet, vRet, vKey1);
__ vncipher (vRet, vRet, vKey2);
__ vncipher (vRet, vRet, vKey3);
--- 2202,2268 ----
VectorRegister vTmp1 = VR10;
VectorRegister vTmp2 = VR11;
VectorRegister vTmp3 = VR12;
VectorRegister vTmp4 = VR13;
__ li (fifteen, 15);
// load unaligned from[0-15] to vRet
__ lvx (vRet, from);
__ lvx (vTmp1, fifteen, from);
__ lvsl (fromPerm, from);
+ #ifdef VM_LITTLE_ENDIAN
+ __ vspltisb (fSplt, 0x0f);
__ vxor (fromPerm, fromPerm, fSplt);
+ #endif
__ vperm (vRet, vRet, vTmp1, fromPerm); // align [and byte swap in LE]
// load keylen (44 or 52 or 60)
__ lwz (keylen, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT), key);
// to load keys
! __ load_perm (keyPerm, key);
! #ifdef VM_LITTLE_ENDIAN
__ vxor (vTmp2, vTmp2, vTmp2);
__ vspltisb (vTmp2, -16);
__ vrld (keyPerm, keyPerm, vTmp2);
__ vrld (keyPerm, keyPerm, vTmp2);
! __ vsldoi (keyPerm, keyPerm, keyPerm, 8);
! #endif
__ cmpwi (CCR0, keylen, 44);
__ beq (CCR0, L_do44);
__ cmpwi (CCR0, keylen, 52);
__ beq (CCR0, L_do52);
! // load the 15th round key to vKey1
__ li (keypos, 240);
! __ lvx (vKey1, keypos, key);
! __ li (keypos, 224);
! __ lvx (vKey2, keypos, key);
! __ vec_perm (vKey1, vKey2, vKey1, keyPerm);
! // load the 14th round key to vKey2
! __ li (keypos, 208);
! __ lvx (vKey3, keypos, key);
! __ vec_perm (vKey2, vKey3, vKey2, keyPerm);
! // load the 13th round key to vKey3
! __ li (keypos, 192);
! __ lvx (vKey4, keypos, key);
! __ vec_perm (vKey3, vKey4, vKey3, keyPerm);
! // load the 12th round key to vKey4
! __ li (keypos, 176);
! __ lvx (vKey5, keypos, key);
! __ vec_perm (vKey4, vKey5, vKey4, keyPerm);
! // load the 11th round key to vKey5
! __ li (keypos, 160);
! __ lvx (vTmp1, keypos, key);
! __ vec_perm (vKey5, vTmp1, vKey5, keyPerm);
// 1st - 5th rounds
__ vxor (vRet, vRet, vKey1);
__ vncipher (vRet, vRet, vKey2);
__ vncipher (vRet, vRet, vKey3);
*** 2267,2391 ****
__ b (L_doLast);
__ bind (L_do52);
! // load the 13th round key to vKey11
__ li (keypos, 208);
! __ lvx (vTmp1, keypos, key);
! __ addi (keypos, keypos, -16);
! __ lvx (vTmp2, keypos, key);
! __ vperm (vKey1, vTmp1, vTmp2, keyPerm);
! // load the 12th round key to vKey10
! __ addi (keypos, keypos, -16);
! __ lvx (vTmp1, keypos, key);
! __ vperm (vKey2, vTmp2, vTmp1, keyPerm);
! // load the 11th round key to vKey10
! __ addi (keypos, keypos, -16);
! __ lvx (vTmp2, keypos, key);
! __ vperm (vKey3, vTmp1, vTmp2, keyPerm);
// 1st - 3rd rounds
__ vxor (vRet, vRet, vKey1);
__ vncipher (vRet, vRet, vKey2);
__ vncipher (vRet, vRet, vKey3);
__ b (L_doLast);
__ bind (L_do44);
! // load the 11th round key to vKey11
__ li (keypos, 176);
__ lvx (vTmp1, keypos, key);
! __ addi (keypos, keypos, -16);
! __ lvx (vTmp2, keypos, key);
! __ vperm (vKey1, vTmp1, vTmp2, keyPerm);
// 1st round
__ vxor (vRet, vRet, vKey1);
__ bind (L_doLast);
! // load the 10th round key to vKey10
! __ addi (keypos, keypos, -16);
! __ lvx (vTmp1, keypos, key);
! __ vperm (vKey1, vTmp2, vTmp1, keyPerm);
!
! // load the 9th round key to vKey10
! __ addi (keypos, keypos, -16);
! __ lvx (vTmp2, keypos, key);
! __ vperm (vKey2, vTmp1, vTmp2, keyPerm);
!
! // load the 8th round key to vKey10
! __ addi (keypos, keypos, -16);
! __ lvx (vTmp1, keypos, key);
! __ vperm (vKey3, vTmp2, vTmp1, keyPerm);
! // load the 7th round key to vKey10
! __ addi (keypos, keypos, -16);
! __ lvx (vTmp2, keypos, key);
! __ vperm (vKey4, vTmp1, vTmp2, keyPerm);
! // load the 6th round key to vKey10
! __ addi (keypos, keypos, -16);
__ lvx (vTmp1, keypos, key);
! __ vperm (vKey5, vTmp2, vTmp1, keyPerm);
// last 10th - 6th rounds
__ vncipher (vRet, vRet, vKey1);
__ vncipher (vRet, vRet, vKey2);
__ vncipher (vRet, vRet, vKey3);
__ vncipher (vRet, vRet, vKey4);
__ vncipher (vRet, vRet, vKey5);
! // load the 5th round key to vKey10
! __ addi (keypos, keypos, -16);
! __ lvx (vTmp2, keypos, key);
! __ vperm (vKey1, vTmp1, vTmp2, keyPerm);
!
! // load the 4th round key to vKey10
! __ addi (keypos, keypos, -16);
! __ lvx (vTmp1, keypos, key);
! __ vperm (vKey2, vTmp2, vTmp1, keyPerm);
!
! // load the 3rd round key to vKey10
! __ addi (keypos, keypos, -16);
! __ lvx (vTmp2, keypos, key);
! __ vperm (vKey3, vTmp1, vTmp2, keyPerm);
!
! // load the 2nd round key to vKey10
! __ addi (keypos, keypos, -16);
! __ lvx (vTmp1, keypos, key);
! __ vperm (vKey4, vTmp2, vTmp1, keyPerm);
!
! // load the 1st round key to vKey10
! __ addi (keypos, keypos, -16);
! __ lvx (vTmp2, keypos, key);
! __ vperm (vKey5, vTmp1, vTmp2, keyPerm);
// last 5th - 1th rounds
__ vncipher (vRet, vRet, vKey1);
__ vncipher (vRet, vRet, vKey2);
__ vncipher (vRet, vRet, vKey3);
__ vncipher (vRet, vRet, vKey4);
__ vncipherlast (vRet, vRet, vKey5);
! __ neg (temp, to);
! __ lvsr (toPerm, temp);
! __ vspltisb (vTmp2, -1);
! __ vxor (vTmp1, vTmp1, vTmp1);
! __ vperm (vTmp2, vTmp2, vTmp1, toPerm);
! __ vxor (toPerm, toPerm, fSplt);
__ lvx (vTmp1, to);
! __ vperm (vRet, vRet, vRet, toPerm);
! __ vsel (vTmp1, vTmp1, vRet, vTmp2);
! __ lvx (vTmp4, fifteen, to);
__ stvx (vTmp1, to);
- __ vsel (vRet, vRet, vTmp4, vTmp2);
- __ stvx (vRet, fifteen, to);
__ blr();
return start;
}
--- 2271,2402 ----
__ b (L_doLast);
__ bind (L_do52);
! // load the 13th round key to vKey1
__ li (keypos, 208);
! __ lvx (vKey1, keypos, key);
! __ li (keypos, 192);
! __ lvx (vKey2, keypos, key);
! __ vec_perm (vKey1, vKey2, vKey1, keyPerm);
! // load the 12th round key to vKey2
! __ li (keypos, 176);
! __ lvx (vKey3, keypos, key);
! __ vec_perm (vKey2, vKey3, vKey2, keyPerm);
! // load the 11th round key to vKey3
! __ li (keypos, 160);
! __ lvx (vTmp1, keypos, key);
! __ vec_perm (vKey3, vTmp1, vKey3, keyPerm);
// 1st - 3rd rounds
__ vxor (vRet, vRet, vKey1);
__ vncipher (vRet, vRet, vKey2);
__ vncipher (vRet, vRet, vKey3);
__ b (L_doLast);
__ bind (L_do44);
! // load the 11th round key to vKey1
__ li (keypos, 176);
+ __ lvx (vKey1, keypos, key);
+ __ li (keypos, 160);
__ lvx (vTmp1, keypos, key);
! __ vec_perm (vKey1, vTmp1, vKey1, keyPerm);
// 1st round
__ vxor (vRet, vRet, vKey1);
__ bind (L_doLast);
! // load the 10th round key to vKey1
! __ li (keypos, 144);
! __ lvx (vKey2, keypos, key);
! __ vec_perm (vKey1, vKey2, vTmp1, keyPerm);
!
! // load the 9th round key to vKey2
! __ li (keypos, 128);
! __ lvx (vKey3, keypos, key);
! __ vec_perm (vKey2, vKey3, vKey2, keyPerm);
! // load the 8th round key to vKey3
! __ li (keypos, 112);
! __ lvx (vKey4, keypos, key);
! __ vec_perm (vKey3, vKey4, vKey3, keyPerm);
!
! // load the 7th round key to vKey4
! __ li (keypos, 96);
! __ lvx (vKey5, keypos, key);
! __ vec_perm (vKey4, vKey5, vKey4, keyPerm);
! // load the 6th round key to vKey5
! __ li (keypos, 80);
__ lvx (vTmp1, keypos, key);
! __ vec_perm (vKey5, vTmp1, vKey5, keyPerm);
// last 10th - 6th rounds
__ vncipher (vRet, vRet, vKey1);
__ vncipher (vRet, vRet, vKey2);
__ vncipher (vRet, vRet, vKey3);
__ vncipher (vRet, vRet, vKey4);
__ vncipher (vRet, vRet, vKey5);
! // load the 5th round key to vKey1
! __ li (keypos, 64);
! __ lvx (vKey2, keypos, key);
! __ vec_perm (vKey1, vKey2, vTmp1, keyPerm);
!
! // load the 4th round key to vKey2
! __ li (keypos, 48);
! __ lvx (vKey3, keypos, key);
! __ vec_perm (vKey2, vKey3, vKey2, keyPerm);
!
! // load the 3rd round key to vKey3
! __ li (keypos, 32);
! __ lvx (vKey4, keypos, key);
! __ vec_perm (vKey3, vKey4, vKey3, keyPerm);
!
! // load the 2nd round key to vKey4
! __ li (keypos, 16);
! __ lvx (vKey5, keypos, key);
! __ vec_perm (vKey4, vKey5, vKey4, keyPerm);
!
! // load the 1st round key to vKey5
! __ lvx (vTmp1, key);
! __ vec_perm (vKey5, vTmp1, vKey5, keyPerm);
// last 5th - 1st rounds
__ vncipher (vRet, vRet, vKey1);
__ vncipher (vRet, vRet, vKey2);
__ vncipher (vRet, vRet, vKey3);
__ vncipher (vRet, vRet, vKey4);
__ vncipherlast (vRet, vRet, vKey5);
! // store result (unaligned)
! #ifdef VM_LITTLE_ENDIAN
! __ lvsl (toPerm, to);
! #else
! __ lvsr (toPerm, to);
! #endif
! __ vspltisb (vTmp3, -1);
! __ vspltisb (vTmp4, 0);
__ lvx (vTmp1, to);
! __ lvx (vTmp2, fifteen, to);
! #ifdef VM_LITTLE_ENDIAN
! __ vperm (vTmp3, vTmp3, vTmp4, toPerm); // generate select mask
! __ vxor (toPerm, toPerm, fSplt); // swap bytes
! #else
! __ vperm (vTmp3, vTmp4, vTmp3, toPerm); // generate select mask
! #endif
! __ vperm (vTmp4, vRet, vRet, toPerm); // rotate data
! __ vsel (vTmp2, vTmp4, vTmp2, vTmp3);
! __ vsel (vTmp1, vTmp1, vTmp4, vTmp3);
! __ stvx (vTmp2, fifteen, to); // store this one first (may alias)
__ stvx (vTmp1, to);
__ blr();
return start;
}
< prev index next >