1944 address start = __ function_entry(); 1945 1946 gen_write_ref_array_pre_barrier(R3_ARG1, R4_ARG2, R5_ARG3, dest_uninitialized, R9_ARG7); 1947 1948 // save some arguments, disjoint_long_copy_core destroys them. 1949 // needed for post barrier 1950 __ mr(R9_ARG7, R4_ARG2); 1951 __ mr(R10_ARG8, R5_ARG3); 1952 1953 if (UseCompressedOops) { 1954 generate_disjoint_int_copy_core(aligned); 1955 } else { 1956 generate_disjoint_long_copy_core(aligned); 1957 } 1958 1959 gen_write_ref_array_post_barrier(R9_ARG7, R10_ARG8, R11_scratch1, /*branchToEnd*/ false); 1960 1961 return start; 1962 } 1963 1964 // Arguments for generated stub (little endian only): 1965 // R3_ARG1 - source byte array address 1966 // R4_ARG2 - destination byte array address 1967 // R5_ARG3 - round key array 1968 address generate_aescrypt_encryptBlock() { 1969 assert(UseAES, "need AES instructions and misaligned SSE support"); 1970 StubCodeMark mark(this, "StubRoutines", "aescrypt_encryptBlock"); 1971 1972 address start = __ function_entry(); 1973 1974 Label L_doLast; 1975 1976 Register from = R3_ARG1; // source array address 1977 Register to = R4_ARG2; // destination array address 1978 Register key = R5_ARG3; // round key array 1979 1980 Register keylen = R8; 1981 Register temp = R9; 1982 Register keypos = R10; 1983 Register hex = R11; 1984 Register fifteen = R12; 1985 1986 VectorRegister vRet = VR0; 1987 1988 VectorRegister vKey1 = VR1; 1989 VectorRegister vKey2 = VR2; 1990 VectorRegister vKey3 = VR3; 1991 VectorRegister vKey4 = VR4; 1992 1993 VectorRegister fromPerm = VR5; 1994 VectorRegister keyPerm = VR6; 1995 VectorRegister toPerm = VR7; 1996 VectorRegister fSplt = VR8; 1997 1998 VectorRegister vTmp1 = VR9; 1999 VectorRegister vTmp2 = VR10; 2000 VectorRegister vTmp3 = VR11; 2001 VectorRegister vTmp4 = VR12; 2002 2003 VectorRegister vLow = VR13; 2004 VectorRegister vHigh = VR14; 2005 2006 __ li (hex, 16); 2007 __ li (fifteen, 15); 2008 __ vspltisb (fSplt, 0x0f); 2009 2010 // load unaligned from[0-15] to vsRet 2011 __ lvx (vRet, from); 2012 __ lvx (vTmp1, fifteen, from); 2013 __ lvsl (fromPerm, from); 2014 __ vxor (fromPerm, fromPerm, fSplt); 2015 __ vperm (vRet, vRet, vTmp1, fromPerm); 2016 2017 // load keylen (44 or 52 or 60) 2018 __ lwz (keylen, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT), key); 2019 2020 // to load keys 2021 __ lvsr (keyPerm, key); 2022 __ vxor (vTmp2, vTmp2, vTmp2); 2023 __ vspltisb (vTmp2, -16); 2024 __ vrld (keyPerm, keyPerm, vTmp2); 2025 __ vrld (keyPerm, keyPerm, vTmp2); 2026 __ vsldoi (keyPerm, keyPerm, keyPerm, -8); 2027 2028 // load the 1st round key to vKey1 2029 __ li (keypos, 0); 2030 __ lvx (vKey1, keypos, key); 2031 __ addi (keypos, keypos, 16); 2032 __ lvx (vTmp1, keypos, key); 2033 __ vperm (vKey1, vTmp1, vKey1, keyPerm); 2034 2035 // 1st round 2036 __ vxor (vRet, vRet, vKey1); 2037 2038 // load the 2nd round key to vKey1 2039 __ addi (keypos, keypos, 16); 2040 __ lvx (vTmp2, keypos, key); 2041 __ vperm (vKey1, vTmp2, vTmp1, keyPerm); 2042 2043 // load the 3rd round key to vKey2 2044 __ addi (keypos, keypos, 16); 2045 __ lvx (vTmp1, keypos, key); 2046 __ vperm (vKey2, vTmp1, vTmp2, keyPerm); 2047 2048 // load the 4th round key to vKey3 2049 __ addi (keypos, keypos, 16); 2050 __ lvx (vTmp2, keypos, key); 2051 __ vperm (vKey3, vTmp2, vTmp1, keyPerm); 2052 2053 // load the 5th round key to vKey4 2054 __ addi (keypos, keypos, 16); 2055 __ lvx (vTmp1, keypos, key); 2056 __ vperm (vKey4, vTmp1, vTmp2, keyPerm); 2057 2058 // 2nd - 5th rounds 2059 __ vcipher (vRet, vRet, vKey1); 2060 __ vcipher (vRet, vRet, vKey2); 2061 __ vcipher (vRet, vRet, vKey3); 2062 __ vcipher (vRet, vRet, vKey4); 2063 2064 // load the 6th round key to vKey1 2065 __ addi (keypos, keypos, 16); 2066 __ lvx (vTmp2, keypos, key); 2067 __ vperm (vKey1, vTmp2, vTmp1, keyPerm); 2068 2069 // load the 7th round key to vKey2 2070 __ addi (keypos, keypos, 16); 2071 __ lvx (vTmp1, keypos, key); 2072 __ vperm (vKey2, vTmp1, vTmp2, keyPerm); 2073 2074 // load the 8th round key to vKey3 2075 __ addi (keypos, keypos, 16); 2076 __ lvx (vTmp2, keypos, key); 2077 __ vperm (vKey3, vTmp2, vTmp1, keyPerm); 2078 2079 // load the 9th round key to vKey4 2080 __ addi (keypos, keypos, 16); 2081 __ lvx (vTmp1, keypos, key); 2082 __ vperm (vKey4, vTmp1, vTmp2, keyPerm); 2083 2084 // 6th - 9th rounds 2085 __ vcipher (vRet, vRet, vKey1); 2086 __ vcipher (vRet, vRet, vKey2); 2087 __ vcipher (vRet, vRet, vKey3); 2088 __ vcipher (vRet, vRet, vKey4); 2089 2090 // load the 10th round key to vKey1 2091 __ addi (keypos, keypos, 16); 2092 __ lvx (vTmp2, keypos, key); 2093 __ vperm (vKey1, vTmp2, vTmp1, keyPerm); 2094 2095 // load the 11th round key to vKey2 2096 __ addi (keypos, keypos, 16); 2097 __ lvx (vTmp1, keypos, key); 2098 __ vperm (vKey2, vTmp1, vTmp2, keyPerm); 2099 2100 // if all round keys are loaded, skip next 4 rounds 2101 __ cmpwi (CCR0, keylen, 44); 2102 __ beq (CCR0, L_doLast); 2103 2104 // 10th - 11th rounds 2105 __ vcipher (vRet, vRet, vKey1); 2106 __ vcipher (vRet, vRet, vKey2); 2107 2108 // load the 12th round key to vKey1 2109 __ addi (keypos, keypos, 16); 2110 __ lvx (vTmp2, keypos, key); 2111 __ vperm (vKey1, vTmp2, vTmp1, keyPerm); 2112 2113 // load the 13th round key to vKey2 2114 __ addi (keypos, keypos, 16); 2115 __ lvx (vTmp1, keypos, key); 2116 __ vperm (vKey2, vTmp1, vTmp2, keyPerm); 2117 2118 // if all round keys are loaded, skip next 2 rounds 2119 __ cmpwi (CCR0, keylen, 52); 2120 __ beq (CCR0, L_doLast); 2121 2122 // 12th - 13th rounds 2123 __ vcipher (vRet, vRet, vKey1); 2124 __ vcipher (vRet, vRet, vKey2); 2125 2126 // load the 14th round key to vKey1 2127 __ addi (keypos, keypos, 16); 2128 __ lvx (vTmp2, keypos, key); 2129 __ vperm (vKey1, vTmp2, vTmp1, keyPerm); 2130 2131 // load the 15th round key to vKey2 2132 __ addi (keypos, keypos, 16); 2133 __ lvx (vTmp1, keypos, key); 2134 __ vperm (vKey2, vTmp1, vTmp2, keyPerm); 2135 2136 __ bind(L_doLast); 2137 2138 // last two rounds 2139 __ vcipher (vRet, vRet, vKey1); 2140 __ vcipherlast (vRet, vRet, vKey2); 2141 2142 __ neg (temp, to); 2143 __ lvsr (toPerm, temp); 2144 __ vspltisb (vTmp2, -1); 2145 __ vxor (vTmp1, vTmp1, vTmp1); 2146 __ vperm (vTmp2, vTmp2, vTmp1, toPerm); 2147 __ vxor (toPerm, toPerm, fSplt); 2148 __ lvx (vTmp1, to); 2149 __ vperm (vRet, vRet, vRet, toPerm); 2150 __ vsel (vTmp1, vTmp1, vRet, vTmp2); 2151 __ lvx (vTmp4, fifteen, to); 2152 __ stvx (vTmp1, to); 2153 __ vsel (vRet, vRet, vTmp4, vTmp2); 2154 __ stvx (vRet, fifteen, to); 2155 2156 __ blr(); 2157 return start; 2158 } 2159 2160 // Arguments for generated stub (little endian only): 2161 // R3_ARG1 - source byte array address 2162 // R4_ARG2 - destination byte array address 2163 // R5_ARG3 - K (key) in little endian int array 2164 address generate_aescrypt_decryptBlock() { 2165 assert(UseAES, "need AES instructions and misaligned SSE support"); 2166 StubCodeMark mark(this, "StubRoutines", "aescrypt_decryptBlock"); 2167 2168 address start = __ function_entry(); 2169 2170 Label L_doLast; 2171 Label L_do44; 2172 Label L_do52; 2173 Label L_do60; 2174 2175 Register from = R3_ARG1; // source array address 2176 Register to = R4_ARG2; // destination array address 2177 Register key = R5_ARG3; // round key array 2178 2179 Register keylen = R8; 2180 Register temp = R9; 2181 Register keypos = R10; 2182 Register hex = R11; 2183 Register fifteen = R12; 2184 2185 VectorRegister vRet = VR0; 2186 2187 VectorRegister vKey1 = VR1; 2188 VectorRegister vKey2 = VR2; 2189 VectorRegister vKey3 = VR3; 2190 VectorRegister vKey4 = VR4; 2191 VectorRegister vKey5 = VR5; 2192 2193 VectorRegister fromPerm = VR6; 2194 VectorRegister keyPerm = VR7; 2195 VectorRegister toPerm = VR8; 2196 VectorRegister fSplt = VR9; 2197 2198 VectorRegister vTmp1 = VR10; 2199 VectorRegister vTmp2 = VR11; 2200 VectorRegister vTmp3 = VR12; 2201 VectorRegister vTmp4 = VR13; 2202 2203 VectorRegister vLow = VR14; 2204 VectorRegister vHigh = VR15; 2205 2206 __ li (hex, 16); 2207 __ li (fifteen, 15); 2208 __ vspltisb (fSplt, 0x0f); 2209 2210 // load unaligned from[0-15] to vsRet 2211 __ lvx (vRet, from); 2212 __ lvx (vTmp1, fifteen, from); 2213 __ lvsl (fromPerm, from); 2214 __ vxor (fromPerm, fromPerm, fSplt); 2215 __ vperm (vRet, vRet, vTmp1, fromPerm); // align [and byte swap in LE] 2216 2217 // load keylen (44 or 52 or 60) 2218 __ lwz (keylen, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT), key); 2219 2220 // to load keys 2221 __ lvsr (keyPerm, key); 2222 __ vxor (vTmp2, vTmp2, vTmp2); 2223 __ vspltisb (vTmp2, -16); 2224 __ vrld (keyPerm, keyPerm, vTmp2); 2225 __ vrld (keyPerm, keyPerm, vTmp2); 2226 __ vsldoi (keyPerm, keyPerm, keyPerm, -8); 2227 2228 __ cmpwi (CCR0, keylen, 44); 2229 __ beq (CCR0, L_do44); 2230 2231 __ cmpwi (CCR0, keylen, 52); 2232 __ beq (CCR0, L_do52); 2233 2234 // load the 15th round key to vKey11 2235 __ li (keypos, 240); 2236 __ lvx (vTmp1, keypos, key); 2237 __ addi (keypos, keypos, -16); 2238 __ lvx (vTmp2, keypos, key); 2239 __ vperm (vKey1, vTmp1, vTmp2, keyPerm); 2240 2241 // load the 14th round key to vKey10 2242 __ addi (keypos, keypos, -16); 2243 __ lvx (vTmp1, keypos, key); 2244 __ vperm (vKey2, vTmp2, vTmp1, keyPerm); 2245 2246 // load the 13th round key to vKey10 2247 __ addi (keypos, keypos, -16); 2248 __ lvx (vTmp2, keypos, key); 2249 __ vperm (vKey3, vTmp1, vTmp2, keyPerm); 2250 2251 // load the 12th round key to vKey10 2252 __ addi (keypos, keypos, -16); 2253 __ lvx (vTmp1, keypos, key); 2254 __ vperm (vKey4, vTmp2, vTmp1, keyPerm); 2255 2256 // load the 11th round key to vKey10 2257 __ addi (keypos, keypos, -16); 2258 __ lvx (vTmp2, keypos, key); 2259 __ vperm (vKey5, vTmp1, vTmp2, keyPerm); 2260 2261 // 1st - 5th rounds 2262 __ vxor (vRet, vRet, vKey1); 2263 __ vncipher (vRet, vRet, vKey2); 2264 __ vncipher (vRet, vRet, vKey3); 2265 __ vncipher (vRet, vRet, vKey4); 2266 __ vncipher (vRet, vRet, vKey5); 2267 2268 __ b (L_doLast); 2269 2270 __ bind (L_do52); 2271 2272 // load the 13th round key to vKey11 2273 __ li (keypos, 208); 2274 __ lvx (vTmp1, keypos, key); 2275 __ addi (keypos, keypos, -16); 2276 __ lvx (vTmp2, keypos, key); 2277 __ vperm (vKey1, vTmp1, vTmp2, keyPerm); 2278 2279 // load the 12th round key to vKey10 2280 __ addi (keypos, keypos, -16); 2281 __ lvx (vTmp1, keypos, key); 2282 __ vperm (vKey2, vTmp2, vTmp1, keyPerm); 2283 2284 // load the 11th round key to vKey10 2285 __ addi (keypos, keypos, -16); 2286 __ lvx (vTmp2, keypos, key); 2287 __ vperm (vKey3, vTmp1, vTmp2, keyPerm); 2288 2289 // 1st - 3rd rounds 2290 __ vxor (vRet, vRet, vKey1); 2291 __ vncipher (vRet, vRet, vKey2); 2292 __ vncipher (vRet, vRet, vKey3); 2293 2294 __ b (L_doLast); 2295 2296 __ bind (L_do44); 2297 2298 // load the 11th round key to vKey11 2299 __ li (keypos, 176); 2300 __ lvx (vTmp1, keypos, key); 2301 __ addi (keypos, keypos, -16); 2302 __ lvx (vTmp2, keypos, key); 2303 __ vperm (vKey1, vTmp1, vTmp2, keyPerm); 2304 2305 // 1st round 2306 __ vxor (vRet, vRet, vKey1); 2307 2308 __ bind (L_doLast); 2309 2310 // load the 10th round key to vKey10 2311 __ addi (keypos, keypos, -16); 2312 __ lvx (vTmp1, keypos, key); 2313 __ vperm (vKey1, vTmp2, vTmp1, keyPerm); 2314 2315 // load the 9th round key to vKey10 2316 __ addi (keypos, keypos, -16); 2317 __ lvx (vTmp2, keypos, key); 2318 __ vperm (vKey2, vTmp1, vTmp2, keyPerm); 2319 2320 // load the 8th round key to vKey10 2321 __ addi (keypos, keypos, -16); 2322 __ lvx (vTmp1, keypos, key); 2323 __ vperm (vKey3, vTmp2, vTmp1, keyPerm); 2324 2325 // load the 7th round key to vKey10 2326 __ addi (keypos, keypos, -16); 2327 __ lvx (vTmp2, keypos, key); 2328 __ vperm (vKey4, vTmp1, vTmp2, keyPerm); 2329 2330 // load the 6th round key to vKey10 2331 __ addi (keypos, keypos, -16); 2332 __ lvx (vTmp1, keypos, key); 2333 __ vperm (vKey5, vTmp2, vTmp1, keyPerm); 2334 2335 // last 10th - 6th rounds 2336 __ vncipher (vRet, vRet, vKey1); 2337 __ vncipher (vRet, vRet, vKey2); 2338 __ vncipher (vRet, vRet, vKey3); 2339 __ vncipher (vRet, vRet, vKey4); 2340 __ vncipher (vRet, vRet, vKey5); 2341 2342 // load the 5th round key to vKey10 2343 __ addi (keypos, keypos, -16); 2344 __ lvx (vTmp2, keypos, key); 2345 __ vperm (vKey1, vTmp1, vTmp2, keyPerm); 2346 2347 // load the 4th round key to vKey10 2348 __ addi (keypos, keypos, -16); 2349 __ lvx (vTmp1, keypos, key); 2350 __ vperm (vKey2, vTmp2, vTmp1, keyPerm); 2351 2352 // load the 3rd round key to vKey10 2353 __ addi (keypos, keypos, -16); 2354 __ lvx (vTmp2, keypos, key); 2355 __ vperm (vKey3, vTmp1, vTmp2, keyPerm); 2356 2357 // load the 2nd round key to vKey10 2358 __ addi (keypos, keypos, -16); 2359 __ lvx (vTmp1, keypos, key); 2360 __ vperm (vKey4, vTmp2, vTmp1, keyPerm); 2361 2362 // load the 1st round key to vKey10 2363 __ addi (keypos, keypos, -16); 2364 __ lvx (vTmp2, keypos, key); 2365 __ vperm (vKey5, vTmp1, vTmp2, keyPerm); 2366 2367 // last 5th - 1th rounds 2368 __ vncipher (vRet, vRet, vKey1); 2369 __ vncipher (vRet, vRet, vKey2); 2370 __ vncipher (vRet, vRet, vKey3); 2371 __ vncipher (vRet, vRet, vKey4); 2372 __ vncipherlast (vRet, vRet, vKey5); 2373 2374 __ neg (temp, to); 2375 __ lvsr (toPerm, temp); 2376 __ vspltisb (vTmp2, -1); 2377 __ vxor (vTmp1, vTmp1, vTmp1); 2378 __ vperm (vTmp2, vTmp2, vTmp1, toPerm); 2379 __ vxor (toPerm, toPerm, fSplt); 2380 __ lvx (vTmp1, to); 2381 __ vperm (vRet, vRet, vRet, toPerm); 2382 __ vsel (vTmp1, vTmp1, vRet, vTmp2); 2383 __ lvx (vTmp4, fifteen, to); 2384 __ stvx (vTmp1, to); 2385 __ vsel (vRet, vRet, vTmp4, vTmp2); 2386 __ stvx (vRet, fifteen, to); 2387 2388 __ blr(); 2389 return start; 2390 } 2391 2392 void generate_arraycopy_stubs() { 2393 // Note: the disjoint stubs must be generated first, some of 2394 // the conjoint stubs use them. 2395 2396 // non-aligned disjoint versions 2397 StubRoutines::_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(false, "jbyte_disjoint_arraycopy"); 2398 StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_short_copy(false, "jshort_disjoint_arraycopy"); 2399 StubRoutines::_jint_disjoint_arraycopy = generate_disjoint_int_copy(false, "jint_disjoint_arraycopy"); 2400 StubRoutines::_jlong_disjoint_arraycopy = generate_disjoint_long_copy(false, "jlong_disjoint_arraycopy"); 2401 StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_oop_copy(false, "oop_disjoint_arraycopy", false); 2402 StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_oop_copy(false, "oop_disjoint_arraycopy_uninit", true); 2403 2404 // aligned disjoint versions 2405 StubRoutines::_arrayof_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(true, "arrayof_jbyte_disjoint_arraycopy"); 2406 StubRoutines::_arrayof_jshort_disjoint_arraycopy = generate_disjoint_short_copy(true, "arrayof_jshort_disjoint_arraycopy"); | 1944 address start = __ function_entry(); 1945 1946 gen_write_ref_array_pre_barrier(R3_ARG1, R4_ARG2, R5_ARG3, dest_uninitialized, R9_ARG7); 1947 1948 // save some arguments, disjoint_long_copy_core destroys them. 1949 // needed for post barrier 1950 __ mr(R9_ARG7, R4_ARG2); 1951 __ mr(R10_ARG8, R5_ARG3); 1952 1953 if (UseCompressedOops) { 1954 generate_disjoint_int_copy_core(aligned); 1955 } else { 1956 generate_disjoint_long_copy_core(aligned); 1957 } 1958 1959 gen_write_ref_array_post_barrier(R9_ARG7, R10_ARG8, R11_scratch1, /*branchToEnd*/ false); 1960 1961 return start; 1962 } 1963 1964 // Arguments for generated stub: 1965 // R3_ARG1 - source byte array address 1966 // R4_ARG2 - destination byte array address 1967 // R5_ARG3 - round key array 1968 address generate_aescrypt_encryptBlock() { 1969 assert(UseAES, "need AES instructions and misaligned SSE support"); 1970 StubCodeMark mark(this, "StubRoutines", "aescrypt_encryptBlock"); 1971 1972 address start = __ function_entry(); 1973 1974 Label L_doLast; 1975 1976 Register from = R3_ARG1; // source array address 1977 Register to = R4_ARG2; // destination array address 1978 Register key = R5_ARG3; // round key array 1979 1980 Register keylen = R8; 1981 Register temp = R9; 1982 Register keypos = R10; 1983 Register fifteen = R12; 1984 1985 VectorRegister vRet = VR0; 1986 1987 VectorRegister vKey1 = VR1; 1988 VectorRegister vKey2 = VR2; 1989 VectorRegister vKey3 = VR3; 1990 VectorRegister vKey4 = VR4; 1991 1992 VectorRegister fromPerm = VR5; 1993 VectorRegister keyPerm = VR6; 1994 VectorRegister toPerm = VR7; 1995 VectorRegister fSplt = VR8; 1996 1997 VectorRegister vTmp1 = VR9; 1998 VectorRegister vTmp2 = VR10; 1999 VectorRegister vTmp3 = VR11; 2000 VectorRegister vTmp4 = VR12; 2001 2002 __ li (fifteen, 15); 2003 2004 // load unaligned from[0-15] to vsRet 2005 __ lvx (vRet, from); 2006 __ lvx (vTmp1, fifteen, from); 2007 __ lvsl (fromPerm, from); 2008 #ifdef VM_LITTLE_ENDIAN 2009 __ vspltisb (fSplt, 0x0f); 2010 __ vxor (fromPerm, fromPerm, fSplt); 2011 #endif 2012 __ vperm (vRet, vRet, vTmp1, fromPerm); 2013 2014 // load keylen (44 or 52 or 60) 2015 __ lwz (keylen, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT), key); 2016 2017 // to load keys 2018 __ load_perm (keyPerm, key); 2019 #ifdef VM_LITTLE_ENDIAN 2020 __ vspltisb (vTmp2, -16); 2021 __ vrld (keyPerm, keyPerm, vTmp2); 2022 __ vrld (keyPerm, keyPerm, vTmp2); 2023 __ vsldoi (keyPerm, keyPerm, keyPerm, 8); 2024 #endif 2025 2026 // load the 1st round key to vTmp1 2027 __ lvx (vTmp1, key); 2028 __ li (keypos, 16); 2029 __ lvx (vKey1, keypos, key); 2030 __ vec_perm (vTmp1, vKey1, keyPerm); 2031 2032 // 1st round 2033 __ vxor (vRet, vRet, vTmp1); 2034 2035 // load the 2nd round key to vKey1 2036 __ li (keypos, 32); 2037 __ lvx (vKey2, keypos, key); 2038 __ vec_perm (vKey1, vKey2, keyPerm); 2039 2040 // load the 3rd round key to vKey2 2041 __ li (keypos, 48); 2042 __ lvx (vKey3, keypos, key); 2043 __ vec_perm (vKey2, vKey3, keyPerm); 2044 2045 // load the 4th round key to vKey3 2046 __ li (keypos, 64); 2047 __ lvx (vKey4, keypos, key); 2048 __ vec_perm (vKey3, vKey4, keyPerm); 2049 2050 // load the 5th round key to vKey4 2051 __ li (keypos, 80); 2052 __ lvx (vTmp1, keypos, key); 2053 __ vec_perm (vKey4, vTmp1, keyPerm); 2054 2055 // 2nd - 5th rounds 2056 __ vcipher (vRet, vRet, vKey1); 2057 __ vcipher (vRet, vRet, vKey2); 2058 __ vcipher (vRet, vRet, vKey3); 2059 __ vcipher (vRet, vRet, vKey4); 2060 2061 // load the 6th round key to vKey1 2062 __ li (keypos, 96); 2063 __ lvx (vKey2, keypos, key); 2064 __ vec_perm (vKey1, vTmp1, vKey2, keyPerm); 2065 2066 // load the 7th round key to vKey2 2067 __ li (keypos, 112); 2068 __ lvx (vKey3, keypos, key); 2069 __ vec_perm (vKey2, vKey3, keyPerm); 2070 2071 // load the 8th round key to vKey3 2072 __ li (keypos, 128); 2073 __ lvx (vKey4, keypos, key); 2074 __ vec_perm (vKey3, vKey4, keyPerm); 2075 2076 // load the 9th round key to vKey4 2077 __ li (keypos, 144); 2078 __ lvx (vTmp1, keypos, key); 2079 __ vec_perm (vKey4, vTmp1, keyPerm); 2080 2081 // 6th - 9th rounds 2082 __ vcipher (vRet, vRet, vKey1); 2083 __ vcipher (vRet, vRet, vKey2); 2084 __ vcipher (vRet, vRet, vKey3); 2085 __ vcipher (vRet, vRet, vKey4); 2086 2087 // load the 10th round key to vKey1 2088 __ li (keypos, 160); 2089 __ lvx (vKey2, keypos, key); 2090 __ vec_perm (vKey1, vTmp1, vKey2, keyPerm); 2091 2092 // load the 11th round key to vKey2 2093 __ li (keypos, 176); 2094 __ lvx (vTmp1, keypos, key); 2095 __ vec_perm (vKey2, vTmp1, keyPerm); 2096 2097 // if all round keys are loaded, skip next 4 rounds 2098 __ cmpwi (CCR0, keylen, 44); 2099 __ beq (CCR0, L_doLast); 2100 2101 // 10th - 11th rounds 2102 __ vcipher (vRet, vRet, vKey1); 2103 __ vcipher (vRet, vRet, vKey2); 2104 2105 // load the 12th round key to vKey1 2106 __ li (keypos, 192); 2107 __ lvx (vKey2, keypos, key); 2108 __ vec_perm (vKey1, vTmp1, vKey2, keyPerm); 2109 2110 // load the 13th round key to vKey2 2111 __ li (keypos, 208); 2112 __ lvx (vTmp1, keypos, key); 2113 __ vec_perm (vKey2, vTmp1, keyPerm); 2114 2115 // if all round keys are loaded, skip next 2 rounds 2116 __ cmpwi (CCR0, keylen, 52); 2117 __ beq (CCR0, L_doLast); 2118 2119 // 12th - 13th rounds 2120 __ vcipher (vRet, vRet, vKey1); 2121 __ vcipher (vRet, vRet, vKey2); 2122 2123 // load the 14th round key to vKey1 2124 __ li (keypos, 224); 2125 __ lvx (vKey2, keypos, key); 2126 __ vec_perm (vKey1, vTmp1, vKey2, keyPerm); 2127 2128 // load the 15th round key to vKey2 2129 __ li (keypos, 240); 2130 __ lvx (vTmp1, keypos, key); 2131 __ vec_perm (vKey2, vTmp1, keyPerm); 2132 2133 __ bind(L_doLast); 2134 2135 // last two rounds 2136 __ vcipher (vRet, vRet, vKey1); 2137 __ vcipherlast (vRet, vRet, vKey2); 2138 2139 // store result (unaligned) 2140 #ifdef VM_LITTLE_ENDIAN 2141 __ lvsl (toPerm, to); 2142 #else 2143 __ lvsr (toPerm, to); 2144 #endif 2145 __ vspltisb (vTmp3, -1); 2146 __ vspltisb (vTmp4, 0); 2147 __ lvx (vTmp1, to); 2148 __ lvx (vTmp2, fifteen, to); 2149 #ifdef VM_LITTLE_ENDIAN 2150 __ vperm (vTmp3, vTmp3, vTmp4, toPerm); // generate select mask 2151 __ vxor (toPerm, toPerm, fSplt); // swap bytes 2152 #else 2153 __ vperm (vTmp3, vTmp4, vTmp3, toPerm); // generate select mask 2154 #endif 2155 __ vperm (vTmp4, vRet, vRet, toPerm); // rotate data 2156 __ vsel (vTmp2, vTmp4, vTmp2, vTmp3); 2157 __ vsel (vTmp1, vTmp1, vTmp4, vTmp3); 2158 __ stvx (vTmp2, fifteen, to); // store this one first (may alias) 2159 __ stvx (vTmp1, to); 2160 2161 __ blr(); 2162 return start; 2163 } 2164 2165 // Arguments for generated stub: 2166 // R3_ARG1 - source byte array address 2167 // R4_ARG2 - destination byte array address 2168 // R5_ARG3 - K (key) in little endian int array 2169 address generate_aescrypt_decryptBlock() { 2170 assert(UseAES, "need AES instructions and misaligned SSE support"); 2171 StubCodeMark mark(this, "StubRoutines", "aescrypt_decryptBlock"); 2172 2173 address start = __ function_entry(); 2174 2175 Label L_doLast; 2176 Label L_do44; 2177 Label L_do52; 2178 Label L_do60; 2179 2180 Register from = R3_ARG1; // source array address 2181 Register to = R4_ARG2; // destination array address 2182 Register key = R5_ARG3; // round key array 2183 2184 Register keylen = R8; 2185 Register temp = R9; 2186 Register keypos = R10; 2187 Register fifteen = R12; 2188 2189 VectorRegister vRet = VR0; 2190 2191 VectorRegister vKey1 = VR1; 2192 VectorRegister vKey2 = VR2; 2193 VectorRegister vKey3 = VR3; 2194 VectorRegister vKey4 = VR4; 2195 VectorRegister vKey5 = VR5; 2196 2197 VectorRegister fromPerm = VR6; 2198 VectorRegister keyPerm = VR7; 2199 VectorRegister toPerm = VR8; 2200 VectorRegister fSplt = VR9; 2201 2202 VectorRegister vTmp1 = VR10; 2203 VectorRegister vTmp2 = VR11; 2204 VectorRegister vTmp3 = VR12; 2205 VectorRegister vTmp4 = VR13; 2206 2207 __ li (fifteen, 15); 2208 2209 // load unaligned from[0-15] to vsRet 2210 __ lvx (vRet, from); 2211 __ lvx (vTmp1, fifteen, from); 2212 __ lvsl (fromPerm, from); 2213 #ifdef VM_LITTLE_ENDIAN 2214 __ vspltisb (fSplt, 0x0f); 2215 __ vxor (fromPerm, fromPerm, fSplt); 2216 #endif 2217 __ vperm (vRet, vRet, vTmp1, fromPerm); // align [and byte swap in LE] 2218 2219 // load keylen (44 or 52 or 60) 2220 __ lwz (keylen, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT), key); 2221 2222 // to load keys 2223 __ load_perm (keyPerm, key); 2224 #ifdef VM_LITTLE_ENDIAN 2225 __ vxor (vTmp2, vTmp2, vTmp2); 2226 __ vspltisb (vTmp2, -16); 2227 __ vrld (keyPerm, keyPerm, vTmp2); 2228 __ vrld (keyPerm, keyPerm, vTmp2); 2229 __ vsldoi (keyPerm, keyPerm, keyPerm, 8); 2230 #endif 2231 2232 __ cmpwi (CCR0, keylen, 44); 2233 __ beq (CCR0, L_do44); 2234 2235 __ cmpwi (CCR0, keylen, 52); 2236 __ beq (CCR0, L_do52); 2237 2238 // load the 15th round key to vKey1 2239 __ li (keypos, 240); 2240 __ lvx (vKey1, keypos, key); 2241 __ li (keypos, 224); 2242 __ lvx (vKey2, keypos, key); 2243 __ vec_perm (vKey1, vKey2, vKey1, keyPerm); 2244 2245 // load the 14th round key to vKey2 2246 __ li (keypos, 208); 2247 __ lvx (vKey3, keypos, key); 2248 __ vec_perm (vKey2, vKey3, vKey2, keyPerm); 2249 2250 // load the 13th round key to vKey3 2251 __ li (keypos, 192); 2252 __ lvx (vKey4, keypos, key); 2253 __ vec_perm (vKey3, vKey4, vKey3, keyPerm); 2254 2255 // load the 12th round key to vKey4 2256 __ li (keypos, 176); 2257 __ lvx (vKey5, keypos, key); 2258 __ vec_perm (vKey4, vKey5, vKey4, keyPerm); 2259 2260 // load the 11th round key to vKey5 2261 __ li (keypos, 160); 2262 __ lvx (vTmp1, keypos, key); 2263 __ vec_perm (vKey5, vTmp1, vKey5, keyPerm); 2264 2265 // 1st - 5th rounds 2266 __ vxor (vRet, vRet, vKey1); 2267 __ vncipher (vRet, vRet, vKey2); 2268 __ vncipher (vRet, vRet, vKey3); 2269 __ vncipher (vRet, vRet, vKey4); 2270 __ vncipher (vRet, vRet, vKey5); 2271 2272 __ b (L_doLast); 2273 2274 __ bind (L_do52); 2275 2276 // load the 13th round key to vKey1 2277 __ li (keypos, 208); 2278 __ lvx (vKey1, keypos, key); 2279 __ li (keypos, 192); 2280 __ lvx (vKey2, keypos, key); 2281 __ vec_perm (vKey1, vKey2, vKey1, keyPerm); 2282 2283 // load the 12th round key to vKey2 2284 __ li (keypos, 176); 2285 __ lvx (vKey3, keypos, key); 2286 __ vec_perm (vKey2, vKey3, vKey2, keyPerm); 2287 2288 // load the 11th round key to vKey3 2289 __ li (keypos, 160); 2290 __ lvx (vTmp1, keypos, key); 2291 __ vec_perm (vKey3, vTmp1, vKey3, keyPerm); 2292 2293 // 1st - 3rd rounds 2294 __ vxor (vRet, vRet, vKey1); 2295 __ vncipher (vRet, vRet, vKey2); 2296 __ vncipher (vRet, vRet, vKey3); 2297 2298 __ b (L_doLast); 2299 2300 __ bind (L_do44); 2301 2302 // load the 11th round key to vKey1 2303 __ li (keypos, 176); 2304 __ lvx (vKey1, keypos, key); 2305 __ li (keypos, 160); 2306 __ lvx (vTmp1, keypos, key); 2307 __ vec_perm (vKey1, vTmp1, vKey1, keyPerm); 2308 2309 // 1st round 2310 __ vxor (vRet, vRet, vKey1); 2311 2312 __ bind (L_doLast); 2313 2314 // load the 10th round key to vKey1 2315 __ li (keypos, 144); 2316 __ lvx (vKey2, keypos, key); 2317 __ vec_perm (vKey1, vKey2, vTmp1, keyPerm); 2318 2319 // load the 9th round key to vKey2 2320 __ li (keypos, 128); 2321 __ lvx (vKey3, keypos, key); 2322 __ vec_perm (vKey2, vKey3, vKey2, keyPerm); 2323 2324 // load the 8th round key to vKey3 2325 __ li (keypos, 112); 2326 __ lvx (vKey4, keypos, key); 2327 __ vec_perm (vKey3, vKey4, vKey3, keyPerm); 2328 2329 // load the 7th round key to vKey4 2330 __ li (keypos, 96); 2331 __ lvx (vKey5, keypos, key); 2332 __ vec_perm (vKey4, vKey5, vKey4, keyPerm); 2333 2334 // load the 6th round key to vKey5 2335 __ li (keypos, 80); 2336 __ lvx (vTmp1, keypos, key); 2337 __ vec_perm (vKey5, vTmp1, vKey5, keyPerm); 2338 2339 // last 10th - 6th rounds 2340 __ vncipher (vRet, vRet, vKey1); 2341 __ vncipher (vRet, vRet, vKey2); 2342 __ vncipher (vRet, vRet, vKey3); 2343 __ vncipher (vRet, vRet, vKey4); 2344 __ vncipher (vRet, vRet, vKey5); 2345 2346 // load the 5th round key to vKey1 2347 __ li (keypos, 64); 2348 __ lvx (vKey2, keypos, key); 2349 __ vec_perm (vKey1, vKey2, vTmp1, keyPerm); 2350 2351 // load the 4th round key to vKey2 2352 __ li (keypos, 48); 2353 __ lvx (vKey3, keypos, key); 2354 __ vec_perm (vKey2, vKey3, vKey2, keyPerm); 2355 2356 // load the 3rd round key to vKey3 2357 __ li (keypos, 32); 2358 __ lvx (vKey4, keypos, key); 2359 __ vec_perm (vKey3, vKey4, vKey3, keyPerm); 2360 2361 // load the 2nd round key to vKey4 2362 __ li (keypos, 16); 2363 __ lvx (vKey5, keypos, key); 2364 __ vec_perm (vKey4, vKey5, vKey4, keyPerm); 2365 2366 // load the 1st round key to vKey5 2367 __ lvx (vTmp1, key); 2368 __ vec_perm (vKey5, vTmp1, vKey5, keyPerm); 2369 2370 // last 5th - 1th rounds 2371 __ vncipher (vRet, vRet, vKey1); 2372 __ vncipher (vRet, vRet, vKey2); 2373 __ vncipher (vRet, vRet, vKey3); 2374 __ vncipher (vRet, vRet, vKey4); 2375 __ vncipherlast (vRet, vRet, vKey5); 2376 2377 // store result (unaligned) 2378 #ifdef VM_LITTLE_ENDIAN 2379 __ lvsl (toPerm, to); 2380 #else 2381 __ lvsr (toPerm, to); 2382 #endif 2383 __ vspltisb (vTmp3, -1); 2384 __ vspltisb (vTmp4, 0); 2385 __ lvx (vTmp1, to); 2386 __ lvx (vTmp2, fifteen, to); 2387 #ifdef VM_LITTLE_ENDIAN 2388 __ vperm (vTmp3, vTmp3, vTmp4, toPerm); // generate select mask 2389 __ vxor (toPerm, toPerm, fSplt); // swap bytes 2390 #else 2391 __ vperm (vTmp3, vTmp4, vTmp3, toPerm); // generate select mask 2392 #endif 2393 __ vperm (vTmp4, vRet, vRet, toPerm); // rotate data 2394 __ vsel (vTmp2, vTmp4, vTmp2, vTmp3); 2395 __ vsel (vTmp1, vTmp1, vTmp4, vTmp3); 2396 __ stvx (vTmp2, fifteen, to); // store this one first (may alias) 2397 __ stvx (vTmp1, to); 2398 2399 __ blr(); 2400 return start; 2401 } 2402 2403 void generate_arraycopy_stubs() { 2404 // Note: the disjoint stubs must be generated first, some of 2405 // the conjoint stubs use them. 2406 2407 // non-aligned disjoint versions 2408 StubRoutines::_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(false, "jbyte_disjoint_arraycopy"); 2409 StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_short_copy(false, "jshort_disjoint_arraycopy"); 2410 StubRoutines::_jint_disjoint_arraycopy = generate_disjoint_int_copy(false, "jint_disjoint_arraycopy"); 2411 StubRoutines::_jlong_disjoint_arraycopy = generate_disjoint_long_copy(false, "jlong_disjoint_arraycopy"); 2412 StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_oop_copy(false, "oop_disjoint_arraycopy", false); 2413 StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_oop_copy(false, "oop_disjoint_arraycopy_uninit", true); 2414 2415 // aligned disjoint versions 2416 StubRoutines::_arrayof_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(true, "arrayof_jbyte_disjoint_arraycopy"); 2417 StubRoutines::_arrayof_jshort_disjoint_arraycopy = generate_disjoint_short_copy(true, "arrayof_jshort_disjoint_arraycopy"); |