
src/cpu/ppc/vm/stubGenerator_ppc.cpp

Old version (before change):

1944     address start = __ function_entry();
1945 
1946     gen_write_ref_array_pre_barrier(R3_ARG1, R4_ARG2, R5_ARG3, dest_uninitialized, R9_ARG7);
1947 
1948     // Save the arguments needed for the post barrier;
1949     // the copy core destroys them.
1950     __ mr(R9_ARG7, R4_ARG2);
1951     __ mr(R10_ARG8, R5_ARG3);
1952 
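         // Note: with UseCompressedOops the heap references being copied are
         // 32-bit narrow oops, so the int copy loop is used; otherwise they
         // are full 64-bit oops and the long copy applies.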
1953     if (UseCompressedOops) {
1954       generate_disjoint_int_copy_core(aligned);
1955     } else {
1956       generate_disjoint_long_copy_core(aligned);
1957     }
1958 
1959     gen_write_ref_array_post_barrier(R9_ARG7, R10_ARG8, R11_scratch1, /*branchToEnd*/ false);
1960 
1961     return start;
1962   }
1963 
1964   // Arguments for generated stub (little endian only):
1965   //   R3_ARG1   - source byte array address
1966   //   R4_ARG2   - destination byte array address
1967   //   R5_ARG3   - round key array
1968   address generate_aescrypt_encryptBlock() {
1969     assert(UseAES, "need AES instructions and misaligned SSE support");
1970     StubCodeMark mark(this, "StubRoutines", "aescrypt_encryptBlock");
1971 
1972     address start = __ function_entry();
1973 
1974     Label L_doLast;
1975 
1976     Register from           = R3_ARG1;  // source array address
1977     Register to             = R4_ARG2;  // destination array address
1978     Register key            = R5_ARG3;  // round key array
1979 
1980     Register keylen         = R8;
1981     Register temp           = R9;
1982     Register keypos         = R10;
1983     Register hex            = R11;
1984     Register fifteen        = R12;
1985 
1986     VectorRegister vRet     = VR0;
1987 
1988     VectorRegister vKey1    = VR1;
1989     VectorRegister vKey2    = VR2;
1990     VectorRegister vKey3    = VR3;
1991     VectorRegister vKey4    = VR4;
1992 
1993     VectorRegister fromPerm = VR5;
1994     VectorRegister keyPerm  = VR6;
1995     VectorRegister toPerm   = VR7;
1996     VectorRegister fSplt    = VR8;
1997 
1998     VectorRegister vTmp1    = VR9;
1999     VectorRegister vTmp2    = VR10;
2000     VectorRegister vTmp3    = VR11;
2001     VectorRegister vTmp4    = VR12;
2002 
2003     VectorRegister vLow     = VR13;
2004     VectorRegister vHigh    = VR14;
2005 
2006     __ li              (hex, 16);
2007     __ li              (fifteen, 15);
2008     __ vspltisb        (fSplt, 0x0f);
2009 
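         // Note: lvx ignores the low four address bits, so the unaligned
         // 16-byte source is fetched as the two enclosing quadwords (at
         // 'from' and 'from' + 15) and merged with vperm. XORing the lvsl
         // control vector with 0x0f makes the same vperm also produce the
         // byte order the big-endian-numbered AES instructions expect when
         // running little endian.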
2010     // load unaligned from[0-15] to vRet
2011     __ lvx             (vRet, from);
2012     __ lvx             (vTmp1, fifteen, from);
2013     __ lvsl            (fromPerm, from);
2014     __ vxor            (fromPerm, fromPerm, fSplt);
2015     __ vperm           (vRet, vRet, vTmp1, fromPerm);
2016 
2017     // load keylen (44 or 52 or 60)
2018     __ lwz             (keylen, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT), key);
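         // Note: keylen is the expanded key length in ints: 44, 52 or 60
         // for AES-128, AES-192 and AES-256, i.e. 11, 13 or 15 round keys.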
2019 
2020     // permute control for loading the round keys
2021     __ lvsr            (keyPerm, key);
2022     __ vxor            (vTmp2, vTmp2, vTmp2);
2023     __ vspltisb        (vTmp2, -16);
2024     __ vrld            (keyPerm, keyPerm, vTmp2);
2025     __ vrld            (keyPerm, keyPerm, vTmp2);
2026     __ vsldoi          (keyPerm, keyPerm, keyPerm, -8);
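         // Note: the round keys are int-aligned, not quadword-aligned, so
         // each key is likewise assembled from two overlapping quadword
         // loads via vperm; the vrld/vsldoi sequence adjusts the lvsr
         // control vector for little-endian element numbering.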
2027 
2028     // load the 1st round key to vKey1
2029     __ li              (keypos, 0);
2030     __ lvx             (vKey1, keypos, key);
2031     __ addi            (keypos, keypos, 16);
2032     __ lvx             (vTmp1, keypos, key);
2033     __ vperm           (vKey1, vTmp1, vKey1, keyPerm);
2034 
2035     // 1st round
2036     __ vxor (vRet, vRet, vKey1);
2037 
2038     // load the 2nd round key to vKey1
2039     __ addi            (keypos, keypos, 16);
2040     __ lvx             (vTmp2, keypos, key);
2041     __ vperm           (vKey1, vTmp2, vTmp1, keyPerm);
2042 
2043     // load the 3rd round key to vKey2
2044     __ addi            (keypos, keypos, 16);
2045     __ lvx             (vTmp1, keypos, key);
2046     __ vperm           (vKey2, vTmp1, vTmp2, keyPerm);
2047 
2048     // load the 4th round key to vKey3
2049     __ addi            (keypos, keypos, 16);
2050     __ lvx             (vTmp2, keypos, key);
2051     __ vperm           (vKey3, vTmp2, vTmp1, keyPerm);
2052 
2053     // load the 5th round key to vKey4
2054     __ addi            (keypos, keypos, 16);
2055     __ lvx             (vTmp1, keypos, key);
2056     __ vperm           (vKey4, vTmp1, vTmp2, keyPerm);
2057 
2058     // 2nd - 5th rounds
2059     __ vcipher (vRet, vRet, vKey1);
2060     __ vcipher (vRet, vRet, vKey2);
2061     __ vcipher (vRet, vRet, vKey3);
2062     __ vcipher (vRet, vRet, vKey4);
2063 
2064     // load the 6th round key to vKey1
2065     __ addi            (keypos, keypos, 16);
2066     __ lvx             (vTmp2, keypos, key);
2067     __ vperm           (vKey1, vTmp2, vTmp1, keyPerm);
2068 
2069     // load the 7th round key to vKey2
2070     __ addi            (keypos, keypos, 16);
2071     __ lvx             (vTmp1, keypos, key);
2072     __ vperm           (vKey2, vTmp1, vTmp2, keyPerm);
2073 
2074     // load the 8th round key to vKey3
2075     __ addi            (keypos, keypos, 16);
2076     __ lvx             (vTmp2, keypos, key);
2077     __ vperm           (vKey3, vTmp2, vTmp1, keyPerm);
2078 
2079     // load the 9th round key to vKey4
2080     __ addi            (keypos, keypos, 16);
2081     __ lvx             (vTmp1, keypos, key);
2082     __ vperm           (vKey4, vTmp1, vTmp2, keyPerm);
2083 
2084     // 6th - 9th rounds
2085     __ vcipher (vRet, vRet, vKey1);
2086     __ vcipher (vRet, vRet, vKey2);
2087     __ vcipher (vRet, vRet, vKey3);
2088     __ vcipher (vRet, vRet, vKey4);
2089 
2090     // load the 10th round key to vKey1
2091     __ addi            (keypos, keypos, 16);
2092     __ lvx             (vTmp2, keypos, key);
2093     __ vperm           (vKey1, vTmp2, vTmp1, keyPerm);
2094 
2095     // load the 11th round key to vKey2
2096     __ addi            (keypos, keypos, 16);
2097     __ lvx             (vTmp1, keypos, key);
2098     __ vperm           (vKey2, vTmp1, vTmp2, keyPerm);
2099 
2100     // if all round keys are loaded, skip next 4 rounds
2101     __ cmpwi           (CCR0, keylen, 44);
2102     __ beq             (CCR0, L_doLast);
2103 
2104     // 10th - 11th rounds
2105     __ vcipher (vRet, vRet, vKey1);
2106     __ vcipher (vRet, vRet, vKey2);
2107 
2108     // load the 12th round key to vKey1
2109     __ addi            (keypos, keypos, 16);
2110     __ lvx             (vTmp2, keypos, key);
2111     __ vperm           (vKey1, vTmp2, vTmp1, keyPerm);
2112 
2113     // load the 13th round key to vKey2
2114     __ addi            (keypos, keypos, 16);
2115     __ lvx             (vTmp1, keypos, key);
2116     __ vperm           (vKey2, vTmp1, vTmp2, keyPerm);
2117 
2118     // if all round keys are loaded, skip next 2 rounds
2119     __ cmpwi           (CCR0, keylen, 52);
2120     __ beq             (CCR0, L_doLast);
2121 
2122     // 12th - 13th rounds
2123     __ vcipher (vRet, vRet, vKey1);
2124     __ vcipher (vRet, vRet, vKey2);
2125 
2126     // load the 14th round key to vKey1
2127     __ addi            (keypos, keypos, 16);
2128     __ lvx             (vTmp2, keypos, key);
2129     __ vperm           (vKey1, vTmp2, vTmp1, keyPerm);
2130 
2131     // load the 15th round key to vKey2
2132     __ addi            (keypos, keypos, 16);
2133     __ lvx             (vTmp1, keypos, key);
2134     __ vperm           (vKey2, vTmp1, vTmp2, keyPerm);
2135 
2136     __ bind(L_doLast);
2137 
2138     // last two rounds
2139     __ vcipher (vRet, vRet, vKey1);
2140     __ vcipherlast (vRet, vRet, vKey2);
2141 
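         // Unaligned store: rotate the result into position, build a byte
         // select mask, and merge into the two quadwords enclosing 'to' so
         // bytes outside [to, to + 15] keep their previous values.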
2142     __ neg             (temp, to);
2143     __ lvsr            (toPerm, temp);
2144     __ vspltisb        (vTmp2, -1);
2145     __ vxor            (vTmp1, vTmp1, vTmp1);
2146     __ vperm           (vTmp2, vTmp2, vTmp1, toPerm);
2147     __ vxor            (toPerm, toPerm, fSplt);
2148     __ lvx             (vTmp1, to);
2149     __ vperm           (vRet, vRet, vRet, toPerm);
2150     __ vsel            (vTmp1, vTmp1, vRet, vTmp2);
2151     __ lvx             (vTmp4, fifteen, to);
2152     __ stvx            (vTmp1, to);
2153     __ vsel            (vRet, vRet, vTmp4, vTmp2);
2154     __ stvx            (vRet, fifteen, to);
2155 
2156     __ blr();
2157     return start;
2158   }
2159 
2160   // Arguments for generated stub (little endian only):
2161   //   R3_ARG1   - source byte array address
2162   //   R4_ARG2   - destination byte array address
2163   //   R5_ARG3   - K (key) in little endian int array
2164   address generate_aescrypt_decryptBlock() {
2165     assert(UseAES, "need AES instructions and misaligned SSE support");
2166     StubCodeMark mark(this, "StubRoutines", "aescrypt_decryptBlock");
2167 
2168     address start = __ function_entry();
2169 
2170     Label L_doLast;
2171     Label L_do44;
2172     Label L_do52;
2173     Label L_do60;
2174 
2175     Register from           = R3_ARG1;  // source array address
2176     Register to             = R4_ARG2;  // destination array address
2177     Register key            = R5_ARG3;  // round key array
2178 
2179     Register keylen         = R8;
2180     Register temp           = R9;
2181     Register keypos         = R10;
2182     Register hex            = R11;
2183     Register fifteen        = R12;
2184 
2185     VectorRegister vRet     = VR0;
2186 
2187     VectorRegister vKey1    = VR1;
2188     VectorRegister vKey2    = VR2;
2189     VectorRegister vKey3    = VR3;
2190     VectorRegister vKey4    = VR4;
2191     VectorRegister vKey5    = VR5;
2192 
2193     VectorRegister fromPerm = VR6;
2194     VectorRegister keyPerm  = VR7;
2195     VectorRegister toPerm   = VR8;
2196     VectorRegister fSplt    = VR9;
2197 
2198     VectorRegister vTmp1    = VR10;
2199     VectorRegister vTmp2    = VR11;
2200     VectorRegister vTmp3    = VR12;
2201     VectorRegister vTmp4    = VR13;
2202 
2203     VectorRegister vLow     = VR14;
2204     VectorRegister vHigh    = VR15;
2205 
2206     __ li              (hex, 16);
2207     __ li              (fifteen, 15);
2208     __ vspltisb        (fSplt, 0x0f);
2209 
2210     // load unaligned from[0-15] to vRet
2211     __ lvx             (vRet, from);
2212     __ lvx             (vTmp1, fifteen, from);
2213     __ lvsl            (fromPerm, from);
2214     __ vxor            (fromPerm, fromPerm, fSplt);
2215     __ vperm           (vRet, vRet, vTmp1, fromPerm); // align [and byte swap in LE]
2216 
2217     // load keylen (44 or 52 or 60)
2218     __ lwz             (keylen, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT), key);
2219 
2220     // permute control for loading the round keys
2221     __ lvsr            (keyPerm, key);
2222     __ vxor            (vTmp2, vTmp2, vTmp2);
2223     __ vspltisb        (vTmp2, -16);
2224     __ vrld            (keyPerm, keyPerm, vTmp2);
2225     __ vrld            (keyPerm, keyPerm, vTmp2);
2226     __ vsldoi          (keyPerm, keyPerm, keyPerm, -8);
2227 
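         // Note: decryption replays the expanded encryption key schedule in
         // reverse. Dispatch on the key length so the first rounds consume
         // the highest-numbered keys: 60 ints (AES-256) falls through, 52
         // (AES-192) and 44 (AES-128) take shorter prologues; all paths
         // rejoin at L_doLast.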
2228     __ cmpwi           (CCR0, keylen, 44);
2229     __ beq             (CCR0, L_do44);
2230 
2231     __ cmpwi           (CCR0, keylen, 52);
2232     __ beq             (CCR0, L_do52);
2233 
2234     // load the 15th round key to vKey1
2235     __ li              (keypos, 240);
2236     __ lvx             (vTmp1, keypos, key);
2237     __ addi            (keypos, keypos, -16);
2238     __ lvx             (vTmp2, keypos, key);
2239     __ vperm           (vKey1, vTmp1, vTmp2, keyPerm);
2240 
2241     // load the 14th round key to vKey2
2242     __ addi            (keypos, keypos, -16);
2243     __ lvx             (vTmp1, keypos, key);
2244     __ vperm           (vKey2, vTmp2, vTmp1, keyPerm);
2245 
2246     // load the 13th round key to vKey3
2247     __ addi            (keypos, keypos, -16);
2248     __ lvx             (vTmp2, keypos, key);
2249     __ vperm           (vKey3, vTmp1, vTmp2, keyPerm);
2250 
2251     // load the 12th round key to vKey4
2252     __ addi            (keypos, keypos, -16);
2253     __ lvx             (vTmp1, keypos, key);
2254     __ vperm           (vKey4, vTmp2, vTmp1, keyPerm);
2255 
2256     // load the 11th round key to vKey5
2257     __ addi            (keypos, keypos, -16);
2258     __ lvx             (vTmp2, keypos, key);
2259     __ vperm           (vKey5, vTmp1, vTmp2, keyPerm);
2260 
2261     // 1st - 5th rounds
2262     __ vxor            (vRet, vRet, vKey1);
2263     __ vncipher        (vRet, vRet, vKey2);
2264     __ vncipher        (vRet, vRet, vKey3);
2265     __ vncipher        (vRet, vRet, vKey4);
2266     __ vncipher        (vRet, vRet, vKey5);
2267 
2268     __ b               (L_doLast);
2269 
2270     __ bind            (L_do52);
2271 
2272     // load the 13th round key to vKey1
2273     __ li              (keypos, 208);
2274     __ lvx             (vTmp1, keypos, key);
2275     __ addi            (keypos, keypos, -16);
2276     __ lvx             (vTmp2, keypos, key);
2277     __ vperm           (vKey1, vTmp1, vTmp2, keyPerm);
2278 
2279     // load the 12th round key to vKey2
2280     __ addi            (keypos, keypos, -16);
2281     __ lvx             (vTmp1, keypos, key);
2282     __ vperm           (vKey2, vTmp2, vTmp1, keyPerm);
2283 
2284     // load the 11th round key to vKey3
2285     __ addi            (keypos, keypos, -16);
2286     __ lvx             (vTmp2, keypos, key);
2287     __ vperm           (vKey3, vTmp1, vTmp2, keyPerm);
2288 
2289     // 1st - 3rd rounds
2290     __ vxor            (vRet, vRet, vKey1);
2291     __ vncipher        (vRet, vRet, vKey2);
2292     __ vncipher        (vRet, vRet, vKey3);
2293 
2294     __ b               (L_doLast);
2295 
2296     __ bind            (L_do44);
2297 
2298     // load the 11th round key to vKey1
2299     __ li              (keypos, 176);
2300     __ lvx             (vTmp1, keypos, key);
2301     __ addi            (keypos, keypos, -16);
2302     __ lvx             (vTmp2, keypos, key);
2303     __ vperm           (vKey1, vTmp1, vTmp2, keyPerm);
2304 
2305     // 1st round
2306     __ vxor            (vRet, vRet, vKey1);
2307 
2308     __ bind            (L_doLast);
2309 
2310     // load the 10th round key to vKey1
2311     __ addi            (keypos, keypos, -16);
2312     __ lvx             (vTmp1, keypos, key);
2313     __ vperm           (vKey1, vTmp2, vTmp1, keyPerm);
2314 
2315     // load the 9th round key to vKey2
2316     __ addi            (keypos, keypos, -16);
2317     __ lvx             (vTmp2, keypos, key);
2318     __ vperm           (vKey2, vTmp1, vTmp2, keyPerm);
2319 
2320     // load the 8th round key to vKey3
2321     __ addi            (keypos, keypos, -16);
2322     __ lvx             (vTmp1, keypos, key);
2323     __ vperm           (vKey3, vTmp2, vTmp1, keyPerm);
2324 
2325     // load the 7th round key to vKey4
2326     __ addi            (keypos, keypos, -16);
2327     __ lvx             (vTmp2, keypos, key);
2328     __ vperm           (vKey4, vTmp1, vTmp2, keyPerm);
2329 
2330     // load the 6th round key to vKey5
2331     __ addi            (keypos, keypos, -16);
2332     __ lvx             (vTmp1, keypos, key);
2333     __ vperm           (vKey5, vTmp2, vTmp1, keyPerm);
2334 
2335     // last 10th - 6th rounds
2336     __ vncipher        (vRet, vRet, vKey1);
2337     __ vncipher        (vRet, vRet, vKey2);
2338     __ vncipher        (vRet, vRet, vKey3);
2339     __ vncipher        (vRet, vRet, vKey4);
2340     __ vncipher        (vRet, vRet, vKey5);
2341 
2342     // load the 5th round key to vKey1
2343     __ addi            (keypos, keypos, -16);
2344     __ lvx             (vTmp2, keypos, key);
2345     __ vperm           (vKey1, vTmp1, vTmp2, keyPerm);
2346 
2347     // load the 4th round key to vKey2
2348     __ addi            (keypos, keypos, -16);
2349     __ lvx             (vTmp1, keypos, key);
2350     __ vperm           (vKey2, vTmp2, vTmp1, keyPerm);
2351 
2352     // load the 3rd round key to vKey3
2353     __ addi            (keypos, keypos, -16);
2354     __ lvx             (vTmp2, keypos, key);
2355     __ vperm           (vKey3, vTmp1, vTmp2, keyPerm);
2356 
2357     // load the 2nd round key to vKey4
2358     __ addi            (keypos, keypos, -16);
2359     __ lvx             (vTmp1, keypos, key);
2360     __ vperm           (vKey4, vTmp2, vTmp1, keyPerm);
2361 
2362     // load the 1st round key to vKey5
2363     __ addi            (keypos, keypos, -16);
2364     __ lvx             (vTmp2, keypos, key);
2365     __ vperm           (vKey5, vTmp1, vTmp2, keyPerm);
2366 
2367     // last 5th - 1st rounds
2368     __ vncipher        (vRet, vRet, vKey1);
2369     __ vncipher        (vRet, vRet, vKey2);
2370     __ vncipher        (vRet, vRet, vKey3);
2371     __ vncipher        (vRet, vRet, vKey4);
2372     __ vncipherlast    (vRet, vRet, vKey5);
2373 
2374     __ neg             (temp, to);
2375     __ lvsr            (toPerm, temp);
2376     __ vspltisb        (vTmp2, -1);
2377     __ vxor            (vTmp1, vTmp1, vTmp1);
2378     __ vperm           (vTmp2, vTmp2, vTmp1, toPerm);
2379     __ vxor            (toPerm, toPerm, fSplt);
2380     __ lvx             (vTmp1, to);
2381     __ vperm           (vRet, vRet, vRet, toPerm);
2382     __ vsel            (vTmp1, vTmp1, vRet, vTmp2);
2383     __ lvx             (vTmp4, fifteen, to);
2384     __ stvx            (vTmp1, to);
2385     __ vsel            (vRet, vRet, vTmp4, vTmp2);
2386     __ stvx            (vRet, fifteen, to);
2387 
2388     __ blr();
2389     return start;
2390   }
2391 
2392   void generate_arraycopy_stubs() {
2393     // Note: the disjoint stubs must be generated first, as some of
2394     // the conjoint stubs use them.
2395 
2396     // non-aligned disjoint versions
2397     StubRoutines::_jbyte_disjoint_arraycopy       = generate_disjoint_byte_copy(false, "jbyte_disjoint_arraycopy");
2398     StubRoutines::_jshort_disjoint_arraycopy      = generate_disjoint_short_copy(false, "jshort_disjoint_arraycopy");
2399     StubRoutines::_jint_disjoint_arraycopy        = generate_disjoint_int_copy(false, "jint_disjoint_arraycopy");
2400     StubRoutines::_jlong_disjoint_arraycopy       = generate_disjoint_long_copy(false, "jlong_disjoint_arraycopy");
2401     StubRoutines::_oop_disjoint_arraycopy         = generate_disjoint_oop_copy(false, "oop_disjoint_arraycopy", false);
2402     StubRoutines::_oop_disjoint_arraycopy_uninit  = generate_disjoint_oop_copy(false, "oop_disjoint_arraycopy_uninit", true);
2403 
2404     // aligned disjoint versions
2405     StubRoutines::_arrayof_jbyte_disjoint_arraycopy      = generate_disjoint_byte_copy(true, "arrayof_jbyte_disjoint_arraycopy");
2406     StubRoutines::_arrayof_jshort_disjoint_arraycopy     = generate_disjoint_short_copy(true, "arrayof_jshort_disjoint_arraycopy");

New version (after change):

1944     address start = __ function_entry();
1945 
1946     gen_write_ref_array_pre_barrier(R3_ARG1, R4_ARG2, R5_ARG3, dest_uninitialized, R9_ARG7);
1947 
1948     // Save the arguments needed for the post barrier;
1949     // the copy core destroys them.
1950     __ mr(R9_ARG7, R4_ARG2);
1951     __ mr(R10_ARG8, R5_ARG3);
1952 
1953     if (UseCompressedOops) {
1954       generate_disjoint_int_copy_core(aligned);
1955     } else {
1956       generate_disjoint_long_copy_core(aligned);
1957     }
1958 
1959     gen_write_ref_array_post_barrier(R9_ARG7, R10_ARG8, R11_scratch1, /*branchToEnd*/ false);
1960 
1961     return start;
1962   }
1963 
1964   // Arguments for generated stub:
1965   //   R3_ARG1   - source byte array address
1966   //   R4_ARG2   - destination byte array address
1967   //   R5_ARG3   - round key array
1968   address generate_aescrypt_encryptBlock() {
1969     assert(UseAES, "need AES instructions and misaligned SSE support");
1970     StubCodeMark mark(this, "StubRoutines", "aescrypt_encryptBlock");
1971 
1972     address start = __ function_entry();
1973 
1974     Label L_doLast;
1975 
1976     Register from           = R3_ARG1;  // source array address
1977     Register to             = R4_ARG2;  // destination array address
1978     Register key            = R5_ARG3;  // round key array
1979 
1980     Register keylen         = R8;
1981     Register temp           = R9;
1982     Register keypos         = R10;
1983     Register fifteen        = R12;
1984 
1985     VectorRegister vRet     = VR0;
1986 
1987     VectorRegister vKey1    = VR1;
1988     VectorRegister vKey2    = VR2;
1989     VectorRegister vKey3    = VR3;
1990     VectorRegister vKey4    = VR4;
1991 
1992     VectorRegister fromPerm = VR5;
1993     VectorRegister keyPerm  = VR6;
1994     VectorRegister toPerm   = VR7;
1995     VectorRegister fSplt    = VR8;
1996 
1997     VectorRegister vTmp1    = VR9;
1998     VectorRegister vTmp2    = VR10;
1999     VectorRegister vTmp3    = VR11;
2000     VectorRegister vTmp4    = VR12;
2001 
2002     __ li              (fifteen, 15);
2003 
2004     // load unaligned from[0-15] to vRet
2005     __ lvx             (vRet, from);
2006     __ lvx             (vTmp1, fifteen, from);
2007     __ lvsl            (fromPerm, from);
2008 #ifdef VM_LITTLE_ENDIAN
2009     __ vspltisb        (fSplt, 0x0f);
2010     __ vxor            (fromPerm, fromPerm, fSplt);
2011 #endif
2012     __ vperm           (vRet, vRet, vTmp1, fromPerm);
2013 
2014     // load keylen (44 or 52 or 60)
2015     __ lwz             (keylen, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT), key);
2016 
2017     // permute control for loading the round keys
2018     __ load_perm       (keyPerm, key);
2019 #ifdef VM_LITTLE_ENDIAN
2020     __ vspltisb        (vTmp2, -16);
2021     __ vrld            (keyPerm, keyPerm, vTmp2);
2022     __ vrld            (keyPerm, keyPerm, vTmp2);
2023     __ vsldoi          (keyPerm, keyPerm, keyPerm, 8);
2024 #endif
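         // Note: load_perm and vec_perm are the endianness-aware helpers in
         // macroAssembler_ppc that hide the big/little-endian differences of
         // lvsr/vperm; they are used below to combine two quadword loads
         // into one aligned round key.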
2025 
2026     // load the 1st round key to vTmp1
2027     __ lvx             (vTmp1, key);
2028     __ li              (keypos, 16);
2029     __ lvx             (vKey1, keypos, key);
2030     __ vec_perm        (vTmp1, vKey1, keyPerm);
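         // Note: the key schedule is now read with absolute offsets, and the
         // raw quadword fetched for one key is reused for the next, so each
         // round key costs a single lvx plus one vec_perm.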
2031 
2032     // 1st round
2033     __ vxor            (vRet, vRet, vTmp1);
2034 
2035     // load the 2nd round key to vKey1
2036     __ li              (keypos, 32);
2037     __ lvx             (vKey2, keypos, key);
2038     __ vec_perm        (vKey1, vKey2, keyPerm);
2039 
2040     // load the 3rd round key to vKey2
2041     __ li              (keypos, 48);
2042     __ lvx             (vKey3, keypos, key);
2043     __ vec_perm        (vKey2, vKey3, keyPerm);
2044 
2045     // load the 4th round key to vKey3
2046     __ li              (keypos, 64);
2047     __ lvx             (vKey4, keypos, key);
2048     __ vec_perm        (vKey3, vKey4, keyPerm);
2049 
2050     // load the 5th round key to vKey4
2051     __ li              (keypos, 80);
2052     __ lvx             (vTmp1, keypos, key);
2053     __ vec_perm        (vKey4, vTmp1, keyPerm);
2054 
2055     // 2nd - 5th rounds
2056     __ vcipher         (vRet, vRet, vKey1);
2057     __ vcipher         (vRet, vRet, vKey2);
2058     __ vcipher         (vRet, vRet, vKey3);
2059     __ vcipher         (vRet, vRet, vKey4);
2060 
2061     // load the 6th round key to vKey1
2062     __ li              (keypos, 96);
2063     __ lvx             (vKey2, keypos, key);
2064     __ vec_perm        (vKey1, vTmp1, vKey2, keyPerm);
2065 
2066     // load the 7th round key to vKey2
2067     __ li              (keypos, 112);
2068     __ lvx             (vKey3, keypos, key);
2069     __ vec_perm        (vKey2, vKey3, keyPerm);
2070 
2071     // load the 8th round key to vKey3
2072     __ li              (keypos, 128);
2073     __ lvx             (vKey4, keypos, key);
2074     __ vec_perm        (vKey3, vKey4, keyPerm);
2075 
2076     // load the 9th round key to vKey4
2077     __ li              (keypos, 144);
2078     __ lvx             (vTmp1, keypos, key);
2079     __ vec_perm        (vKey4, vTmp1, keyPerm);
2080 
2081     // 6th - 9th rounds
2082     __ vcipher         (vRet, vRet, vKey1);
2083     __ vcipher         (vRet, vRet, vKey2);
2084     __ vcipher         (vRet, vRet, vKey3);
2085     __ vcipher         (vRet, vRet, vKey4);
2086 
2087     // load the 10th round key to vKey1
2088     __ li              (keypos, 160);
2089     __ lvx             (vKey2, keypos, key);
2090     __ vec_perm        (vKey1, vTmp1, vKey2, keyPerm);
2091 
2092     // load the 11th round key to vKey2
2093     __ li              (keypos, 176);
2094     __ lvx             (vTmp1, keypos, key);
2095     __ vec_perm        (vKey2, vTmp1, keyPerm);
2096 
2097     // if all round keys are loaded, skip next 4 rounds
2098     __ cmpwi           (CCR0, keylen, 44);
2099     __ beq             (CCR0, L_doLast);
2100 
2101     // 10th - 11th rounds
2102     __ vcipher         (vRet, vRet, vKey1);
2103     __ vcipher         (vRet, vRet, vKey2);
2104 
2105     // load the 12th round key to vKey1
2106     __ li              (keypos, 192);
2107     __ lvx             (vKey2, keypos, key);
2108     __ vec_perm        (vKey1, vTmp1, vKey2, keyPerm);
2109 
2110     // load the 13th round key to vKey2
2111     __ li              (keypos, 208);
2112     __ lvx             (vTmp1, keypos, key);
2113     __ vec_perm        (vKey2, vTmp1, keyPerm);
2114 
2115     // if all round keys are loaded, skip next 2 rounds
2116     __ cmpwi           (CCR0, keylen, 52);
2117     __ beq             (CCR0, L_doLast);
2118 
2119     // 12th - 13th rounds
2120     __ vcipher         (vRet, vRet, vKey1);
2121     __ vcipher         (vRet, vRet, vKey2);
2122 
2123     // load the 14th round key to vKey1
2124     __ li              (keypos, 224);
2125     __ lvx             (vKey2, keypos, key);
2126     __ vec_perm        (vKey1, vTmp1, vKey2, keyPerm);
2127 
2128     // load the 15th round key to vKey2
2129     __ li              (keypos, 240);
2130     __ lvx             (vTmp1, keypos, key);
2131     __ vec_perm        (vKey2, vTmp1, keyPerm);
2132 
2133     __ bind(L_doLast);
2134 
2135     // last two rounds
2136     __ vcipher         (vRet, vRet, vKey1);
2137     __ vcipherlast     (vRet, vRet, vKey2);
2138 
2139     // store result (unaligned)
2140 #ifdef VM_LITTLE_ENDIAN
2141     __ lvsl            (toPerm, to);
2142 #else
2143     __ lvsr            (toPerm, to);
2144 #endif
2145     __ vspltisb        (vTmp3, -1);
2146     __ vspltisb        (vTmp4, 0);
2147     __ lvx             (vTmp1, to);
2148     __ lvx             (vTmp2, fifteen, to);
2149 #ifdef VM_LITTLE_ENDIAN
2150     __ vperm           (vTmp3, vTmp3, vTmp4, toPerm); // generate select mask
2151     __ vxor            (toPerm, toPerm, fSplt);       // swap bytes
2152 #else
2153     __ vperm           (vTmp3, vTmp4, vTmp3, toPerm); // generate select mask
2154 #endif
2155     __ vperm           (vTmp4, vRet, vRet, toPerm);   // rotate data
2156     __ vsel            (vTmp2, vTmp4, vTmp2, vTmp3);
2157     __ vsel            (vTmp1, vTmp1, vTmp4, vTmp3);
2158     __ stvx            (vTmp2, fifteen, to);          // store this one first (may alias)
2159     __ stvx            (vTmp1, to);
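         // Note: if 'to' is 16-byte aligned the two quadwords are the same
         // location; storing the 'to' + 15 copy first lets the final stvx
         // at 'to' write the fully merged result.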
2160 
2161     __ blr();
2162     return start;
2163   }
2164 
2165   // Arguments for generated stub:
2166   //   R3_ARG1   - source byte array address
2167   //   R4_ARG2   - destination byte array address
2168   //   R5_ARG3   - K (key) in little endian int array
2169   address generate_aescrypt_decryptBlock() {
2170     assert(UseAES, "need AES instructions and misaligned SSE support");
2171     StubCodeMark mark(this, "StubRoutines", "aescrypt_decryptBlock");
2172 
2173     address start = __ function_entry();
2174 
2175     Label L_doLast;
2176     Label L_do44;
2177     Label L_do52;
2178     Label L_do60;
2179 
2180     Register from           = R3_ARG1;  // source array address
2181     Register to             = R4_ARG2;  // destination array address
2182     Register key            = R5_ARG3;  // round key array
2183 
2184     Register keylen         = R8;
2185     Register temp           = R9;
2186     Register keypos         = R10;
2187     Register fifteen        = R12;
2188 
2189     VectorRegister vRet     = VR0;
2190 
2191     VectorRegister vKey1    = VR1;
2192     VectorRegister vKey2    = VR2;
2193     VectorRegister vKey3    = VR3;
2194     VectorRegister vKey4    = VR4;
2195     VectorRegister vKey5    = VR5;
2196 
2197     VectorRegister fromPerm = VR6;
2198     VectorRegister keyPerm  = VR7;
2199     VectorRegister toPerm   = VR8;
2200     VectorRegister fSplt    = VR9;
2201 
2202     VectorRegister vTmp1    = VR10;
2203     VectorRegister vTmp2    = VR11;
2204     VectorRegister vTmp3    = VR12;
2205     VectorRegister vTmp4    = VR13;
2206 
2207     __ li              (fifteen, 15);
2208 
2209     // load unaligned from[0-15] to vRet
2210     __ lvx             (vRet, from);
2211     __ lvx             (vTmp1, fifteen, from);
2212     __ lvsl            (fromPerm, from);
2213 #ifdef VM_LITTLE_ENDIAN
2214     __ vspltisb        (fSplt, 0x0f);
2215     __ vxor            (fromPerm, fromPerm, fSplt);
2216 #endif
2217     __ vperm           (vRet, vRet, vTmp1, fromPerm); // align [and byte swap in LE]
2218 
2219     // load keylen (44 or 52 or 60)
2220     __ lwz             (keylen, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT), key);
2221 
2222     // permute control for loading the round keys
2223     __ load_perm       (keyPerm, key);
2224 #ifdef VM_LITTLE_ENDIAN
2225     __ vxor            (vTmp2, vTmp2, vTmp2);
2226     __ vspltisb        (vTmp2, -16);
2227     __ vrld            (keyPerm, keyPerm, vTmp2);
2228     __ vrld            (keyPerm, keyPerm, vTmp2);
2229     __ vsldoi          (keyPerm, keyPerm, keyPerm, 8);
2230 #endif
2231 
2232     __ cmpwi           (CCR0, keylen, 44);
2233     __ beq             (CCR0, L_do44);
2234 
2235     __ cmpwi           (CCR0, keylen, 52);
2236     __ beq             (CCR0, L_do52);
2237 
2238     // load the 15th round key to vKey1
2239     __ li              (keypos, 240);
2240     __ lvx             (vKey1, keypos, key);
2241     __ li              (keypos, 224);
2242     __ lvx             (vKey2, keypos, key);
2243     __ vec_perm        (vKey1, vKey2, vKey1, keyPerm);
2244 
2245     // load the 14th round key to vKey2
2246     __ li              (keypos, 208);
2247     __ lvx             (vKey3, keypos, key);
2248     __ vec_perm        (vKey2, vKey3, vKey2, keyPerm);
2249 
2250     // load the 13th round key to vKey3
2251     __ li              (keypos, 192);
2252     __ lvx             (vKey4, keypos, key);
2253     __ vec_perm        (vKey3, vKey4, vKey3, keyPerm);
2254 
2255     // load the 12th round key to vKey4
2256     __ li              (keypos, 176);
2257     __ lvx             (vKey5, keypos, key);
2258     __ vec_perm        (vKey4, vKey5, vKey4, keyPerm);
2259 
2260     // load the 11th round key to vKey5
2261     __ li              (keypos, 160);
2262     __ lvx             (vTmp1, keypos, key);
2263     __ vec_perm        (vKey5, vTmp1, vKey5, keyPerm);
2264 
2265     // 1st - 5th rounds
2266     __ vxor            (vRet, vRet, vKey1);
2267     __ vncipher        (vRet, vRet, vKey2);
2268     __ vncipher        (vRet, vRet, vKey3);
2269     __ vncipher        (vRet, vRet, vKey4);
2270     __ vncipher        (vRet, vRet, vKey5);
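         // Note: per the Power ISA, vncipher adds the round key before the
         // inverse MixColumns step, so the unmodified encryption key
         // schedule can be used in reverse; no InvMixColumns transform of
         // the keys is needed (unlike x86 AESDEC).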
2271 
2272     __ b               (L_doLast);
2273 
2274     __ bind            (L_do52);
2275 
2276     // load the 13th round key to vKey1
2277     __ li              (keypos, 208);
2278     __ lvx             (vKey1, keypos, key);
2279     __ li              (keypos, 192);
2280     __ lvx             (vKey2, keypos, key);
2281     __ vec_perm        (vKey1, vKey2, vKey1, keyPerm);
2282 
2283     // load the 12th round key to vKey2
2284     __ li              (keypos, 176);
2285     __ lvx             (vKey3, keypos, key);
2286     __ vec_perm        (vKey2, vKey3, vKey2, keyPerm);
2287 
2288     // load the 11th round key to vKey3
2289     __ li              (keypos, 160);
2290     __ lvx             (vTmp1, keypos, key);
2291     __ vec_perm        (vKey3, vTmp1, vKey3, keyPerm);
2292 
2293     // 1st - 3rd rounds
2294     __ vxor            (vRet, vRet, vKey1);
2295     __ vncipher        (vRet, vRet, vKey2);
2296     __ vncipher        (vRet, vRet, vKey3);
2297 
2298     __ b               (L_doLast);
2299 
2300     __ bind            (L_do44);
2301 
2302     // load the 11th round key to vKey1
2303     __ li              (keypos, 176);
2304     __ lvx             (vKey1, keypos, key);
2305     __ li              (keypos, 160);
2306     __ lvx             (vTmp1, keypos, key);
2307     __ vec_perm        (vKey1, vTmp1, vKey1, keyPerm);
2308 
2309     // 1st round
2310     __ vxor            (vRet, vRet, vKey1);
2311 
2312     __ bind            (L_doLast);
2313 
2314     // load the 10th round key to vKey1
2315     __ li              (keypos, 144);
2316     __ lvx             (vKey2, keypos, key);
2317     __ vec_perm        (vKey1, vKey2, vTmp1, keyPerm);
2318 
2319     // load the 9th round key to vKey2
2320     __ li              (keypos, 128);
2321     __ lvx             (vKey3, keypos, key);
2322     __ vec_perm        (vKey2, vKey3, vKey2, keyPerm);
2323 
2324     // load the 8th round key to vKey3
2325     __ li              (keypos, 112);
2326     __ lvx             (vKey4, keypos, key);
2327     __ vec_perm        (vKey3, vKey4, vKey3, keyPerm);
2328 
2329     // load the 7th round key to vKey4
2330     __ li              (keypos, 96);
2331     __ lvx             (vKey5, keypos, key);
2332     __ vec_perm        (vKey4, vKey5, vKey4, keyPerm);
2333 
2334     // load the 6th round key to vKey5
2335     __ li              (keypos, 80);
2336     __ lvx             (vTmp1, keypos, key);
2337     __ vec_perm        (vKey5, vTmp1, vKey5, keyPerm);
2338 
2339     // last 10th - 6th rounds
2340     __ vncipher        (vRet, vRet, vKey1);
2341     __ vncipher        (vRet, vRet, vKey2);
2342     __ vncipher        (vRet, vRet, vKey3);
2343     __ vncipher        (vRet, vRet, vKey4);
2344     __ vncipher        (vRet, vRet, vKey5);
2345 
2346     // load the 5th round key to vKey1
2347     __ li              (keypos, 64);
2348     __ lvx             (vKey2, keypos, key);
2349     __ vec_perm        (vKey1, vKey2, vTmp1, keyPerm);
2350 
2351     // load the 4th round key to vKey2
2352     __ li              (keypos, 48);
2353     __ lvx             (vKey3, keypos, key);
2354     __ vec_perm        (vKey2, vKey3, vKey2, keyPerm);
2355 
2356     // load the 3rd round key to vKey3
2357     __ li              (keypos, 32);
2358     __ lvx             (vKey4, keypos, key);
2359     __ vec_perm        (vKey3, vKey4, vKey3, keyPerm);
2360 
2361     // load the 2nd round key to vKey4
2362     __ li              (keypos, 16);
2363     __ lvx             (vKey5, keypos, key);
2364     __ vec_perm        (vKey4, vKey5, vKey4, keyPerm);
2365 
2366     // load the 1st round key to vKey5
2367     __ lvx             (vTmp1, key);
2368     __ vec_perm        (vKey5, vTmp1, vKey5, keyPerm);
2369 
2370     // last 5th - 1st rounds
2371     __ vncipher        (vRet, vRet, vKey1);
2372     __ vncipher        (vRet, vRet, vKey2);
2373     __ vncipher        (vRet, vRet, vKey3);
2374     __ vncipher        (vRet, vRet, vKey4);
2375     __ vncipherlast    (vRet, vRet, vKey5);
2376 
2377     // store result (unaligned)
2378 #ifdef VM_LITTLE_ENDIAN
2379     __ lvsl            (toPerm, to);
2380 #else
2381     __ lvsr            (toPerm, to);
2382 #endif
2383     __ vspltisb        (vTmp3, -1);
2384     __ vspltisb        (vTmp4, 0);
2385     __ lvx             (vTmp1, to);
2386     __ lvx             (vTmp2, fifteen, to);
2387 #ifdef VM_LITTLE_ENDIAN
2388     __ vperm           (vTmp3, vTmp3, vTmp4, toPerm); // generate select mask
2389     __ vxor            (toPerm, toPerm, fSplt);       // swap bytes
2390 #else
2391     __ vperm           (vTmp3, vTmp4, vTmp3, toPerm); // generate select mask
2392 #endif
2393     __ vperm           (vTmp4, vRet, vRet, toPerm);   // rotate data
2394     __ vsel            (vTmp2, vTmp4, vTmp2, vTmp3);
2395     __ vsel            (vTmp1, vTmp1, vTmp4, vTmp3);
2396     __ stvx            (vTmp2, fifteen, to);          // store this one first (may alias)
2397     __ stvx            (vTmp1, to);
2398 
2399     __ blr();
2400     return start;
2401   }
2402 
2403   void generate_arraycopy_stubs() {
2404     // Note: the disjoint stubs must be generated first, as some of
2405     // the conjoint stubs use them.
2406 
2407     // non-aligned disjoint versions
2408     StubRoutines::_jbyte_disjoint_arraycopy       = generate_disjoint_byte_copy(false, "jbyte_disjoint_arraycopy");
2409     StubRoutines::_jshort_disjoint_arraycopy      = generate_disjoint_short_copy(false, "jshort_disjoint_arraycopy");
2410     StubRoutines::_jint_disjoint_arraycopy        = generate_disjoint_int_copy(false, "jint_disjoint_arraycopy");
2411     StubRoutines::_jlong_disjoint_arraycopy       = generate_disjoint_long_copy(false, "jlong_disjoint_arraycopy");
2412     StubRoutines::_oop_disjoint_arraycopy         = generate_disjoint_oop_copy(false, "oop_disjoint_arraycopy", false);
2413     StubRoutines::_oop_disjoint_arraycopy_uninit  = generate_disjoint_oop_copy(false, "oop_disjoint_arraycopy_uninit", true);
2414 
2415     // aligned disjoint versions
2416     StubRoutines::_arrayof_jbyte_disjoint_arraycopy      = generate_disjoint_byte_copy(true, "arrayof_jbyte_disjoint_arraycopy");
2417     StubRoutines::_arrayof_jshort_disjoint_arraycopy     = generate_disjoint_short_copy(true, "arrayof_jshort_disjoint_arraycopy");