--- /dev/null 2021-01-19 17:38:25.908523431 +0000 +++ new/test/compiler/c2/cr6340864/TestByteVect.java 2021-01-25 19:32:18.555916012 +0000 @@ -0,0 +1,1491 @@ +/* + * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ */ + +/** + * @test + * @bug 6340864 + * @summary Implement vectorization optimizations in hotspot-server + * + * @run main/othervm/timeout=400 -Xbatch -Xmx128m compiler.c2.cr6340864.TestByteVect + * @run main/othervm/timeout=400 -Xbatch -Xmx128m -XX:MaxVectorSize=8 compiler.c2.cr6340864.TestByteVect + * @run main/othervm/timeout=400 -Xbatch -Xmx128m -XX:MaxVectorSize=16 compiler.c2.cr6340864.TestByteVect + * @run main/othervm/timeout=400 -Xbatch -Xmx128m -XX:MaxVectorSize=32 compiler.c2.cr6340864.TestByteVect + */ + +package compiler.c2.cr6340864; + +public class TestByteVect { + private static final int ARRLEN = 997; + private static final int ITERS = 11000; + private static final int ADD_INIT = 63; + private static final int BIT_MASK = 0xB7; + private static final int VALUE = 3; + private static final int SHIFT = 8; + + public static void main(String args[]) { + System.out.println("Testing Byte vectors"); + int errn = test(); + if (errn > 0) { + System.err.println("FAILED: " + errn + " errors"); + System.exit(97); + } + System.out.println("PASSED"); + } + + static int test() { + byte[] a0 = new byte[ARRLEN]; + byte[] a1 = new byte[ARRLEN]; + byte[] a2 = new byte[ARRLEN]; + byte[] a3 = new byte[ARRLEN]; + byte[] a4 = new byte[ARRLEN]; + short[] p2 = new short[ARRLEN/2]; + int[] p4 = new int[ARRLEN/4]; + long[] p8 = new long[ARRLEN/8]; + // Initialize + int gold_sum = 0; + for (int i=0; i>>VALUE)); + } + test_srlv(a0, a1, VALUE); + for (int i=0; i>>VALUE)); + } + + test_srac(a0, a1); + for (int i=0; i>VALUE)); + } + test_srav(a0, a1, VALUE); + for (int i=0; i>VALUE)); + } + + test_sllc_n(a0, a1); + for (int i=0; i>>(-VALUE))); + } + test_srlv(a0, a1, -VALUE); + for (int i=0; i>>(-VALUE))); + } + + test_srac_n(a0, a1); + for (int i=0; i>(-VALUE))); + } + test_srav(a0, a1, -VALUE); + for (int i=0; i>(-VALUE))); + } + + test_sllc_o(a0, a1); + for (int i=0; i>>SHIFT)); + } + test_srlv(a0, a1, SHIFT); + for (int i=0; i>>SHIFT)); + } + + test_srac_o(a0, a1); + for 
(int i=0; i>SHIFT)); + } + test_srav(a0, a1, SHIFT); + for (int i=0; i>SHIFT)); + } + + test_sllc_on(a0, a1); + for (int i=0; i>>(-SHIFT))); + } + test_srlv(a0, a1, -SHIFT); + for (int i=0; i>>(-SHIFT))); + } + + test_srac_on(a0, a1); + for (int i=0; i>(-SHIFT))); + } + test_srav(a0, a1, -SHIFT); + for (int i=0; i>(-SHIFT))); + } + + test_sllc_add(a0, a1); + for (int i=0; i>>VALUE)); + } + test_srlv_add(a0, a1, ADD_INIT); + for (int i=0; i>>VALUE)); + } + + test_srac_add(a0, a1); + for (int i=0; i>VALUE)); + } + test_srav_add(a0, a1, ADD_INIT); + for (int i=0; i>VALUE)); + } + + test_sllc_and(a0, a1); + for (int i=0; i>>VALUE)); + } + test_srlv_and(a0, a1, BIT_MASK); + for (int i=0; i>>VALUE)); + } + + test_srac_and(a0, a1); + for (int i=0; i>VALUE)); + } + test_srav_and(a0, a1, BIT_MASK); + for (int i=0; i>VALUE)); + } + + test_pack2(p2, a1); + for (int i=0; i 0) + return errn; + + System.out.println("Time"); + long start, end; + + start = System.currentTimeMillis(); + for (int i=0; i>>VALUE); + } + } + static void test_srlc_n(byte[] a0, byte[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (byte)(a1[i]>>>(-VALUE)); + } + } + static void test_srlc_o(byte[] a0, byte[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (byte)(a1[i]>>>SHIFT); + } + } + static void test_srlc_on(byte[] a0, byte[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (byte)(a1[i]>>>(-SHIFT)); + } + } + static void test_srlv(byte[] a0, byte[] a1, int b) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (byte)(a1[i]>>>b); + } + } + static void test_srlc_add(byte[] a0, byte[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (byte)((a1[i] + ADD_INIT)>>>VALUE); + } + } + static void test_srlv_add(byte[] a0, byte[] a1, int b) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (byte)((a1[i] + b)>>>VALUE); + } + } + static void test_srlc_and(byte[] a0, byte[] a1) { + for (int i = 0; i < a0.length; i+=1) { + a0[i] = (byte)((a1[i] & BIT_MASK)>>>VALUE); + } + } + 
static void test_srlv_and(byte[] a0, byte[] a1, int b) { // a0[i] = (byte)((a1[i] & b) >>> VALUE): the mask is an argument, the shift count is the constant
+    for (int i = 0; i < a0.length; i+=1) { // bound is a0.length; a1 is read with the same index
+      a0[i] = (byte)((a1[i] & b)>>>VALUE); // a1[i] is widened to int before the AND and the unsigned shift; the cast keeps the low 8 bits
+    }
+  }
+
+  static void test_srac(byte[] a0, byte[] a1) { // arithmetic (sign-propagating) right shift of every a1 element by the constant VALUE, stored into a0
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)(a1[i]>>VALUE); // the shift is performed on the int-widened byte, then narrowed back
+    }
+  }
+  static void test_srac_n(byte[] a0, byte[] a1) { // same loop with the negated constant -VALUE as shift count
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)(a1[i]>>(-VALUE)); // an int shift uses only the low five bits of its count, so this shifts by (-VALUE & 31), not to the left
+    }
+  }
+  static void test_srac_o(byte[] a0, byte[] a1) { // same loop with the constant SHIFT as shift count
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)(a1[i]>>SHIFT); // SHIFT is declared as 8 in this class, so only the sign fill of the widened byte remains (0 or -1)
+    }
+  }
+  static void test_srac_on(byte[] a0, byte[] a1) { // same loop with the negated constant -SHIFT as shift count
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)(a1[i]>>(-SHIFT)); // effective count is (-SHIFT & 31) because int shifts mask the count to five bits
+    }
+  }
+  static void test_srav(byte[] a0, byte[] a1, int b) { // arithmetic right shift where the count b is supplied by the caller instead of a constant
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)(a1[i]>>b); // b may be negative or >= 8; the int shift masks it to its low five bits
+    }
+  }
+  static void test_srac_add(byte[] a0, byte[] a1) { // adds the constant ADD_INIT, then shifts right (arithmetic) by the constant VALUE
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)((a1[i] + ADD_INIT)>>VALUE); // the sum is computed in int, so it is not truncated to a byte before the shift
+    }
+  }
+  static void test_srav_add(byte[] a0, byte[] a1, int b) { // same as test_srac_add, but the addend b is supplied by the caller
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)((a1[i] + b)>>VALUE); // int addition followed by an arithmetic shift by the constant VALUE
+    }
+  }
+  static void test_srac_and(byte[] a0, byte[] a1) { // ANDs with the constant BIT_MASK, then shifts right (arithmetic) by the constant VALUE
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)((a1[i] & BIT_MASK)>>VALUE); // the AND is applied to the sign-extended int value of a1[i]
+    }
+  }
+  static void test_srav_and(byte[] a0, byte[] a1, int b) { // same as test_srac_and, but the mask b is supplied by the caller
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)((a1[i] & b)>>VALUE); // int AND followed by an arithmetic shift by the constant VALUE
+    }
+  }
+
+  static void test_pack2(short[] p2, byte[] a1) { // packs byte pairs of a1 into p2: a1[2*i] becomes the low byte, a1[2*i+1] the high byte of p2[i]
+    if (p2.length*2 > a1.length) return; // does nothing when a1 is too short to fill every element of p2
+    for (int i = 0; i < p2.length; i+=1) {
+      short l0 = (short)a1[i*2+0]; // widening a byte sign-extends it
+      short l1 = (short)a1[i*2+1];
+      p2[i] = (short)((l1 << 8) | (l0 & 0xFF)); // & 0xFF removes l0's sign extension so it cannot disturb the high byte
+    }
+  }
+  static void test_unpack2(byte[] a0, short[] p2) { // splits each short of p2 into two bytes of a0: low byte to a0[2*i], high byte to a0[2*i+1]
+    if (p2.length*2 > a0.length) return; // does nothing when a0 cannot hold two bytes per element of p2
+    for (int i = 0; i < p2.length; i+=1) {
+      short l = p2[i];
+      a0[i*2+0] = (byte)(l & 0xFF);
+      a0[i*2+1] = (byte)(l >> 8); // the byte cast keeps only the low 8 bits of the shifted value
+    }
+  }
+  static void test_pack2_swap(short[] p2, byte[] a1) { // like test_pack2 with the byte order reversed: a1[2*i] is the high byte, a1[2*i+1] the low byte
+    if (p2.length*2 > a1.length) return; // does nothing when a1 is too short to fill every element of p2
+    for (int i = 0; i < p2.length; i+=1) {
+      short l0 = (short)a1[i*2+0];
+      short l1 = (short)a1[i*2+1];
+      p2[i] = (short)((l0 << 8) | (l1 & 0xFF)); // here l1 is the operand that gets masked and l0 the one shifted up
+    }
+  }
+  static void test_unpack2_swap(byte[] a0, short[] p2) { // like test_unpack2 with the byte order reversed: high byte to a0[2*i], low byte to a0[2*i+1]
+    if (p2.length*2 > a0.length) return; // does nothing when a0 cannot hold two bytes per element of p2
+    for (int i = 0; i < p2.length; i+=1) {
+      short l = p2[i];
+      a0[i*2+0] = (byte)(l >> 8);
+      a0[i*2+1] = (byte)(l & 0xFF);
+    }
+  }
+
+  static void test_pack4(int[] p4, byte[] a1) { // packs four consecutive bytes of a1 into p4[i]: a1[4*i] in bits 0-7 up to a1[4*i+3] in bits 24-31
+    if (p4.length*4 > a1.length) return; // does nothing when a1 is too short to fill every element of p4
+    for (int i = 0; i < p4.length; i+=1) {
+      int l0 = (int)a1[i*4+0]; // each byte is sign-extended to int here and masked below
+      int l1 = (int)a1[i*4+1];
+      int l2 = (int)a1[i*4+2];
+      int l3 = (int)a1[i*4+3];
+      p4[i] = (l0 & 0xFF) | // every operand is masked to 8 bits before it is shifted into place
+             ((l1 & 0xFF) << 8) |
+             ((l2 & 0xFF) << 16) |
+             ((l3 & 0xFF) << 24);
+    }
+  }
+  static void test_unpack4(byte[] a0, int[] p4) { // writes the four bytes of p4[i] to a0[4*i .. 4*i+3], lowest byte first
+    if (p4.length*4 > a0.length) return; // does nothing when a0 cannot hold four bytes per element of p4
+    for (int i = 0; i < p4.length; i+=1) {
+      int l = p4[i];
+      a0[i*4+0] = (byte)(l & 0xFF);
+      a0[i*4+1] = (byte)(l >> 8); // no mask needed: the byte cast discards everything above bit 7
+      a0[i*4+2] = (byte)(l >> 16);
+      a0[i*4+3] = (byte)(l >> 24);
+    }
+  }
+  static void test_pack4_swap(int[] p4, byte[] a1) { // like test_pack4 with the byte order reversed: a1[4*i] in bits 24-31 down to a1[4*i+3] in bits 0-7
+    if (p4.length*4 > a1.length) return; // does nothing when a1 is too short to fill every element of p4
+    for (int i = 0; i < p4.length; i+=1) {
+      int l0 = (int)a1[i*4+0];
+      int l1 = (int)a1[i*4+1];
+      int l2 = (int)a1[i*4+2];
+      int l3 = (int)a1[i*4+3];
+      p4[i] = (l3 & 0xFF) | // the last byte of the group lands in the lowest bits
+             ((l2 & 0xFF) << 8) |
+             ((l1 & 0xFF) << 16) |
+             ((l0 & 0xFF) << 24);
+    }
+  }
+  static void test_unpack4_swap(byte[] a0, int[] p4) { // like test_unpack4 with the byte order reversed: highest byte of p4[i] first
+    if (p4.length*4 > a0.length) return; // does nothing when a0 cannot hold four bytes per element of p4
+    for (int i = 0; i < p4.length; i+=1) {
+      int l = p4[i];
+      a0[i*4+0] = (byte)(l >> 24);
+      a0[i*4+1] = (byte)(l >> 16);
+      a0[i*4+2] = (byte)(l >> 8);
+      a0[i*4+3] = (byte)(l & 0xFF);
+    }
+  }
+
+  static void test_pack8(long[] p8, byte[] a1) { // packs eight consecutive bytes of a1 into p8[i]: a1[8*i] in bits 0-7 up to a1[8*i+7] in bits 56-63
+    if (p8.length*8 > a1.length) return; // does nothing when a1 is too short to fill every element of p8
+    for (int i = 0; i < p8.length; i+=1) {
+      long l0 = (long)a1[i*8+0]; // each byte is sign-extended to long here and masked below
+      long l1 = (long)a1[i*8+1];
+      long l2 = (long)a1[i*8+2];
+      long l3 = (long)a1[i*8+3];
+      long l4 = (long)a1[i*8+4];
+      long l5 = (long)a1[i*8+5];
+      long l6 = (long)a1[i*8+6];
+      long l7 = (long)a1[i*8+7];
+      p8[i] = (l0 & 0xFFl) | // the long-typed mask removes the sign extension before each shift
+             ((l1 & 0xFFl) << 8) |
+             ((l2 & 0xFFl) << 16) |
+             ((l3 & 0xFFl) << 24) |
+             ((l4 & 0xFFl) << 32) |
+             ((l5 & 0xFFl) << 40) |
+             ((l6 & 0xFFl) << 48) |
+             ((l7 & 0xFFl) << 56);
+    }
+  }
+  static void test_unpack8(byte[] a0, long[] p8) { // writes the eight bytes of p8[i] to a0[8*i .. 8*i+7], lowest byte first
+    if (p8.length*8 > a0.length) return; // does nothing when a0 cannot hold eight bytes per element of p8
+    for (int i = 0; i < p8.length; i+=1) {
+      long l = p8[i];
+      a0[i*8+0] = (byte)(l & 0xFFl);
+      a0[i*8+1] = (byte)(l >> 8); // no mask needed: the byte cast discards everything above bit 7
+      a0[i*8+2] = (byte)(l >> 16);
+      a0[i*8+3] = (byte)(l >> 24);
+      a0[i*8+4] = (byte)(l >> 32);
+      a0[i*8+5] = (byte)(l >> 40);
+      a0[i*8+6] = (byte)(l >> 48);
+      a0[i*8+7] = (byte)(l >> 56);
+    }
+  }
+  static void test_pack8_swap(long[] p8, byte[] a1) { // like test_pack8 with the byte order reversed: a1[8*i] in bits 56-63 down to a1[8*i+7] in bits 0-7
+    if (p8.length*8 > a1.length) return; // does nothing when a1 is too short to fill every element of p8
+    for (int i = 0; i < p8.length; i+=1) {
+      long l0 = (long)a1[i*8+0];
+      long l1 = (long)a1[i*8+1];
+      long l2 = (long)a1[i*8+2];
+      long l3 = (long)a1[i*8+3];
+      long l4 = (long)a1[i*8+4];
+      long l5 = (long)a1[i*8+5];
+      long l6 = (long)a1[i*8+6];
+      long l7 = (long)a1[i*8+7];
+      p8[i] = (l7 & 0xFFl) | // the last byte of the group lands in the lowest bits
+             ((l6 & 0xFFl) << 8) |
+             ((l5 & 0xFFl) << 16) |
+             ((l4 & 0xFFl) << 24) |
+             ((l3 & 0xFFl) << 32) |
+             ((l2 & 0xFFl) << 40) |
+             ((l1 & 0xFFl) << 48) |
+             ((l0 & 0xFFl) << 56);
+    }
+  }
+  static void test_unpack8_swap(byte[] a0, long[] p8) { // like test_unpack8 with the byte order reversed: highest byte of p8[i] first
+    if (p8.length*8 > a0.length) return; // does nothing when a0 cannot hold eight bytes per element of p8
+    for (int i = 0; i < p8.length; i+=1) {
+      long l = p8[i];
+      a0[i*8+0] = (byte)(l >> 56);
+      a0[i*8+1] = (byte)(l >> 48);
+      a0[i*8+2] = (byte)(l >> 40);
+      a0[i*8+3] = (byte)(l >> 32);
+      a0[i*8+4] = (byte)(l >> 24);
+      a0[i*8+5] = (byte)(l >> 16);
+      a0[i*8+6] = (byte)(l >> 8);
+      a0[i*8+7] = (byte)(l & 0xFFl);
+    }
+  }
+
+  static int verify(String text, int i, byte elem, byte val) { // byte comparison: returns 1 after reporting a mismatch, 0 when elem equals val
+    if (elem != val) {
+      System.err.println(text + "[" + i + "] = " + elem + " != " + val); // message has the form text[i] = elem != val, values printed in decimal
+      return 1;
+    }
+    return 0;
+  }
+
+  static int verify(String text, int i, short elem, short val) { // short comparison with the same 1/0 result and the same decimal message
+    if (elem != val) {
+      System.err.println(text + "[" + i + "] = " + elem + " != " + val);
+      return 1;
+    }
+    return 0;
+  }
+
+  static int verify(String text, int i, int elem, int val) { // int comparison with the same 1/0 result; the values are printed in hexadecimal
+    if (elem != val) {
+      System.err.println(text + "[" + i + "] = " + Integer.toHexString(elem) + " != " + Integer.toHexString(val)); // toHexString prints the unsigned two's-complement digits
+      return 1;
+    }
+    return 0;
+  }
+
+  static int verify(String text, int i, long elem, long val) { // long comparison with the same 1/0 result; the values are printed in hexadecimal
+    if (elem != val) {
+      System.err.println(text + "[" + i + "] = " + Long.toHexString(elem) + " != " + Long.toHexString(val));
+      return 1;
+    }
+    return 0;
+  }
+}