< prev index next >

test/jdk/jdk/incubator/vector/benchmark/src/main/java/benchmark/jdk/incubator/vector/SumOfUnsignedBytes.java

Print this page
rev 55589 : Species-phase2
rev 55594 : tests and benchmark changes


  40 @Warmup(iterations = 3, time = 1)
  41 @Measurement(iterations = 5, time = 1)
  42 @OutputTimeUnit(TimeUnit.MILLISECONDS)
  43 @State(Scope.Benchmark)
  44 @Fork(value = 1, jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"})
  45 public class SumOfUnsignedBytes extends AbstractVectorBenchmark {
  46 
  47     @Param({"64", "1024", "65536"})
  48     int size;
  49 
  50     private byte[] data;
  51 
  52     @Setup
  53     public void init() {
  54         size = size + size % 32; // FIXME: process tails
  55         data = fillByte(size, i -> (byte)(int)i);
  56 
  57         int sum = scalar();
  58         assertEquals(vectorInt(),   sum);
  59         assertEquals(vectorShort(), sum);
  60         assertEquals(vectorByte(),  sum);
  61         assertEquals(vectorSAD(),   sum);
  62     }
  63 
  64     @Benchmark
  65     public int scalar() {
  66         int sum = 0;
  67         for (int i = 0; i < data.length; i++) {
  68             sum += data[i] & 0xFF;
  69         }
  70         return sum;
  71     }
  72 
  73     // 1. 32-bit accumulators
  74     @Benchmark
  75     public int vectorInt() {
  76         final var lobyte_mask = I256.broadcast(0x000000FF);
  77 
  78         var acc = IntVector.zero(I256);
  79         for (int i = 0; i < data.length; i += B256.length()) {
  80             var vb = ByteVector.fromArray(B256, data, i);
  81             var vi = (IntVector)vb.reinterpret(I256);
  82             for (int j = 0; j < 4; j++) {
  83                 var tj = vi.shiftR(j * 8).and(lobyte_mask);
  84                 acc = acc.add(tj);
  85             }
  86         }
  87         return (int)Integer.toUnsignedLong(acc.addAll());
  88     }
  89 
  90     // 2. 16-bit accumulators
  91     @Benchmark
  92     public int vectorShort() {
  93         final var lobyte_mask = S256.broadcast((short) 0x00FF);
  94 
  95         // FIXME: overflow
  96         var acc = ShortVector.zero(S256);
  97         for (int i = 0; i < data.length; i += B256.length()) {
  98             var vb = ByteVector.fromArray(B256, data, i);
  99             var vs = (ShortVector)vb.reinterpret(S256);
 100             for (int j = 0; j < 2; j++) {
 101                 var tj = vs.shiftR(j * 8).and(lobyte_mask);
 102                 acc = acc.add(tj);
 103             }
 104         }
 105 
 106         int mid = S128.length();
 107         var accLo = ((IntVector)(acc             .reshape(S128).cast(I256))).and(0xFFFF); // low half as ints
 108         var accHi = ((IntVector)(acc.shiftEL(mid).reshape(S128).cast(I256))).and(0xFFFF); // high half as ints
 109         return accLo.addAll() + accHi.addAll();
 110     }
 111 

 112     // 3. 8-bit halves (MISSING: _mm_adds_epu8)
 113     @Benchmark
 114     public int vectorByte() {
 115         int window = 256;
 116         var acc_hi  = IntVector.zero(I256);
 117         var acc8_lo = ByteVector.zero(B256);
 118         for (int i = 0; i < data.length; i += window) {
 119             var acc8_hi = ByteVector.zero(B256);
 120             int limit = Math.min(window, data.length - i);
 121             for (int j = 0; j < limit; j += B256.length()) {
 122                 var vb = ByteVector.fromArray(B256, data, i + j);
 123 
 124                 var t0 = acc8_lo.add(vb);
 125                 var t1 = addSaturated(acc8_lo, vb); // MISSING
 126                 var overflow = t0.notEqual(t1);
 127 
 128                 acc8_lo = t0;
 129                 acc8_hi = acc8_hi.add((byte) 1, overflow);
 130             }
 131             acc_hi = acc_hi.add(sum(acc8_hi));
 132         }
 133         return sum(acc8_lo)
 134                 .add(acc_hi.mul(256)) // overflow
 135                 .addAll();
 136     }
 137 
 138     // 4. Sum Of Absolute Differences (SAD) (MISSING: VPSADBW, _mm256_sad_epu8)
 139     public int vectorSAD() {
 140         var acc = IntVector.zero(I256);
 141         for (int i = 0; i < data.length; i += B256.length()) {
 142             var v = ByteVector.fromArray(B256, data, i);
 143             var sad = sumOfAbsoluteDifferences(v, ByteVector.zero(B256)); // MISSING
 144             acc = acc.add(sad);
 145         }
 146         return acc.addAll();
 147     }
 148 
 149     // Helpers
 150 
 151     static ByteVector addSaturated(ByteVector va, ByteVector vb) {
 152         return ByteVectorHelper.map(va, vb, (i, a, b) -> {
 153             if ((a & 0xFF) + (b & 0xFF) < 0xFF) {
 154                 return (byte) (a + b);
 155             } else {
 156                 return (byte)0xFF;
 157             }
 158         });
 159     }
 160 
 161     IntVector sumOfAbsoluteDifferences(ByteVector va, ByteVector vb) {
 162         var vc = ByteVectorHelper.map(va, vb, (i, a, b) -> {
 163             if ((a & 0xFF) > (b & 0xFF)) {
 164                 return (byte)(a - b);
 165             } else {
 166                 return (byte)(b - a);
 167             }
 168         });
 169         return sum(vc);
 170     }
 171 }


  40 @Warmup(iterations = 3, time = 1)
  41 @Measurement(iterations = 5, time = 1)
  42 @OutputTimeUnit(TimeUnit.MILLISECONDS)
  43 @State(Scope.Benchmark)
  44 @Fork(value = 1, jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"})
  45 public class SumOfUnsignedBytes extends AbstractVectorBenchmark {
  46 
  47     @Param({"64", "1024", "65536"})
  48     int size;
  49 
  50     private byte[] data;
  51 
  52     @Setup
  53     public void init() {
  54         size = size + size % 32; // FIXME: process tails
  55         data = fillByte(size, i -> (byte)(int)i);
  56 
  57         int sum = scalar();
  58         assertEquals(vectorInt(),   sum);
  59         assertEquals(vectorShort(), sum);
  60         //assertEquals(vectorByte(),  sum);
  61         //assertEquals(vectorSAD(),   sum);
  62     }
  63 
  64     @Benchmark
  65     public int scalar() {
  66         int sum = 0;
  67         for (int i = 0; i < data.length; i++) {
  68             sum += data[i] & 0xFF;
  69         }
  70         return sum;
  71     }
  72 
  73     // 1. 32-bit accumulators
  74     @Benchmark
  75     public int vectorInt() {
  76         final var lobyte_mask = IntVector.broadcast(I256, 0x000000FF);
  77 
  78         var acc = IntVector.zero(I256);
  79         for (int i = 0; i < data.length; i += B256.length()) {
  80             var vb = ByteVector.fromArray(B256, data, i);
  81             var vi = (IntVector)vb.reinterpret(I256);
  82             for (int j = 0; j < 4; j++) {
  83                 var tj = vi.shiftR(j * 8).and(lobyte_mask);
  84                 acc = acc.add(tj);
  85             }
  86         }
  87         return (int)Integer.toUnsignedLong(acc.addAll());
  88     }
  89 
  90     // 2. 16-bit accumulators
  91     @Benchmark
  92     public int vectorShort() {
  93         final var lobyte_mask = ShortVector.broadcast(S256, (short) 0x00FF);
  94 
  95         // FIXME: overflow
  96         var acc = ShortVector.zero(S256);
  97         for (int i = 0; i < data.length; i += B256.length()) {
  98             var vb = ByteVector.fromArray(B256, data, i);
  99             var vs = (ShortVector)vb.reinterpret(S256);
 100             for (int j = 0; j < 2; j++) {
 101                 var tj = vs.shiftR(j * 8).and(lobyte_mask);
 102                 acc = acc.add(tj);
 103             }
 104         }
 105 
 106         int mid = S128.length();
 107         var accLo = ((IntVector)(acc             .reshape(S128).cast(I256))).and(0xFFFF); // low half as ints
 108         var accHi = ((IntVector)(acc.shiftEL(mid).reshape(S128).cast(I256))).and(0xFFFF); // high half as ints
 109         return accLo.addAll() + accHi.addAll();
 110     }
 111 
 112     /*
 113     // 3. 8-bit halves (MISSING: _mm_adds_epu8)
 114     @Benchmark
 115     public int vectorByte() {
 116         int window = 256;
 117         var acc_hi  = IntVector.zero(I256);
 118         var acc8_lo = ByteVector.zero(B256);
 119         for (int i = 0; i < data.length; i += window) {
 120             var acc8_hi = ByteVector.zero(B256);
 121             int limit = Math.min(window, data.length - i);
 122             for (int j = 0; j < limit; j += B256.length()) {
 123                 var vb = ByteVector.fromArray(B256, data, i + j);
 124 
 125                 var t0 = acc8_lo.add(vb);
 126                 var t1 = addSaturated(acc8_lo, vb); // MISSING
 127                 var overflow = t0.notEqual(t1);
 128 
 129                 acc8_lo = t0;
 130                 acc8_hi = acc8_hi.add((byte) 1, overflow);
 131             }
 132             acc_hi = acc_hi.add(sum(acc8_hi));
 133         }
 134         return sum(acc8_lo)
 135                 .add(acc_hi.mul(256)) // overflow
 136                 .addAll();
 137     }
 138 
 139     // 4. Sum Of Absolute Differences (SAD) (MISSING: VPSADBW, _mm256_sad_epu8)
 140     public int vectorSAD() {
 141         var acc = IntVector.zero(I256);
 142         for (int i = 0; i < data.length; i += B256.length()) {
 143             var v = ByteVector.fromArray(B256, data, i);
 144             var sad = sumOfAbsoluteDifferences(v, ByteVector.zero(B256)); // MISSING
 145             acc = acc.add(sad);
 146         }
 147         return acc.addAll();
 148     } */
 149 
 150     // Helpers
 151     /*
 152     static ByteVector addSaturated(ByteVector va, ByteVector vb) {
 153         return ByteVectorHelper.map(va, vb, (i, a, b) -> {
 154             if ((a & 0xFF) + (b & 0xFF) < 0xFF) {
 155                 return (byte) (a + b);
 156             } else {
 157                 return (byte)0xFF;
 158             }
 159         });
 160     }
 161 
 162     IntVector sumOfAbsoluteDifferences(ByteVector va, ByteVector vb) {
 163         var vc = ByteVectorHelper.map(va, vb, (i, a, b) -> {
 164             if ((a & 0xFF) > (b & 0xFF)) {
 165                 return (byte)(a - b);
 166             } else {
 167                 return (byte)(b - a);
 168             }
 169         });
 170         return sum(vc);
 171     } */
 172 }
< prev index next >