< prev index next >

test/jdk/jdk/incubator/vector/benchmark/src/main/java/benchmark/jdk/incubator/vector/SumOfUnsignedBytes.java

Print this page
rev 55894 : 8222897: [vector] Renaming of shift, rotate operations. Few other api changes.
Summary: Renaming of shift, rotate operations. Few other api changes.
Reviewed-by: jrose, briangoetz


  63 
  64     @Benchmark
  65     public int scalar() { // Scalar baseline: adds up every element of data, one at a time, as an unsigned byte value.
  66         int sum = 0;
  67         for (int i = 0; i < data.length; i++) {
  68             sum += data[i] & 0xFF; // & 0xFF keeps only the low 8 bits, so each element contributes 0..255
  69         }
  70         return sum;
  71     }
  72 
  73     // 1. 32-bit accumulators: every int lane of acc collects the byte values pulled out of the matching input lane.
  74     @Benchmark
  75     public int vectorInt() {
  76         final var lobyte_mask = IntVector.broadcast(I256, 0x000000FF); // every lane = 0xFF, used to isolate one byte per lane
  77 
  78         var acc = IntVector.zero(I256); // running per-lane sums, starting from zero
  79         for (int i = 0; i < data.length; i += B256.length()) { // NOTE(review): presumably data.length is a multiple of B256.length() - confirm against the setup code
  80             var vb = ByteVector.fromArray(B256, data, i);
  81             var vi = (IntVector)vb.reinterpret(I256); // the loaded bytes viewed as int lanes; the j loop treats each lane as four packed bytes
  82             for (int j = 0; j < 4; j++) {
  83                 var tj = vi.shiftR(j * 8).and(lobyte_mask); // move byte j of each lane to the bottom, then mask it down to 0..255
  84                 acc = acc.add(tj);
  85             }
  86         }
  87         return (int)Integer.toUnsignedLong(acc.addAll()); // toUnsignedLong followed by the (int) cast yields the same int that addAll() returned
  88     }
  89 
  90     // 2. 16-bit accumulators: every short lane of acc collects the two byte values pulled out of the matching input lane.
  91     @Benchmark
  92     public int vectorShort() {
  93         final var lobyte_mask = ShortVector.broadcast(S256, (short) 0x00FF); // every lane = 0x00FF, used to isolate one byte per lane
  94 
  95         // FIXME: overflow - each pass of the outer loop adds two values of up to 255 to every 16-bit lane, so long inputs can wrap
  96         var acc = ShortVector.zero(S256);
  97         for (int i = 0; i < data.length; i += B256.length()) { // NOTE(review): presumably data.length is a multiple of B256.length() - confirm against the setup code
  98             var vb = ByteVector.fromArray(B256, data, i);
  99             var vs = (ShortVector)vb.reinterpret(S256); // the loaded bytes viewed as short lanes; the j loop treats each lane as two packed bytes
 100             for (int j = 0; j < 2; j++) {
 101                 var tj = vs.shiftR(j * 8).and(lobyte_mask); // move byte j of each lane to the bottom, then mask it down to 0..255
 102                 acc = acc.add(tj);
 103             }
 104         }
 105 
 106         int mid = S128.length(); // lane count of the S128 species; presumably half the lanes of acc - TODO confirm
 107         var accLo = ((IntVector)(acc             .reshape(S128).cast(I256))).and(0xFFFF); // low half as ints; and(0xFFFF) keeps only the low 16 bits of each int lane
 108         var accHi = ((IntVector)(acc.shiftEL(mid).reshape(S128).cast(I256))).and(0xFFFF); // high half as ints; shiftEL(mid) presumably moves the upper lanes into the lower half first
 109         return accLo.addAll() + accHi.addAll(); // total = reduction of the low half plus reduction of the high half
 110     }
 111 
 112     /*
 113     // 3. 8-bit halves (MISSING: _mm_adds_epu8)
 114     @Benchmark
 115     public int vectorByte() {
 116         int window = 256;
 117         var acc_hi  = IntVector.zero(I256);
 118         var acc8_lo = ByteVector.zero(B256);
 119         for (int i = 0; i < data.length; i += window) {
 120             var acc8_hi = ByteVector.zero(B256);
 121             int limit = Math.min(window, data.length - i);
 122             for (int j = 0; j < limit; j += B256.length()) {
 123                 var vb = ByteVector.fromArray(B256, data, i + j);
 124 
 125                 var t0 = acc8_lo.add(vb);
 126                 var t1 = addSaturated(acc8_lo, vb); // MISSING
 127                 var overflow = t0.notEqual(t1);
 128 
 129                 acc8_lo = t0;




  63 
  64     @Benchmark
  65     public int scalar() { // Scalar baseline: adds up every element of data, one at a time, as an unsigned byte value.
  66         int sum = 0;
  67         for (int i = 0; i < data.length; i++) {
  68             sum += data[i] & 0xFF; // & 0xFF keeps only the low 8 bits, so each element contributes 0..255
  69         }
  70         return sum;
  71     }
  72 
  73     // 1. 32-bit accumulators: every int lane of acc collects the byte values pulled out of the matching input lane.
  74     @Benchmark
  75     public int vectorInt() {
  76         final var lobyte_mask = IntVector.broadcast(I256, 0x000000FF); // every lane = 0xFF, used to isolate one byte per lane
  77 
  78         var acc = IntVector.zero(I256); // running per-lane sums, starting from zero
  79         for (int i = 0; i < data.length; i += B256.length()) { // NOTE(review): presumably data.length is a multiple of B256.length() - confirm against the setup code
  80             var vb = ByteVector.fromArray(B256, data, i);
  81             var vi = (IntVector)vb.reinterpret(I256); // the loaded bytes viewed as int lanes; the j loop treats each lane as four packed bytes
  82             for (int j = 0; j < 4; j++) {
  83                 var tj = vi.shiftRight(j * 8).and(lobyte_mask); // move byte j of each lane to the bottom, then mask it down to 0..255
  84                 acc = acc.add(tj);
  85             }
  86         }
  87         return (int)Integer.toUnsignedLong(acc.addLanes()); // toUnsignedLong followed by the (int) cast yields the same int that addLanes() returned
  88     }
  89 
  90     // 2. 16-bit accumulators: every short lane of acc collects the two byte values pulled out of the matching input lane.
  91     @Benchmark
  92     public int vectorShort() {
  93         final var lobyte_mask = ShortVector.broadcast(S256, (short) 0x00FF); // every lane = 0x00FF, used to isolate one byte per lane
  94 
  95         // FIXME: overflow - each pass of the outer loop adds two values of up to 255 to every 16-bit lane, so long inputs can wrap
  96         var acc = ShortVector.zero(S256);
  97         for (int i = 0; i < data.length; i += B256.length()) { // NOTE(review): presumably data.length is a multiple of B256.length() - confirm against the setup code
  98             var vb = ByteVector.fromArray(B256, data, i);
  99             var vs = (ShortVector)vb.reinterpret(S256); // the loaded bytes viewed as short lanes; the j loop treats each lane as two packed bytes
 100             for (int j = 0; j < 2; j++) {
 101                 var tj = vs.shiftRight(j * 8).and(lobyte_mask); // move byte j of each lane to the bottom, then mask it down to 0..255
 102                 acc = acc.add(tj);
 103             }
 104         }
 105 
 106         int mid = S128.length(); // lane count of the S128 species; presumably half the lanes of acc - TODO confirm
 107         var accLo = ((IntVector)(acc             .reshape(S128).cast(I256))).and(0xFFFF); // low half as ints; and(0xFFFF) keeps only the low 16 bits of each int lane
 108         var accHi = ((IntVector)(acc.shiftLanesLeft(mid).reshape(S128).cast(I256))).and(0xFFFF); // high half as ints; shiftLanesLeft(mid) presumably moves the upper lanes into the lower half first
 109         return accLo.addLanes() + accHi.addLanes(); // total = reduction of the low half plus reduction of the high half
 110     }
 111 
 112     /*
 113     // 3. 8-bit halves (MISSING: _mm_adds_epu8)
 114     @Benchmark
 115     public int vectorByte() {
 116         int window = 256;
 117         var acc_hi  = IntVector.zero(I256);
 118         var acc8_lo = ByteVector.zero(B256);
 119         for (int i = 0; i < data.length; i += window) {
 120             var acc8_hi = ByteVector.zero(B256);
 121             int limit = Math.min(window, data.length - i);
 122             for (int j = 0; j < limit; j += B256.length()) {
 123                 var vb = ByteVector.fromArray(B256, data, i + j);
 124 
 125                 var t0 = acc8_lo.add(vb);
 126                 var t1 = addSaturated(acc8_lo, vb); // MISSING
 127                 var overflow = t0.notEqual(t1);
 128 
 129                 acc8_lo = t0;


< prev index next >