< prev index next >
test/jdk/jdk/incubator/vector/benchmark/src/main/java/benchmark/jdk/incubator/vector/SumOfUnsignedBytes.java
Print this page
rev 55589 : Species-phase2
rev 55594 : tests and benchmark changes
*** 55,66 ****
data = fillByte(size, i -> (byte)(int)i);
int sum = scalar();
assertEquals(vectorInt(), sum);
assertEquals(vectorShort(), sum);
! assertEquals(vectorByte(), sum);
! assertEquals(vectorSAD(), sum);
}
@Benchmark
public int scalar() {
int sum = 0;
--- 55,66 ----
data = fillByte(size, i -> (byte)(int)i);
int sum = scalar();
assertEquals(vectorInt(), sum);
assertEquals(vectorShort(), sum);
! //assertEquals(vectorByte(), sum);
! //assertEquals(vectorSAD(), sum);
}
@Benchmark
public int scalar() {
int sum = 0;
*** 71,81 ****
}
// 1. 32-bit accumulators
@Benchmark
public int vectorInt() {
! final var lobyte_mask = I256.broadcast(0x000000FF);
var acc = IntVector.zero(I256);
for (int i = 0; i < data.length; i += B256.length()) {
var vb = ByteVector.fromArray(B256, data, i);
var vi = (IntVector)vb.reinterpret(I256);
--- 71,81 ----
}
// 1. 32-bit accumulators
@Benchmark
public int vectorInt() {
! final var lobyte_mask = IntVector.broadcast(I256, 0x000000FF);
var acc = IntVector.zero(I256);
for (int i = 0; i < data.length; i += B256.length()) {
var vb = ByteVector.fromArray(B256, data, i);
var vi = (IntVector)vb.reinterpret(I256);
*** 88,98 ****
}
// 2. 16-bit accumulators
@Benchmark
public int vectorShort() {
! final var lobyte_mask = S256.broadcast((short) 0x00FF);
// FIXME: overflow
var acc = ShortVector.zero(S256);
for (int i = 0; i < data.length; i += B256.length()) {
var vb = ByteVector.fromArray(B256, data, i);
--- 88,98 ----
}
// 2. 16-bit accumulators
@Benchmark
public int vectorShort() {
! final var lobyte_mask = ShortVector.broadcast(S256, (short) 0x00FF);
// FIXME: overflow
var acc = ShortVector.zero(S256);
for (int i = 0; i < data.length; i += B256.length()) {
var vb = ByteVector.fromArray(B256, data, i);
*** 107,116 ****
--- 107,117 ----
var accLo = ((IntVector)(acc .reshape(S128).cast(I256))).and(0xFFFF); // low half as ints
var accHi = ((IntVector)(acc.shiftEL(mid).reshape(S128).cast(I256))).and(0xFFFF); // high half as ints
return accLo.addAll() + accHi.addAll();
}
+ /*
// 3. 8-bit halves (MISSING: unsigned saturating byte add, cf. x86 intrinsic _mm_adds_epu8)
@Benchmark
public int vectorByte() {
int window = 256;
var acc_hi = IntVector.zero(I256);
*** 142,155 ****
var v = ByteVector.fromArray(B256, data, i);
var sad = sumOfAbsoluteDifferences(v, ByteVector.zero(B256)); // MISSING
acc = acc.add(sad);
}
return acc.addAll();
! }
// Helpers
!
static ByteVector addSaturated(ByteVector va, ByteVector vb) {
return ByteVectorHelper.map(va, vb, (i, a, b) -> {
if ((a & 0xFF) + (b & 0xFF) < 0xFF) {
return (byte) (a + b);
} else {
--- 143,156 ----
var v = ByteVector.fromArray(B256, data, i);
var sad = sumOfAbsoluteDifferences(v, ByteVector.zero(B256)); // MISSING
acc = acc.add(sad);
}
return acc.addAll();
! } */
// Helpers
! /*
static ByteVector addSaturated(ByteVector va, ByteVector vb) {
return ByteVectorHelper.map(va, vb, (i, a, b) -> {
if ((a & 0xFF) + (b & 0xFF) < 0xFF) {
return (byte) (a + b);
} else {
*** 165,171 ****
} else {
return (byte)(b - a);
}
});
return sum(vc);
! }
}
--- 166,172 ----
} else {
return (byte)(b - a);
}
});
return sum(vc);
! } */
}
< prev index next >