< prev index next >
test/jdk/jdk/incubator/vector/benchmark/src/main/java/benchmark/jdk/incubator/vector/PopulationCount.java
Print this page
rev 55894 : 8222897: [vector] Renaming of shift, rotate operations. Few other api changes.
Summary: Renaming of shift, rotate operations. Few other api changes.
Reviewed-by: jrose, briangoetz
*** 26,36 ****
import jdk.incubator.vector.ByteVector;
import jdk.incubator.vector.ShortVector;
import jdk.incubator.vector.IntVector;
import jdk.incubator.vector.LongVector;
! import jdk.incubator.vector.Vector.Species;
import org.openjdk.jmh.annotations.*;
import java.util.concurrent.TimeUnit;
import static org.junit.jupiter.api.Assertions.assertEquals;
--- 26,36 ----
import jdk.incubator.vector.ByteVector;
import jdk.incubator.vector.ShortVector;
import jdk.incubator.vector.IntVector;
import jdk.incubator.vector.LongVector;
! import jdk.incubator.vector.VectorSpecies;
import org.openjdk.jmh.annotations.*;
import java.util.concurrent.TimeUnit;
import static org.junit.jupiter.api.Assertions.assertEquals;
*** 348,358 ****
ByteVector popcntB128(ByteVector v) {
var low_mask = ByteVector.broadcast(B128, (byte)0x0f);
var lo = v .and(low_mask);
! var hi = v.shiftR(4).and(low_mask);
var cnt1 = MULA128_LOOKUP.rearrange(lo.toShuffle());
var cnt2 = MULA128_LOOKUP.rearrange(hi.toShuffle());
return cnt1.add(cnt2);
--- 348,358 ----
ByteVector popcntB128(ByteVector v) {
var low_mask = ByteVector.broadcast(B128, (byte)0x0f);
var lo = v .and(low_mask);
! var hi = v.shiftRight(4).and(low_mask);
var cnt1 = MULA128_LOOKUP.rearrange(lo.toShuffle());
var cnt2 = MULA128_LOOKUP.rearrange(hi.toShuffle());
return cnt1.add(cnt2);
*** 371,381 ****
var v3 = popcntB128(v2);
bacc = bacc.add(v3);
}
acc = acc.add(sumUnsignedBytes(bacc));
}
! var r = acc.addAll() + tail(upper);
return r;
}
/* ============================================================================================================== */
--- 371,381 ----
var v3 = popcntB128(v2);
bacc = bacc.add(v3);
}
acc = acc.add(sumUnsignedBytes(bacc));
}
! var r = acc.addLanes() + tail(upper);
return r;
}
/* ============================================================================================================== */
*** 388,398 ****
ByteVector popcntB256(ByteVector v) {
var low_mask = ByteVector.broadcast(B256, (byte)0x0F);
var lo = v .and(low_mask);
! var hi = v.shiftR(4).and(low_mask);
var cnt1 = MULA256_LOOKUP.rearrange(lo.toShuffle());
var cnt2 = MULA256_LOOKUP.rearrange(hi.toShuffle());
var cnt = cnt1.add(cnt2);
--- 388,398 ----
ByteVector popcntB256(ByteVector v) {
var low_mask = ByteVector.broadcast(B256, (byte)0x0F);
var lo = v .and(low_mask);
! var hi = v.shiftRight(4).and(low_mask);
var cnt1 = MULA256_LOOKUP.rearrange(lo.toShuffle());
var cnt2 = MULA256_LOOKUP.rearrange(hi.toShuffle());
var cnt = cnt1.add(cnt2);
*** 406,456 ****
return sumUnsignedBytesShapes(vb);
// return sumUnsignedBytesShifts(vb);
}
LongVector sumUnsignedBytesShapes(ByteVector vb) {
! Species<Short> shortSpecies = Species.of(short.class, vb.shape());
! Species<Integer> intSpecies = Species.of(int.class, vb.shape());
! Species<Long> longSpecies = Species.of(long.class, vb.shape());
var low_short_mask = ShortVector.broadcast(shortSpecies, (short) 0xFF);
var low_int_mask = IntVector.broadcast(intSpecies, 0xFFFF);
var low_long_mask = LongVector.broadcast(longSpecies, 0xFFFFFFFFL);
var vs = (ShortVector)vb.reinterpret(shortSpecies); // 16-bit
var vs0 = vs.and(low_short_mask);
! var vs1 = vs.shiftR(8).and(low_short_mask);
var vs01 = vs0.add(vs1);
var vi = (IntVector)vs01.reinterpret(intSpecies); // 32-bit
var vi0 = vi.and(low_int_mask);
! var vi1 = vi.shiftR(16).and(low_int_mask);
var vi01 = vi0.add(vi1);
var vl = (LongVector)vi01.reinterpret(longSpecies); // 64-bit
var vl0 = vl.and(low_long_mask);
! var vl1 = vl.shiftR(32).and(low_long_mask);
var vl01 = vl0.add(vl1);
return vl01;
}
LongVector sumUnsignedBytesShifts(ByteVector vb) {
! Species<Long> to = Species.of(long.class, vb.shape());
var low_mask = LongVector.broadcast(to, 0xFF);
var vl = (LongVector)vb.reinterpret(to);
var v0 = vl .and(low_mask); // 8-bit
! var v1 = vl.shiftR( 8).and(low_mask); // 8-bit
! var v2 = vl.shiftR(16).and(low_mask); // 8-bit
! var v3 = vl.shiftR(24).and(low_mask); // 8-bit
! var v4 = vl.shiftR(32).and(low_mask); // 8-bit
! var v5 = vl.shiftR(40).and(low_mask); // 8-bit
! var v6 = vl.shiftR(48).and(low_mask); // 8-bit
! var v7 = vl.shiftR(56).and(low_mask); // 8-bit
var v01 = v0.add(v1);
var v23 = v2.add(v3);
var v45 = v4.add(v5);
var v67 = v6.add(v7);
--- 406,456 ----
return sumUnsignedBytesShapes(vb);
// return sumUnsignedBytesShifts(vb);
}
LongVector sumUnsignedBytesShapes(ByteVector vb) {
! VectorSpecies<Short> shortSpecies = VectorSpecies.of(short.class, vb.shape());
! VectorSpecies<Integer> intSpecies = VectorSpecies.of(int.class, vb.shape());
! VectorSpecies<Long> longSpecies = VectorSpecies.of(long.class, vb.shape());
var low_short_mask = ShortVector.broadcast(shortSpecies, (short) 0xFF);
var low_int_mask = IntVector.broadcast(intSpecies, 0xFFFF);
var low_long_mask = LongVector.broadcast(longSpecies, 0xFFFFFFFFL);
var vs = (ShortVector)vb.reinterpret(shortSpecies); // 16-bit
var vs0 = vs.and(low_short_mask);
! var vs1 = vs.shiftRight(8).and(low_short_mask);
var vs01 = vs0.add(vs1);
var vi = (IntVector)vs01.reinterpret(intSpecies); // 32-bit
var vi0 = vi.and(low_int_mask);
! var vi1 = vi.shiftRight(16).and(low_int_mask);
var vi01 = vi0.add(vi1);
var vl = (LongVector)vi01.reinterpret(longSpecies); // 64-bit
var vl0 = vl.and(low_long_mask);
! var vl1 = vl.shiftRight(32).and(low_long_mask);
var vl01 = vl0.add(vl1);
return vl01;
}
LongVector sumUnsignedBytesShifts(ByteVector vb) {
! VectorSpecies<Long> to = VectorSpecies.of(long.class, vb.shape());
var low_mask = LongVector.broadcast(to, 0xFF);
var vl = (LongVector)vb.reinterpret(to);
var v0 = vl .and(low_mask); // 8-bit
! var v1 = vl.shiftRight( 8).and(low_mask); // 8-bit
! var v2 = vl.shiftRight(16).and(low_mask); // 8-bit
! var v3 = vl.shiftRight(24).and(low_mask); // 8-bit
! var v4 = vl.shiftRight(32).and(low_mask); // 8-bit
! var v5 = vl.shiftRight(40).and(low_mask); // 8-bit
! var v6 = vl.shiftRight(48).and(low_mask); // 8-bit
! var v7 = vl.shiftRight(56).and(low_mask); // 8-bit
var v01 = v0.add(v1);
var v23 = v2.add(v3);
var v45 = v4.add(v5);
var v67 = v6.add(v7);
*** 474,484 ****
var v2 = popcntB256((ByteVector)(v1.reinterpret(B256)));
bacc = bacc.add(v2);
}
acc = acc.add(sumUnsignedBytes(bacc));
}
! return acc.addAll() + tail(upper);
}
/* ============================================================================================================== */
--- 474,484 ----
var v2 = popcntB256((ByteVector)(v1.reinterpret(B256)));
bacc = bacc.add(v2);
}
acc = acc.add(sumUnsignedBytes(bacc));
}
! return acc.addLanes() + tail(upper);
}
/* ============================================================================================================== */
*** 612,622 ****
vtotal = vtotal.add(popcntL256(eights).mul(8)); // << 3
vtotal = vtotal.add(popcntL256(fours).mul(4)); // << 2
vtotal = vtotal.add(popcntL256(twos).mul(2)); // << 1
vtotal = vtotal.add(popcntL256(ones)); // << 0
! var total = vtotal.addAll();
return total + tail(upper);
}
/* ============================================================================================================== */
--- 612,622 ----
vtotal = vtotal.add(popcntL256(eights).mul(8)); // << 3
vtotal = vtotal.add(popcntL256(fours).mul(4)); // << 2
vtotal = vtotal.add(popcntL256(twos).mul(2)); // << 1
vtotal = vtotal.add(popcntL256(ones)); // << 0
! var total = vtotal.addLanes();
return total + tail(upper);
}
/* ============================================================================================================== */
< prev index next >