743 bh.consume(r);
744 }
745
746 @Benchmark
747 public void rearrange(Blackhole bh) {
748 float[] a = fa.apply(SPECIES.length());
749 int[] order = fs.apply(a.length, SPECIES.length());
750 float[] r = fr.apply(SPECIES.length());
751
752 for (int ic = 0; ic < INVOC_COUNT; ic++) {
753 for (int i = 0; i < a.length; i += SPECIES.length()) {
754 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
755 av.rearrange(VectorShuffle.fromArray(SPECIES, order, i)).intoArray(r, i);
756 }
757 }
758
759 bh.consume(r);
760 }
761
762 @Benchmark
763 public void extract(Blackhole bh) {
764 float[] a = fa.apply(SPECIES.length());
765 float[] r = fr.apply(SPECIES.length());
766
767 for (int ic = 0; ic < INVOC_COUNT; ic++) {
768 for (int i = 0; i < a.length; i += SPECIES.length()) {
769 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
770 int num_lanes = SPECIES.length();
771 // Manually unroll because full unroll happens after intrinsification.
772 // Unroll is needed because get intrinsic requires for index to be a known constant.
773 if (num_lanes == 1) {
774 r[i]=av.lane(0);
775 } else if (num_lanes == 2) {
776 r[i]=av.lane(0);
777 r[i+1]=av.lane(1);
778 } else if (num_lanes == 4) {
779 r[i]=av.lane(0);
780 r[i+1]=av.lane(1);
781 r[i+2]=av.lane(2);
782 r[i+3]=av.lane(3);
783 } else if (num_lanes == 8) {
|
743 bh.consume(r);
744 }
745
746 @Benchmark
747 public void rearrange(Blackhole bh) {
748 float[] a = fa.apply(SPECIES.length());
749 int[] order = fs.apply(a.length, SPECIES.length());
750 float[] r = fr.apply(SPECIES.length());
751
752 for (int ic = 0; ic < INVOC_COUNT; ic++) {
753 for (int i = 0; i < a.length; i += SPECIES.length()) {
754 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
755 av.rearrange(VectorShuffle.fromArray(SPECIES, order, i)).intoArray(r, i);
756 }
757 }
758
759 bh.consume(r);
760 }
761
762 @Benchmark
763 public void laneextract(Blackhole bh) {
764 float[] a = fa.apply(SPECIES.length());
765 float[] r = fr.apply(SPECIES.length());
766
767 for (int ic = 0; ic < INVOC_COUNT; ic++) {
768 for (int i = 0; i < a.length; i += SPECIES.length()) {
769 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
770 int num_lanes = SPECIES.length();
771 // Manually unroll because full unroll happens after intrinsification.
772 // Unroll is needed because get intrinsic requires for index to be a known constant.
773 if (num_lanes == 1) {
774 r[i]=av.lane(0);
775 } else if (num_lanes == 2) {
776 r[i]=av.lane(0);
777 r[i+1]=av.lane(1);
778 } else if (num_lanes == 4) {
779 r[i]=av.lane(0);
780 r[i+1]=av.lane(1);
781 r[i+2]=av.lane(2);
782 r[i+3]=av.lane(3);
783 } else if (num_lanes == 8) {
|