7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have
21 * questions.
22 */
23
24 package benchmark.jdk.incubator.vector;
25
26 import jdk.incubator.vector.Vector;
27 import jdk.incubator.vector.Vector.Shape;
28 import jdk.incubator.vector.Vector.Species;
29 import jdk.incubator.vector.FloatVector;
30
31 import java.util.concurrent.TimeUnit;
32 import java.util.function.BiFunction;
33 import java.util.function.IntFunction;
34
35 import org.openjdk.jmh.annotations.*;
36 import org.openjdk.jmh.infra.Blackhole;
37
38 @BenchmarkMode(Mode.Throughput)
39 @OutputTimeUnit(TimeUnit.MILLISECONDS)
40 @State(Scope.Benchmark)
41 @Warmup(iterations = 3, time = 1)
42 @Measurement(iterations = 5, time = 1)
43 @Fork(value = 1, jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"})
44 public class Float512Vector extends AbstractVectorBenchmark {
45 static final Species<Float> SPECIES = FloatVector.SPECIES_512;
46
47 static final int INVOC_COUNT = 1; // get rid of outer loop
48
49 @Param("1024")
50 int size;
51
52 float[] fill(IntFunction<Float> f) {
53 float[] array = new float[size];
54 for (int i = 0; i < array.length; i++) {
55 array[i] = f.apply(i);
56 }
57 return array;
58 }
59
60 float[] a, b, c, r;
61 boolean[] m, rm;
62 int[] s;
63
64 @Setup
65 public void init() {
91 float[] b = fb.apply(SPECIES.length());
92 float[] r = fr.apply(SPECIES.length());
93
94 for (int ic = 0; ic < INVOC_COUNT; ic++) {
95 for (int i = 0; i < a.length; i += SPECIES.length()) {
96 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
97 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
98 av.add(bv).intoArray(r, i);
99 }
100 }
101
102 bh.consume(r);
103 }
104
105 @Benchmark
106 public void addMasked(Blackhole bh) {
107 float[] a = fa.apply(SPECIES.length());
108 float[] b = fb.apply(SPECIES.length());
109 float[] r = fr.apply(SPECIES.length());
110 boolean[] mask = fm.apply(SPECIES.length());
111 Vector.Mask<Float> vmask = FloatVector.maskFromValues(SPECIES, mask);
112
113 for (int ic = 0; ic < INVOC_COUNT; ic++) {
114 for (int i = 0; i < a.length; i += SPECIES.length()) {
115 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
116 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
117 av.add(bv, vmask).intoArray(r, i);
118 }
119 }
120
121 bh.consume(r);
122 }
123
124 @Benchmark
125 public void sub(Blackhole bh) {
126 float[] a = fa.apply(SPECIES.length());
127 float[] b = fb.apply(SPECIES.length());
128 float[] r = fr.apply(SPECIES.length());
129
130 for (int ic = 0; ic < INVOC_COUNT; ic++) {
131 for (int i = 0; i < a.length; i += SPECIES.length()) {
132 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
133 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
134 av.sub(bv).intoArray(r, i);
135 }
136 }
137
138 bh.consume(r);
139 }
140
141 @Benchmark
142 public void subMasked(Blackhole bh) {
143 float[] a = fa.apply(SPECIES.length());
144 float[] b = fb.apply(SPECIES.length());
145 float[] r = fr.apply(SPECIES.length());
146 boolean[] mask = fm.apply(SPECIES.length());
147 Vector.Mask<Float> vmask = FloatVector.maskFromValues(SPECIES, mask);
148
149 for (int ic = 0; ic < INVOC_COUNT; ic++) {
150 for (int i = 0; i < a.length; i += SPECIES.length()) {
151 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
152 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
153 av.sub(bv, vmask).intoArray(r, i);
154 }
155 }
156
157 bh.consume(r);
158 }
159
160
161 @Benchmark
162 public void div(Blackhole bh) {
163 float[] a = fa.apply(SPECIES.length());
164 float[] b = fb.apply(SPECIES.length());
165 float[] r = fr.apply(SPECIES.length());
166
167 for (int ic = 0; ic < INVOC_COUNT; ic++) {
168 for (int i = 0; i < a.length; i += SPECIES.length()) {
169 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
170 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
171 av.div(bv).intoArray(r, i);
172 }
173 }
174
175 bh.consume(r);
176 }
177
178
179
180 @Benchmark
181 public void divMasked(Blackhole bh) {
182 float[] a = fa.apply(SPECIES.length());
183 float[] b = fb.apply(SPECIES.length());
184 float[] r = fr.apply(SPECIES.length());
185 boolean[] mask = fm.apply(SPECIES.length());
186 Vector.Mask<Float> vmask = FloatVector.maskFromValues(SPECIES, mask);
187
188 for (int ic = 0; ic < INVOC_COUNT; ic++) {
189 for (int i = 0; i < a.length; i += SPECIES.length()) {
190 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
191 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
192 av.div(bv, vmask).intoArray(r, i);
193 }
194 }
195
196 bh.consume(r);
197 }
198
199
200 @Benchmark
201 public void mul(Blackhole bh) {
202 float[] a = fa.apply(SPECIES.length());
203 float[] b = fb.apply(SPECIES.length());
204 float[] r = fr.apply(SPECIES.length());
205
206 for (int ic = 0; ic < INVOC_COUNT; ic++) {
207 for (int i = 0; i < a.length; i += SPECIES.length()) {
208 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
209 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
210 av.mul(bv).intoArray(r, i);
211 }
212 }
213
214 bh.consume(r);
215 }
216
217 @Benchmark
218 public void mulMasked(Blackhole bh) {
219 float[] a = fa.apply(SPECIES.length());
220 float[] b = fb.apply(SPECIES.length());
221 float[] r = fr.apply(SPECIES.length());
222 boolean[] mask = fm.apply(SPECIES.length());
223 Vector.Mask<Float> vmask = FloatVector.maskFromValues(SPECIES, mask);
224
225 for (int ic = 0; ic < INVOC_COUNT; ic++) {
226 for (int i = 0; i < a.length; i += SPECIES.length()) {
227 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
228 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
229 av.mul(bv, vmask).intoArray(r, i);
230 }
231 }
232
233 bh.consume(r);
234 }
235
236
237
238
239
240
241
242
243
365 @Benchmark
366 public void with(Blackhole bh) {
367 float[] a = fa.apply(SPECIES.length());
368 float[] r = fr.apply(SPECIES.length());
369
370 for (int ic = 0; ic < INVOC_COUNT; ic++) {
371 for (int i = 0; i < a.length; i += SPECIES.length()) {
372 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
373 av.with(0, (float)4).intoArray(r, i);
374 }
375 }
376
377 bh.consume(r);
378 }
379
380 @Benchmark
381 public Object lessThan() {
382 float[] a = fa.apply(size);
383 float[] b = fb.apply(size);
384 boolean[] ms = fm.apply(size);
385 Vector.Mask<Float> m = FloatVector.maskFromArray(SPECIES, ms, 0);
386
387 for (int ic = 0; ic < INVOC_COUNT; ic++) {
388 for (int i = 0; i < a.length; i += SPECIES.length()) {
389 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
390 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
391 Vector.Mask<Float> mv = av.lessThan(bv);
392
393 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
394 }
395 }
396 return m;
397 }
398
399
400 @Benchmark
401 public Object greaterThan() {
402 float[] a = fa.apply(size);
403 float[] b = fb.apply(size);
404 boolean[] ms = fm.apply(size);
405 Vector.Mask<Float> m = FloatVector.maskFromArray(SPECIES, ms, 0);
406
407 for (int ic = 0; ic < INVOC_COUNT; ic++) {
408 for (int i = 0; i < a.length; i += SPECIES.length()) {
409 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
410 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
411 Vector.Mask<Float> mv = av.greaterThan(bv);
412
413 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
414 }
415 }
416 return m;
417 }
418
419
420 @Benchmark
421 public Object equal() {
422 float[] a = fa.apply(size);
423 float[] b = fb.apply(size);
424 boolean[] ms = fm.apply(size);
425 Vector.Mask<Float> m = FloatVector.maskFromArray(SPECIES, ms, 0);
426
427 for (int ic = 0; ic < INVOC_COUNT; ic++) {
428 for (int i = 0; i < a.length; i += SPECIES.length()) {
429 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
430 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
431 Vector.Mask<Float> mv = av.equal(bv);
432
433 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
434 }
435 }
436 return m;
437 }
438
439
440 @Benchmark
441 public Object notEqual() {
442 float[] a = fa.apply(size);
443 float[] b = fb.apply(size);
444 boolean[] ms = fm.apply(size);
445 Vector.Mask<Float> m = FloatVector.maskFromArray(SPECIES, ms, 0);
446
447 for (int ic = 0; ic < INVOC_COUNT; ic++) {
448 for (int i = 0; i < a.length; i += SPECIES.length()) {
449 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
450 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
451 Vector.Mask<Float> mv = av.notEqual(bv);
452
453 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
454 }
455 }
456 return m;
457 }
458
459
460 @Benchmark
461 public Object lessThanEq() {
462 float[] a = fa.apply(size);
463 float[] b = fb.apply(size);
464 boolean[] ms = fm.apply(size);
465 Vector.Mask<Float> m = FloatVector.maskFromArray(SPECIES, ms, 0);
466
467 for (int ic = 0; ic < INVOC_COUNT; ic++) {
468 for (int i = 0; i < a.length; i += SPECIES.length()) {
469 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
470 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
471 Vector.Mask<Float> mv = av.lessThanEq(bv);
472
473 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
474 }
475 }
476 return m;
477 }
478
479
480 @Benchmark
481 public Object greaterThanEq() {
482 float[] a = fa.apply(size);
483 float[] b = fb.apply(size);
484 boolean[] ms = fm.apply(size);
485 Vector.Mask<Float> m = FloatVector.maskFromArray(SPECIES, ms, 0);
486
487 for (int ic = 0; ic < INVOC_COUNT; ic++) {
488 for (int i = 0; i < a.length; i += SPECIES.length()) {
489 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
490 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
491 Vector.Mask<Float> mv = av.greaterThanEq(bv);
492
493 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
494 }
495 }
496 return m;
497 }
498
499
500 @Benchmark
501 public void blend(Blackhole bh) {
502 float[] a = fa.apply(SPECIES.length());
503 float[] b = fb.apply(SPECIES.length());
504 float[] r = fr.apply(SPECIES.length());
505 boolean[] mask = fm.apply(SPECIES.length());
506 Vector.Mask<Float> vmask = FloatVector.maskFromValues(SPECIES, mask);
507
508 for (int ic = 0; ic < INVOC_COUNT; ic++) {
509 for (int i = 0; i < a.length; i += SPECIES.length()) {
510 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
511 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
512 av.blend(bv, vmask).intoArray(r, i);
513 }
514 }
515
516 bh.consume(r);
517 }
518
519 @Benchmark
520 public void rearrange(Blackhole bh) {
521 float[] a = fa.apply(SPECIES.length());
522 int[] order = fs.apply(a.length, SPECIES.length());
523 float[] r = fr.apply(SPECIES.length());
524
525 for (int ic = 0; ic < INVOC_COUNT; ic++) {
526 for (int i = 0; i < a.length; i += SPECIES.length()) {
527 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
528 av.rearrange(FloatVector.shuffleFromArray(SPECIES, order, i)).intoArray(r, i);
529 }
530 }
531
532 bh.consume(r);
533 }
534
535 @Benchmark
536 public void extract(Blackhole bh) {
537 float[] a = fa.apply(SPECIES.length());
538 float[] r = fr.apply(SPECIES.length());
539
540 for (int ic = 0; ic < INVOC_COUNT; ic++) {
541 for (int i = 0; i < a.length; i += SPECIES.length()) {
542 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
543 int num_lanes = SPECIES.length();
544 // Manually unroll because full unroll happens after intrinsification.
545 // Unroll is needed because get intrinsic requires for index to be a known constant.
546 if (num_lanes == 1) {
547 r[i]=av.get(0);
548 } else if (num_lanes == 2) {
1012 for (int i = 0; i < a.length; i += SPECIES.length()) {
1013 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
1014 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
1015 FloatVector cv = FloatVector.fromArray(SPECIES, c, i);
1016 av.fma(bv, cv).intoArray(r, i);
1017 }
1018 }
1019
1020 bh.consume(r);
1021 }
1022
1023
1024
1025 @Benchmark
1026 public void fmaMasked(Blackhole bh) {
1027 float[] a = fa.apply(SPECIES.length());
1028 float[] b = fb.apply(SPECIES.length());
1029 float[] c = fc.apply(SPECIES.length());
1030 float[] r = fr.apply(SPECIES.length());
1031 boolean[] mask = fm.apply(SPECIES.length());
1032 Vector.Mask<Float> vmask = FloatVector.maskFromValues(SPECIES, mask);
1033
1034 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1035 for (int i = 0; i < a.length; i += SPECIES.length()) {
1036 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
1037 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
1038 FloatVector cv = FloatVector.fromArray(SPECIES, c, i);
1039 av.fma(bv, cv, vmask).intoArray(r, i);
1040 }
1041 }
1042
1043 bh.consume(r);
1044 }
1045
1046
1047 @Benchmark
1048 public void neg(Blackhole bh) {
1049 float[] a = fa.apply(SPECIES.length());
1050 float[] r = fr.apply(SPECIES.length());
1051
1052 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1053 for (int i = 0; i < a.length; i += SPECIES.length()) {
1054 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
1055 av.neg().intoArray(r, i);
1056 }
1057 }
1058
1059 bh.consume(r);
1060 }
1061
1062 @Benchmark
1063 public void negMasked(Blackhole bh) {
1064 float[] a = fa.apply(SPECIES.length());
1065 float[] r = fr.apply(SPECIES.length());
1066 boolean[] mask = fm.apply(SPECIES.length());
1067 Vector.Mask<Float> vmask = FloatVector.maskFromValues(SPECIES, mask);
1068
1069 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1070 for (int i = 0; i < a.length; i += SPECIES.length()) {
1071 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
1072 av.neg(vmask).intoArray(r, i);
1073 }
1074 }
1075
1076 bh.consume(r);
1077 }
1078
1079 @Benchmark
1080 public void abs(Blackhole bh) {
1081 float[] a = fa.apply(SPECIES.length());
1082 float[] r = fr.apply(SPECIES.length());
1083
1084 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1085 for (int i = 0; i < a.length; i += SPECIES.length()) {
1086 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
1087 av.abs().intoArray(r, i);
1088 }
1089 }
1090
1091 bh.consume(r);
1092 }
1093
1094 @Benchmark
1095 public void absMasked(Blackhole bh) {
1096 float[] a = fa.apply(SPECIES.length());
1097 float[] r = fr.apply(SPECIES.length());
1098 boolean[] mask = fm.apply(SPECIES.length());
1099 Vector.Mask<Float> vmask = FloatVector.maskFromValues(SPECIES, mask);
1100
1101 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1102 for (int i = 0; i < a.length; i += SPECIES.length()) {
1103 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
1104 av.abs(vmask).intoArray(r, i);
1105 }
1106 }
1107
1108 bh.consume(r);
1109 }
1110
1111
1112
1113
1114 @Benchmark
1115 public void sqrt(Blackhole bh) {
1116 float[] a = fa.apply(SPECIES.length());
1117 float[] r = fr.apply(SPECIES.length());
1118
1119 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1120 for (int i = 0; i < a.length; i += SPECIES.length()) {
1121 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
1122 av.sqrt().intoArray(r, i);
1123 }
1124 }
1125
1126 bh.consume(r);
1127 }
1128
1129
1130
1131 @Benchmark
1132 public void sqrtMasked(Blackhole bh) {
1133 float[] a = fa.apply(SPECIES.length());
1134 float[] r = fr.apply(SPECIES.length());
1135 boolean[] mask = fm.apply(SPECIES.length());
1136 Vector.Mask<Float> vmask = FloatVector.maskFromValues(SPECIES, mask);
1137
1138 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1139 for (int i = 0; i < a.length; i += SPECIES.length()) {
1140 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
1141 av.sqrt(vmask).intoArray(r, i);
1142 }
1143 }
1144
1145 bh.consume(r);
1146 }
1147
1148
1149
1150 @Benchmark
1151 public void gather(Blackhole bh) {
1152 float[] a = fa.apply(SPECIES.length());
1153 int[] b = fs.apply(a.length, SPECIES.length());
1154 float[] r = new float[a.length];
1155
1156 for (int ic = 0; ic < INVOC_COUNT; ic++) {
|
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have
21 * questions.
22 */
23
24 package benchmark.jdk.incubator.vector;
25
import jdk.incubator.vector.FloatVector;
import jdk.incubator.vector.Vector;
import jdk.incubator.vector.VectorMask;
import jdk.incubator.vector.VectorShape;
import jdk.incubator.vector.VectorShuffle;
import jdk.incubator.vector.VectorSpecies;

import java.util.concurrent.TimeUnit;
import java.util.function.BiFunction;
import java.util.function.IntFunction;

import org.openjdk.jmh.annotations.*;
import org.openjdk.jmh.infra.Blackhole;
38
39 @BenchmarkMode(Mode.Throughput)
40 @OutputTimeUnit(TimeUnit.MILLISECONDS)
41 @State(Scope.Benchmark)
42 @Warmup(iterations = 3, time = 1)
43 @Measurement(iterations = 5, time = 1)
44 @Fork(value = 1, jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"})
45 public class Float512Vector extends AbstractVectorBenchmark {
46 static final VectorSpecies<Float> SPECIES = FloatVector.SPECIES_512;
47
48 static final int INVOC_COUNT = 1; // get rid of outer loop
49
50 @Param("1024")
51 int size;
52
53 float[] fill(IntFunction<Float> f) {
54 float[] array = new float[size];
55 for (int i = 0; i < array.length; i++) {
56 array[i] = f.apply(i);
57 }
58 return array;
59 }
60
61 float[] a, b, c, r;
62 boolean[] m, rm;
63 int[] s;
64
65 @Setup
66 public void init() {
92 float[] b = fb.apply(SPECIES.length());
93 float[] r = fr.apply(SPECIES.length());
94
95 for (int ic = 0; ic < INVOC_COUNT; ic++) {
96 for (int i = 0; i < a.length; i += SPECIES.length()) {
97 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
98 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
99 av.add(bv).intoArray(r, i);
100 }
101 }
102
103 bh.consume(r);
104 }
105
106 @Benchmark
107 public void addMasked(Blackhole bh) {
108 float[] a = fa.apply(SPECIES.length());
109 float[] b = fb.apply(SPECIES.length());
110 float[] r = fr.apply(SPECIES.length());
111 boolean[] mask = fm.apply(SPECIES.length());
112 VectorMask<Float> vmask = VectorMask.fromValues(SPECIES, mask);
113
114 for (int ic = 0; ic < INVOC_COUNT; ic++) {
115 for (int i = 0; i < a.length; i += SPECIES.length()) {
116 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
117 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
118 av.add(bv, vmask).intoArray(r, i);
119 }
120 }
121
122 bh.consume(r);
123 }
124
125 @Benchmark
126 public void sub(Blackhole bh) {
127 float[] a = fa.apply(SPECIES.length());
128 float[] b = fb.apply(SPECIES.length());
129 float[] r = fr.apply(SPECIES.length());
130
131 for (int ic = 0; ic < INVOC_COUNT; ic++) {
132 for (int i = 0; i < a.length; i += SPECIES.length()) {
133 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
134 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
135 av.sub(bv).intoArray(r, i);
136 }
137 }
138
139 bh.consume(r);
140 }
141
142 @Benchmark
143 public void subMasked(Blackhole bh) {
144 float[] a = fa.apply(SPECIES.length());
145 float[] b = fb.apply(SPECIES.length());
146 float[] r = fr.apply(SPECIES.length());
147 boolean[] mask = fm.apply(SPECIES.length());
148 VectorMask<Float> vmask = VectorMask.fromValues(SPECIES, mask);
149
150 for (int ic = 0; ic < INVOC_COUNT; ic++) {
151 for (int i = 0; i < a.length; i += SPECIES.length()) {
152 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
153 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
154 av.sub(bv, vmask).intoArray(r, i);
155 }
156 }
157
158 bh.consume(r);
159 }
160
161
162 @Benchmark
163 public void div(Blackhole bh) {
164 float[] a = fa.apply(SPECIES.length());
165 float[] b = fb.apply(SPECIES.length());
166 float[] r = fr.apply(SPECIES.length());
167
168 for (int ic = 0; ic < INVOC_COUNT; ic++) {
169 for (int i = 0; i < a.length; i += SPECIES.length()) {
170 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
171 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
172 av.div(bv).intoArray(r, i);
173 }
174 }
175
176 bh.consume(r);
177 }
178
179
180
181 @Benchmark
182 public void divMasked(Blackhole bh) {
183 float[] a = fa.apply(SPECIES.length());
184 float[] b = fb.apply(SPECIES.length());
185 float[] r = fr.apply(SPECIES.length());
186 boolean[] mask = fm.apply(SPECIES.length());
187 VectorMask<Float> vmask = VectorMask.fromValues(SPECIES, mask);
188
189 for (int ic = 0; ic < INVOC_COUNT; ic++) {
190 for (int i = 0; i < a.length; i += SPECIES.length()) {
191 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
192 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
193 av.div(bv, vmask).intoArray(r, i);
194 }
195 }
196
197 bh.consume(r);
198 }
199
200
201 @Benchmark
202 public void mul(Blackhole bh) {
203 float[] a = fa.apply(SPECIES.length());
204 float[] b = fb.apply(SPECIES.length());
205 float[] r = fr.apply(SPECIES.length());
206
207 for (int ic = 0; ic < INVOC_COUNT; ic++) {
208 for (int i = 0; i < a.length; i += SPECIES.length()) {
209 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
210 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
211 av.mul(bv).intoArray(r, i);
212 }
213 }
214
215 bh.consume(r);
216 }
217
218 @Benchmark
219 public void mulMasked(Blackhole bh) {
220 float[] a = fa.apply(SPECIES.length());
221 float[] b = fb.apply(SPECIES.length());
222 float[] r = fr.apply(SPECIES.length());
223 boolean[] mask = fm.apply(SPECIES.length());
224 VectorMask<Float> vmask = VectorMask.fromValues(SPECIES, mask);
225
226 for (int ic = 0; ic < INVOC_COUNT; ic++) {
227 for (int i = 0; i < a.length; i += SPECIES.length()) {
228 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
229 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
230 av.mul(bv, vmask).intoArray(r, i);
231 }
232 }
233
234 bh.consume(r);
235 }
236
237
238
239
240
241
242
243
244
366 @Benchmark
367 public void with(Blackhole bh) {
368 float[] a = fa.apply(SPECIES.length());
369 float[] r = fr.apply(SPECIES.length());
370
371 for (int ic = 0; ic < INVOC_COUNT; ic++) {
372 for (int i = 0; i < a.length; i += SPECIES.length()) {
373 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
374 av.with(0, (float)4).intoArray(r, i);
375 }
376 }
377
378 bh.consume(r);
379 }
380
381 @Benchmark
382 public Object lessThan() {
383 float[] a = fa.apply(size);
384 float[] b = fb.apply(size);
385 boolean[] ms = fm.apply(size);
386 VectorMask<Float> m = VectorMask.maskFromArray(SPECIES, ms, 0);
387
388 for (int ic = 0; ic < INVOC_COUNT; ic++) {
389 for (int i = 0; i < a.length; i += SPECIES.length()) {
390 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
391 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
392 VectorMask<Float> mv = av.lessThan(bv);
393
394 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
395 }
396 }
397 return m;
398 }
399
400
401 @Benchmark
402 public Object greaterThan() {
403 float[] a = fa.apply(size);
404 float[] b = fb.apply(size);
405 boolean[] ms = fm.apply(size);
406 VectorMask<Float> m = VectorMask.maskFromArray(SPECIES, ms, 0);
407
408 for (int ic = 0; ic < INVOC_COUNT; ic++) {
409 for (int i = 0; i < a.length; i += SPECIES.length()) {
410 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
411 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
412 VectorMask<Float> mv = av.greaterThan(bv);
413
414 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
415 }
416 }
417 return m;
418 }
419
420
421 @Benchmark
422 public Object equal() {
423 float[] a = fa.apply(size);
424 float[] b = fb.apply(size);
425 boolean[] ms = fm.apply(size);
426 VectorMask<Float> m = VectorMask.maskFromArray(SPECIES, ms, 0);
427
428 for (int ic = 0; ic < INVOC_COUNT; ic++) {
429 for (int i = 0; i < a.length; i += SPECIES.length()) {
430 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
431 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
432 VectorMask<Float> mv = av.equal(bv);
433
434 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
435 }
436 }
437 return m;
438 }
439
440
441 @Benchmark
442 public Object notEqual() {
443 float[] a = fa.apply(size);
444 float[] b = fb.apply(size);
445 boolean[] ms = fm.apply(size);
446 VectorMask<Float> m = VectorMask.maskFromArray(SPECIES, ms, 0);
447
448 for (int ic = 0; ic < INVOC_COUNT; ic++) {
449 for (int i = 0; i < a.length; i += SPECIES.length()) {
450 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
451 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
452 VectorMask<Float> mv = av.notEqual(bv);
453
454 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
455 }
456 }
457 return m;
458 }
459
460
461 @Benchmark
462 public Object lessThanEq() {
463 float[] a = fa.apply(size);
464 float[] b = fb.apply(size);
465 boolean[] ms = fm.apply(size);
466 VectorMask<Float> m = VectorMask.maskFromArray(SPECIES, ms, 0);
467
468 for (int ic = 0; ic < INVOC_COUNT; ic++) {
469 for (int i = 0; i < a.length; i += SPECIES.length()) {
470 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
471 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
472 VectorMask<Float> mv = av.lessThanEq(bv);
473
474 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
475 }
476 }
477 return m;
478 }
479
480
481 @Benchmark
482 public Object greaterThanEq() {
483 float[] a = fa.apply(size);
484 float[] b = fb.apply(size);
485 boolean[] ms = fm.apply(size);
486 VectorMask<Float> m = VectorMask.maskFromArray(SPECIES, ms, 0);
487
488 for (int ic = 0; ic < INVOC_COUNT; ic++) {
489 for (int i = 0; i < a.length; i += SPECIES.length()) {
490 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
491 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
492 VectorMask<Float> mv = av.greaterThanEq(bv);
493
494 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
495 }
496 }
497 return m;
498 }
499
500
501 @Benchmark
502 public void blend(Blackhole bh) {
503 float[] a = fa.apply(SPECIES.length());
504 float[] b = fb.apply(SPECIES.length());
505 float[] r = fr.apply(SPECIES.length());
506 boolean[] mask = fm.apply(SPECIES.length());
507 VectorMask<Float> vmask = VectorMask.fromValues(SPECIES, mask);
508
509 for (int ic = 0; ic < INVOC_COUNT; ic++) {
510 for (int i = 0; i < a.length; i += SPECIES.length()) {
511 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
512 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
513 av.blend(bv, vmask).intoArray(r, i);
514 }
515 }
516
517 bh.consume(r);
518 }
519
520 @Benchmark
521 public void rearrange(Blackhole bh) {
522 float[] a = fa.apply(SPECIES.length());
523 int[] order = fs.apply(a.length, SPECIES.length());
524 float[] r = fr.apply(SPECIES.length());
525
526 for (int ic = 0; ic < INVOC_COUNT; ic++) {
527 for (int i = 0; i < a.length; i += SPECIES.length()) {
528 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
529 av.rearrange(VectorShuffle.fromArray(SPECIES, order, i)).intoArray(r, i);
530 }
531 }
532
533 bh.consume(r);
534 }
535
536 @Benchmark
537 public void extract(Blackhole bh) {
538 float[] a = fa.apply(SPECIES.length());
539 float[] r = fr.apply(SPECIES.length());
540
541 for (int ic = 0; ic < INVOC_COUNT; ic++) {
542 for (int i = 0; i < a.length; i += SPECIES.length()) {
543 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
544 int num_lanes = SPECIES.length();
545 // Manually unroll because full unroll happens after intrinsification.
546 // Unroll is needed because get intrinsic requires for index to be a known constant.
547 if (num_lanes == 1) {
548 r[i]=av.get(0);
549 } else if (num_lanes == 2) {
1013 for (int i = 0; i < a.length; i += SPECIES.length()) {
1014 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
1015 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
1016 FloatVector cv = FloatVector.fromArray(SPECIES, c, i);
1017 av.fma(bv, cv).intoArray(r, i);
1018 }
1019 }
1020
1021 bh.consume(r);
1022 }
1023
1024
1025
1026 @Benchmark
1027 public void fmaMasked(Blackhole bh) {
1028 float[] a = fa.apply(SPECIES.length());
1029 float[] b = fb.apply(SPECIES.length());
1030 float[] c = fc.apply(SPECIES.length());
1031 float[] r = fr.apply(SPECIES.length());
1032 boolean[] mask = fm.apply(SPECIES.length());
1033 VectorMask<Float> vmask = VectorMask.fromValues(SPECIES, mask);
1034
1035 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1036 for (int i = 0; i < a.length; i += SPECIES.length()) {
1037 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
1038 FloatVector bv = FloatVector.fromArray(SPECIES, b, i);
1039 FloatVector cv = FloatVector.fromArray(SPECIES, c, i);
1040 av.fma(bv, cv, vmask).intoArray(r, i);
1041 }
1042 }
1043
1044 bh.consume(r);
1045 }
1046
1047
1048 @Benchmark
1049 public void neg(Blackhole bh) {
1050 float[] a = fa.apply(SPECIES.length());
1051 float[] r = fr.apply(SPECIES.length());
1052
1053 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1054 for (int i = 0; i < a.length; i += SPECIES.length()) {
1055 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
1056 av.neg().intoArray(r, i);
1057 }
1058 }
1059
1060 bh.consume(r);
1061 }
1062
1063 @Benchmark
1064 public void negMasked(Blackhole bh) {
1065 float[] a = fa.apply(SPECIES.length());
1066 float[] r = fr.apply(SPECIES.length());
1067 boolean[] mask = fm.apply(SPECIES.length());
1068 VectorMask<Float> vmask = VectorMask.fromValues(SPECIES, mask);
1069
1070 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1071 for (int i = 0; i < a.length; i += SPECIES.length()) {
1072 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
1073 av.neg(vmask).intoArray(r, i);
1074 }
1075 }
1076
1077 bh.consume(r);
1078 }
1079
1080 @Benchmark
1081 public void abs(Blackhole bh) {
1082 float[] a = fa.apply(SPECIES.length());
1083 float[] r = fr.apply(SPECIES.length());
1084
1085 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1086 for (int i = 0; i < a.length; i += SPECIES.length()) {
1087 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
1088 av.abs().intoArray(r, i);
1089 }
1090 }
1091
1092 bh.consume(r);
1093 }
1094
1095 @Benchmark
1096 public void absMasked(Blackhole bh) {
1097 float[] a = fa.apply(SPECIES.length());
1098 float[] r = fr.apply(SPECIES.length());
1099 boolean[] mask = fm.apply(SPECIES.length());
1100 VectorMask<Float> vmask = VectorMask.fromValues(SPECIES, mask);
1101
1102 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1103 for (int i = 0; i < a.length; i += SPECIES.length()) {
1104 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
1105 av.abs(vmask).intoArray(r, i);
1106 }
1107 }
1108
1109 bh.consume(r);
1110 }
1111
1112
1113
1114
1115 @Benchmark
1116 public void sqrt(Blackhole bh) {
1117 float[] a = fa.apply(SPECIES.length());
1118 float[] r = fr.apply(SPECIES.length());
1119
1120 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1121 for (int i = 0; i < a.length; i += SPECIES.length()) {
1122 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
1123 av.sqrt().intoArray(r, i);
1124 }
1125 }
1126
1127 bh.consume(r);
1128 }
1129
1130
1131
1132 @Benchmark
1133 public void sqrtMasked(Blackhole bh) {
1134 float[] a = fa.apply(SPECIES.length());
1135 float[] r = fr.apply(SPECIES.length());
1136 boolean[] mask = fm.apply(SPECIES.length());
1137 VectorMask<Float> vmask = VectorMask.fromValues(SPECIES, mask);
1138
1139 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1140 for (int i = 0; i < a.length; i += SPECIES.length()) {
1141 FloatVector av = FloatVector.fromArray(SPECIES, a, i);
1142 av.sqrt(vmask).intoArray(r, i);
1143 }
1144 }
1145
1146 bh.consume(r);
1147 }
1148
1149
1150
1151 @Benchmark
1152 public void gather(Blackhole bh) {
1153 float[] a = fa.apply(SPECIES.length());
1154 int[] b = fs.apply(a.length, SPECIES.length());
1155 float[] r = new float[a.length];
1156
1157 for (int ic = 0; ic < INVOC_COUNT; ic++) {
|