7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have
21 * questions.
22 */
23
24 package benchmark.jdk.incubator.vector;
25
26 import jdk.incubator.vector.Vector;
27 import jdk.incubator.vector.Vector.Shape;
28 import jdk.incubator.vector.Vector.Species;
29 import jdk.incubator.vector.ByteVector;
30
31 import java.util.concurrent.TimeUnit;
32 import java.util.function.BiFunction;
33 import java.util.function.IntFunction;
34
35 import org.openjdk.jmh.annotations.*;
36 import org.openjdk.jmh.infra.Blackhole;
37
38 @BenchmarkMode(Mode.Throughput)
39 @OutputTimeUnit(TimeUnit.MILLISECONDS)
40 @State(Scope.Benchmark)
41 @Warmup(iterations = 3, time = 1)
42 @Measurement(iterations = 5, time = 1)
43 @Fork(value = 1, jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"})
44 public class ByteMaxVector extends AbstractVectorBenchmark {
45 static final Species<Byte> SPECIES = ByteVector.SPECIES_MAX;
46
47 static final int INVOC_COUNT = 1; // get rid of outer loop
48
49 @Param("1024")
50 int size;
51
52 byte[] fill(IntFunction<Byte> f) {
53 byte[] array = new byte[size];
54 for (int i = 0; i < array.length; i++) {
55 array[i] = f.apply(i);
56 }
57 return array;
58 }
59
60 byte[] a, b, c, r;
61 boolean[] m, rm;
62 int[] s;
63
64 @Setup
65 public void init() {
91 byte[] b = fb.apply(SPECIES.length());
92 byte[] r = fr.apply(SPECIES.length());
93
94 for (int ic = 0; ic < INVOC_COUNT; ic++) {
95 for (int i = 0; i < a.length; i += SPECIES.length()) {
96 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
97 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
98 av.add(bv).intoArray(r, i);
99 }
100 }
101
102 bh.consume(r);
103 }
104
105 @Benchmark
106 public void addMasked(Blackhole bh) {
107 byte[] a = fa.apply(SPECIES.length());
108 byte[] b = fb.apply(SPECIES.length());
109 byte[] r = fr.apply(SPECIES.length());
110 boolean[] mask = fm.apply(SPECIES.length());
111 Vector.Mask<Byte> vmask = ByteVector.maskFromValues(SPECIES, mask);
112
113 for (int ic = 0; ic < INVOC_COUNT; ic++) {
114 for (int i = 0; i < a.length; i += SPECIES.length()) {
115 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
116 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
117 av.add(bv, vmask).intoArray(r, i);
118 }
119 }
120
121 bh.consume(r);
122 }
123
124 @Benchmark
125 public void sub(Blackhole bh) {
126 byte[] a = fa.apply(SPECIES.length());
127 byte[] b = fb.apply(SPECIES.length());
128 byte[] r = fr.apply(SPECIES.length());
129
130 for (int ic = 0; ic < INVOC_COUNT; ic++) {
131 for (int i = 0; i < a.length; i += SPECIES.length()) {
132 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
133 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
134 av.sub(bv).intoArray(r, i);
135 }
136 }
137
138 bh.consume(r);
139 }
140
141 @Benchmark
142 public void subMasked(Blackhole bh) {
143 byte[] a = fa.apply(SPECIES.length());
144 byte[] b = fb.apply(SPECIES.length());
145 byte[] r = fr.apply(SPECIES.length());
146 boolean[] mask = fm.apply(SPECIES.length());
147 Vector.Mask<Byte> vmask = ByteVector.maskFromValues(SPECIES, mask);
148
149 for (int ic = 0; ic < INVOC_COUNT; ic++) {
150 for (int i = 0; i < a.length; i += SPECIES.length()) {
151 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
152 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
153 av.sub(bv, vmask).intoArray(r, i);
154 }
155 }
156
157 bh.consume(r);
158 }
159
160
161
162 @Benchmark
163 public void mul(Blackhole bh) {
164 byte[] a = fa.apply(SPECIES.length());
165 byte[] b = fb.apply(SPECIES.length());
166 byte[] r = fr.apply(SPECIES.length());
167
168 for (int ic = 0; ic < INVOC_COUNT; ic++) {
169 for (int i = 0; i < a.length; i += SPECIES.length()) {
170 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
171 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
172 av.mul(bv).intoArray(r, i);
173 }
174 }
175
176 bh.consume(r);
177 }
178
179 @Benchmark
180 public void mulMasked(Blackhole bh) {
181 byte[] a = fa.apply(SPECIES.length());
182 byte[] b = fb.apply(SPECIES.length());
183 byte[] r = fr.apply(SPECIES.length());
184 boolean[] mask = fm.apply(SPECIES.length());
185 Vector.Mask<Byte> vmask = ByteVector.maskFromValues(SPECIES, mask);
186
187 for (int ic = 0; ic < INVOC_COUNT; ic++) {
188 for (int i = 0; i < a.length; i += SPECIES.length()) {
189 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
190 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
191 av.mul(bv, vmask).intoArray(r, i);
192 }
193 }
194
195 bh.consume(r);
196 }
197
198
199 @Benchmark
200 public void and(Blackhole bh) {
201 byte[] a = fa.apply(SPECIES.length());
202 byte[] b = fb.apply(SPECIES.length());
203 byte[] r = fr.apply(SPECIES.length());
204
205 for (int ic = 0; ic < INVOC_COUNT; ic++) {
206 for (int i = 0; i < a.length; i += SPECIES.length()) {
207 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
208 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
209 av.and(bv).intoArray(r, i);
210 }
211 }
212
213 bh.consume(r);
214 }
215
216
217
218 @Benchmark
219 public void andMasked(Blackhole bh) {
220 byte[] a = fa.apply(SPECIES.length());
221 byte[] b = fb.apply(SPECIES.length());
222 byte[] r = fr.apply(SPECIES.length());
223 boolean[] mask = fm.apply(SPECIES.length());
224 Vector.Mask<Byte> vmask = ByteVector.maskFromValues(SPECIES, mask);
225
226 for (int ic = 0; ic < INVOC_COUNT; ic++) {
227 for (int i = 0; i < a.length; i += SPECIES.length()) {
228 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
229 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
230 av.and(bv, vmask).intoArray(r, i);
231 }
232 }
233
234 bh.consume(r);
235 }
236
237
238
239 @Benchmark
240 public void or(Blackhole bh) {
241 byte[] a = fa.apply(SPECIES.length());
242 byte[] b = fb.apply(SPECIES.length());
243 byte[] r = fr.apply(SPECIES.length());
244
245 for (int ic = 0; ic < INVOC_COUNT; ic++) {
246 for (int i = 0; i < a.length; i += SPECIES.length()) {
247 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
248 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
249 av.or(bv).intoArray(r, i);
250 }
251 }
252
253 bh.consume(r);
254 }
255
256
257
258 @Benchmark
259 public void orMasked(Blackhole bh) {
260 byte[] a = fa.apply(SPECIES.length());
261 byte[] b = fb.apply(SPECIES.length());
262 byte[] r = fr.apply(SPECIES.length());
263 boolean[] mask = fm.apply(SPECIES.length());
264 Vector.Mask<Byte> vmask = ByteVector.maskFromValues(SPECIES, mask);
265
266 for (int ic = 0; ic < INVOC_COUNT; ic++) {
267 for (int i = 0; i < a.length; i += SPECIES.length()) {
268 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
269 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
270 av.or(bv, vmask).intoArray(r, i);
271 }
272 }
273
274 bh.consume(r);
275 }
276
277
278
279 @Benchmark
280 public void xor(Blackhole bh) {
281 byte[] a = fa.apply(SPECIES.length());
282 byte[] b = fb.apply(SPECIES.length());
283 byte[] r = fr.apply(SPECIES.length());
284
285 for (int ic = 0; ic < INVOC_COUNT; ic++) {
286 for (int i = 0; i < a.length; i += SPECIES.length()) {
287 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
288 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
289 av.xor(bv).intoArray(r, i);
290 }
291 }
292
293 bh.consume(r);
294 }
295
296
297
298 @Benchmark
299 public void xorMasked(Blackhole bh) {
300 byte[] a = fa.apply(SPECIES.length());
301 byte[] b = fb.apply(SPECIES.length());
302 byte[] r = fr.apply(SPECIES.length());
303 boolean[] mask = fm.apply(SPECIES.length());
304 Vector.Mask<Byte> vmask = ByteVector.maskFromValues(SPECIES, mask);
305
306 for (int ic = 0; ic < INVOC_COUNT; ic++) {
307 for (int i = 0; i < a.length; i += SPECIES.length()) {
308 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
309 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
310 av.xor(bv, vmask).intoArray(r, i);
311 }
312 }
313
314 bh.consume(r);
315 }
316
317
318
319
320
321
322
323
324
335 byte[] r = fr.apply(SPECIES.length());
336
337 for (int ic = 0; ic < INVOC_COUNT; ic++) {
338 for (int i = 0; i < a.length; i += SPECIES.length()) {
339 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
340 av.aShiftR((int)b[i]).intoArray(r, i);
341 }
342 }
343
344 bh.consume(r);
345 }
346
347
348
349 @Benchmark
350 public void aShiftRMaskedShift(Blackhole bh) {
351 byte[] a = fa.apply(SPECIES.length());
352 byte[] b = fb.apply(SPECIES.length());
353 byte[] r = fr.apply(SPECIES.length());
354 boolean[] mask = fm.apply(SPECIES.length());
355 Vector.Mask<Byte> vmask = ByteVector.maskFromValues(SPECIES, mask);
356
357 for (int ic = 0; ic < INVOC_COUNT; ic++) {
358 for (int i = 0; i < a.length; i += SPECIES.length()) {
359 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
360 av.aShiftR((int)b[i], vmask).intoArray(r, i);
361 }
362 }
363
364 bh.consume(r);
365 }
366
367
368
369 @Benchmark
370 public void shiftLShift(Blackhole bh) {
371 byte[] a = fa.apply(SPECIES.length());
372 byte[] b = fb.apply(SPECIES.length());
373 byte[] r = fr.apply(SPECIES.length());
374
375 for (int ic = 0; ic < INVOC_COUNT; ic++) {
376 for (int i = 0; i < a.length; i += SPECIES.length()) {
377 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
378 av.shiftL((int)b[i]).intoArray(r, i);
379 }
380 }
381
382 bh.consume(r);
383 }
384
385
386
387 @Benchmark
388 public void shiftLMaskedShift(Blackhole bh) {
389 byte[] a = fa.apply(SPECIES.length());
390 byte[] b = fb.apply(SPECIES.length());
391 byte[] r = fr.apply(SPECIES.length());
392 boolean[] mask = fm.apply(SPECIES.length());
393 Vector.Mask<Byte> vmask = ByteVector.maskFromValues(SPECIES, mask);
394
395 for (int ic = 0; ic < INVOC_COUNT; ic++) {
396 for (int i = 0; i < a.length; i += SPECIES.length()) {
397 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
398 av.shiftL((int)b[i], vmask).intoArray(r, i);
399 }
400 }
401
402 bh.consume(r);
403 }
404
405
406
407 @Benchmark
408 public void shiftRShift(Blackhole bh) {
409 byte[] a = fa.apply(SPECIES.length());
410 byte[] b = fb.apply(SPECIES.length());
411 byte[] r = fr.apply(SPECIES.length());
412
413 for (int ic = 0; ic < INVOC_COUNT; ic++) {
414 for (int i = 0; i < a.length; i += SPECIES.length()) {
415 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
416 av.shiftR((int)b[i]).intoArray(r, i);
417 }
418 }
419
420 bh.consume(r);
421 }
422
423
424
425 @Benchmark
426 public void shiftRMaskedShift(Blackhole bh) {
427 byte[] a = fa.apply(SPECIES.length());
428 byte[] b = fb.apply(SPECIES.length());
429 byte[] r = fr.apply(SPECIES.length());
430 boolean[] mask = fm.apply(SPECIES.length());
431 Vector.Mask<Byte> vmask = ByteVector.maskFromValues(SPECIES, mask);
432
433 for (int ic = 0; ic < INVOC_COUNT; ic++) {
434 for (int i = 0; i < a.length; i += SPECIES.length()) {
435 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
436 av.shiftR((int)b[i], vmask).intoArray(r, i);
437 }
438 }
439
440 bh.consume(r);
441 }
442
443
444
445
446
447
448
449
450 @Benchmark
451 public void max(Blackhole bh) {
583 byte ra = Byte.MIN_VALUE;
584
585 for (int ic = 0; ic < INVOC_COUNT; ic++) {
586 ra = Byte.MIN_VALUE;
587 for (int i = 0; i < a.length; i += SPECIES.length()) {
588 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
589 ra = (byte)Math.max(ra, av.maxAll());
590 }
591 }
592 bh.consume(ra);
593 }
594
595
596 @Benchmark
597 public void anyTrue(Blackhole bh) {
598 boolean[] mask = fm.apply(SPECIES.length());
599 boolean[] r = fmr.apply(SPECIES.length());
600
601 for (int ic = 0; ic < INVOC_COUNT; ic++) {
602 for (int i = 0; i < mask.length; i += SPECIES.length()) {
603 Vector.Mask<Byte> vmask = ByteVector.maskFromArray(SPECIES, mask, i);
604 r[i] = vmask.anyTrue();
605 }
606 }
607
608 bh.consume(r);
609 }
610
611
612
613 @Benchmark
614 public void allTrue(Blackhole bh) {
615 boolean[] mask = fm.apply(SPECIES.length());
616 boolean[] r = fmr.apply(SPECIES.length());
617
618 for (int ic = 0; ic < INVOC_COUNT; ic++) {
619 for (int i = 0; i < mask.length; i += SPECIES.length()) {
620 Vector.Mask<Byte> vmask = ByteVector.maskFromArray(SPECIES, mask, i);
621 r[i] = vmask.allTrue();
622 }
623 }
624
625 bh.consume(r);
626 }
627
628
629 @Benchmark
630 public void with(Blackhole bh) {
631 byte[] a = fa.apply(SPECIES.length());
632 byte[] r = fr.apply(SPECIES.length());
633
634 for (int ic = 0; ic < INVOC_COUNT; ic++) {
635 for (int i = 0; i < a.length; i += SPECIES.length()) {
636 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
637 av.with(0, (byte)4).intoArray(r, i);
638 }
639 }
640
641 bh.consume(r);
642 }
643
644 @Benchmark
645 public Object lessThan() {
646 byte[] a = fa.apply(size);
647 byte[] b = fb.apply(size);
648 boolean[] ms = fm.apply(size);
649 Vector.Mask<Byte> m = ByteVector.maskFromArray(SPECIES, ms, 0);
650
651 for (int ic = 0; ic < INVOC_COUNT; ic++) {
652 for (int i = 0; i < a.length; i += SPECIES.length()) {
653 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
654 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
655 Vector.Mask<Byte> mv = av.lessThan(bv);
656
657 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
658 }
659 }
660 return m;
661 }
662
663
664 @Benchmark
665 public Object greaterThan() {
666 byte[] a = fa.apply(size);
667 byte[] b = fb.apply(size);
668 boolean[] ms = fm.apply(size);
669 Vector.Mask<Byte> m = ByteVector.maskFromArray(SPECIES, ms, 0);
670
671 for (int ic = 0; ic < INVOC_COUNT; ic++) {
672 for (int i = 0; i < a.length; i += SPECIES.length()) {
673 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
674 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
675 Vector.Mask<Byte> mv = av.greaterThan(bv);
676
677 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
678 }
679 }
680 return m;
681 }
682
683
684 @Benchmark
685 public Object equal() {
686 byte[] a = fa.apply(size);
687 byte[] b = fb.apply(size);
688 boolean[] ms = fm.apply(size);
689 Vector.Mask<Byte> m = ByteVector.maskFromArray(SPECIES, ms, 0);
690
691 for (int ic = 0; ic < INVOC_COUNT; ic++) {
692 for (int i = 0; i < a.length; i += SPECIES.length()) {
693 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
694 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
695 Vector.Mask<Byte> mv = av.equal(bv);
696
697 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
698 }
699 }
700 return m;
701 }
702
703
704 @Benchmark
705 public Object notEqual() {
706 byte[] a = fa.apply(size);
707 byte[] b = fb.apply(size);
708 boolean[] ms = fm.apply(size);
709 Vector.Mask<Byte> m = ByteVector.maskFromArray(SPECIES, ms, 0);
710
711 for (int ic = 0; ic < INVOC_COUNT; ic++) {
712 for (int i = 0; i < a.length; i += SPECIES.length()) {
713 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
714 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
715 Vector.Mask<Byte> mv = av.notEqual(bv);
716
717 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
718 }
719 }
720 return m;
721 }
722
723
724 @Benchmark
725 public Object lessThanEq() {
726 byte[] a = fa.apply(size);
727 byte[] b = fb.apply(size);
728 boolean[] ms = fm.apply(size);
729 Vector.Mask<Byte> m = ByteVector.maskFromArray(SPECIES, ms, 0);
730
731 for (int ic = 0; ic < INVOC_COUNT; ic++) {
732 for (int i = 0; i < a.length; i += SPECIES.length()) {
733 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
734 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
735 Vector.Mask<Byte> mv = av.lessThanEq(bv);
736
737 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
738 }
739 }
740 return m;
741 }
742
743
744 @Benchmark
745 public Object greaterThanEq() {
746 byte[] a = fa.apply(size);
747 byte[] b = fb.apply(size);
748 boolean[] ms = fm.apply(size);
749 Vector.Mask<Byte> m = ByteVector.maskFromArray(SPECIES, ms, 0);
750
751 for (int ic = 0; ic < INVOC_COUNT; ic++) {
752 for (int i = 0; i < a.length; i += SPECIES.length()) {
753 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
754 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
755 Vector.Mask<Byte> mv = av.greaterThanEq(bv);
756
757 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
758 }
759 }
760 return m;
761 }
762
763
764 @Benchmark
765 public void blend(Blackhole bh) {
766 byte[] a = fa.apply(SPECIES.length());
767 byte[] b = fb.apply(SPECIES.length());
768 byte[] r = fr.apply(SPECIES.length());
769 boolean[] mask = fm.apply(SPECIES.length());
770 Vector.Mask<Byte> vmask = ByteVector.maskFromValues(SPECIES, mask);
771
772 for (int ic = 0; ic < INVOC_COUNT; ic++) {
773 for (int i = 0; i < a.length; i += SPECIES.length()) {
774 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
775 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
776 av.blend(bv, vmask).intoArray(r, i);
777 }
778 }
779
780 bh.consume(r);
781 }
782
783 @Benchmark
784 public void rearrange(Blackhole bh) {
785 byte[] a = fa.apply(SPECIES.length());
786 int[] order = fs.apply(a.length, SPECIES.length());
787 byte[] r = fr.apply(SPECIES.length());
788
789 for (int ic = 0; ic < INVOC_COUNT; ic++) {
790 for (int i = 0; i < a.length; i += SPECIES.length()) {
791 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
792 av.rearrange(ByteVector.shuffleFromArray(SPECIES, order, i)).intoArray(r, i);
793 }
794 }
795
796 bh.consume(r);
797 }
798
799 @Benchmark
800 public void extract(Blackhole bh) {
801 byte[] a = fa.apply(SPECIES.length());
802 byte[] r = fr.apply(SPECIES.length());
803
804 for (int ic = 0; ic < INVOC_COUNT; ic++) {
805 for (int i = 0; i < a.length; i += SPECIES.length()) {
806 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
807 int num_lanes = SPECIES.length();
808 // Manually unroll because full unroll happens after intrinsification.
809 // Unroll is needed because get intrinsic requires for index to be a known constant.
810 if (num_lanes == 1) {
811 r[i]=av.get(0);
812 } else if (num_lanes == 2) {
975 @Benchmark
976 public void neg(Blackhole bh) {
977 byte[] a = fa.apply(SPECIES.length());
978 byte[] r = fr.apply(SPECIES.length());
979
980 for (int ic = 0; ic < INVOC_COUNT; ic++) {
981 for (int i = 0; i < a.length; i += SPECIES.length()) {
982 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
983 av.neg().intoArray(r, i);
984 }
985 }
986
987 bh.consume(r);
988 }
989
990 @Benchmark
991 public void negMasked(Blackhole bh) {
992 byte[] a = fa.apply(SPECIES.length());
993 byte[] r = fr.apply(SPECIES.length());
994 boolean[] mask = fm.apply(SPECIES.length());
995 Vector.Mask<Byte> vmask = ByteVector.maskFromValues(SPECIES, mask);
996
997 for (int ic = 0; ic < INVOC_COUNT; ic++) {
998 for (int i = 0; i < a.length; i += SPECIES.length()) {
999 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1000 av.neg(vmask).intoArray(r, i);
1001 }
1002 }
1003
1004 bh.consume(r);
1005 }
1006
1007 @Benchmark
1008 public void abs(Blackhole bh) {
1009 byte[] a = fa.apply(SPECIES.length());
1010 byte[] r = fr.apply(SPECIES.length());
1011
1012 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1013 for (int i = 0; i < a.length; i += SPECIES.length()) {
1014 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1015 av.abs().intoArray(r, i);
1016 }
1017 }
1018
1019 bh.consume(r);
1020 }
1021
1022 @Benchmark
1023 public void absMasked(Blackhole bh) {
1024 byte[] a = fa.apply(SPECIES.length());
1025 byte[] r = fr.apply(SPECIES.length());
1026 boolean[] mask = fm.apply(SPECIES.length());
1027 Vector.Mask<Byte> vmask = ByteVector.maskFromValues(SPECIES, mask);
1028
1029 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1030 for (int i = 0; i < a.length; i += SPECIES.length()) {
1031 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1032 av.abs(vmask).intoArray(r, i);
1033 }
1034 }
1035
1036 bh.consume(r);
1037 }
1038
1039
1040 @Benchmark
1041 public void not(Blackhole bh) {
1042 byte[] a = fa.apply(SPECIES.length());
1043 byte[] r = fr.apply(SPECIES.length());
1044
1045 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1046 for (int i = 0; i < a.length; i += SPECIES.length()) {
1047 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1048 av.not().intoArray(r, i);
1049 }
1050 }
1051
1052 bh.consume(r);
1053 }
1054
1055
1056
1057 @Benchmark
1058 public void notMasked(Blackhole bh) {
1059 byte[] a = fa.apply(SPECIES.length());
1060 byte[] r = fr.apply(SPECIES.length());
1061 boolean[] mask = fm.apply(SPECIES.length());
1062 Vector.Mask<Byte> vmask = ByteVector.maskFromValues(SPECIES, mask);
1063
1064 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1065 for (int i = 0; i < a.length; i += SPECIES.length()) {
1066 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1067 av.not(vmask).intoArray(r, i);
1068 }
1069 }
1070
1071 bh.consume(r);
1072 }
1073
1074
1075
1076
1077
1078 }
1079
|
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have
21 * questions.
22 */
23
24 package benchmark.jdk.incubator.vector;
25
import jdk.incubator.vector.ByteVector;
import jdk.incubator.vector.Vector;
import jdk.incubator.vector.VectorMask;
import jdk.incubator.vector.VectorShape;
import jdk.incubator.vector.VectorShuffle;
import jdk.incubator.vector.VectorSpecies;

import java.util.concurrent.TimeUnit;
import java.util.function.BiFunction;
import java.util.function.IntFunction;

import org.openjdk.jmh.annotations.*;
import org.openjdk.jmh.infra.Blackhole;
38
39 @BenchmarkMode(Mode.Throughput)
40 @OutputTimeUnit(TimeUnit.MILLISECONDS)
41 @State(Scope.Benchmark)
42 @Warmup(iterations = 3, time = 1)
43 @Measurement(iterations = 5, time = 1)
44 @Fork(value = 1, jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"})
45 public class ByteMaxVector extends AbstractVectorBenchmark {
46 static final VectorSpecies<Byte> SPECIES = ByteVector.SPECIES_MAX;
47
48 static final int INVOC_COUNT = 1; // get rid of outer loop
49
50 @Param("1024")
51 int size;
52
53 byte[] fill(IntFunction<Byte> f) {
54 byte[] array = new byte[size];
55 for (int i = 0; i < array.length; i++) {
56 array[i] = f.apply(i);
57 }
58 return array;
59 }
60
61 byte[] a, b, c, r;
62 boolean[] m, rm;
63 int[] s;
64
65 @Setup
66 public void init() {
92 byte[] b = fb.apply(SPECIES.length());
93 byte[] r = fr.apply(SPECIES.length());
94
95 for (int ic = 0; ic < INVOC_COUNT; ic++) {
96 for (int i = 0; i < a.length; i += SPECIES.length()) {
97 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
98 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
99 av.add(bv).intoArray(r, i);
100 }
101 }
102
103 bh.consume(r);
104 }
105
106 @Benchmark
107 public void addMasked(Blackhole bh) {
108 byte[] a = fa.apply(SPECIES.length());
109 byte[] b = fb.apply(SPECIES.length());
110 byte[] r = fr.apply(SPECIES.length());
111 boolean[] mask = fm.apply(SPECIES.length());
112 VectorMask<Byte> vmask = VectorMask.fromValues(SPECIES, mask);
113
114 for (int ic = 0; ic < INVOC_COUNT; ic++) {
115 for (int i = 0; i < a.length; i += SPECIES.length()) {
116 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
117 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
118 av.add(bv, vmask).intoArray(r, i);
119 }
120 }
121
122 bh.consume(r);
123 }
124
125 @Benchmark
126 public void sub(Blackhole bh) {
127 byte[] a = fa.apply(SPECIES.length());
128 byte[] b = fb.apply(SPECIES.length());
129 byte[] r = fr.apply(SPECIES.length());
130
131 for (int ic = 0; ic < INVOC_COUNT; ic++) {
132 for (int i = 0; i < a.length; i += SPECIES.length()) {
133 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
134 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
135 av.sub(bv).intoArray(r, i);
136 }
137 }
138
139 bh.consume(r);
140 }
141
142 @Benchmark
143 public void subMasked(Blackhole bh) {
144 byte[] a = fa.apply(SPECIES.length());
145 byte[] b = fb.apply(SPECIES.length());
146 byte[] r = fr.apply(SPECIES.length());
147 boolean[] mask = fm.apply(SPECIES.length());
148 VectorMask<Byte> vmask = VectorMask.fromValues(SPECIES, mask);
149
150 for (int ic = 0; ic < INVOC_COUNT; ic++) {
151 for (int i = 0; i < a.length; i += SPECIES.length()) {
152 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
153 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
154 av.sub(bv, vmask).intoArray(r, i);
155 }
156 }
157
158 bh.consume(r);
159 }
160
161
162
163 @Benchmark
164 public void mul(Blackhole bh) {
165 byte[] a = fa.apply(SPECIES.length());
166 byte[] b = fb.apply(SPECIES.length());
167 byte[] r = fr.apply(SPECIES.length());
168
169 for (int ic = 0; ic < INVOC_COUNT; ic++) {
170 for (int i = 0; i < a.length; i += SPECIES.length()) {
171 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
172 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
173 av.mul(bv).intoArray(r, i);
174 }
175 }
176
177 bh.consume(r);
178 }
179
180 @Benchmark
181 public void mulMasked(Blackhole bh) {
182 byte[] a = fa.apply(SPECIES.length());
183 byte[] b = fb.apply(SPECIES.length());
184 byte[] r = fr.apply(SPECIES.length());
185 boolean[] mask = fm.apply(SPECIES.length());
186 VectorMask<Byte> vmask = VectorMask.fromValues(SPECIES, mask);
187
188 for (int ic = 0; ic < INVOC_COUNT; ic++) {
189 for (int i = 0; i < a.length; i += SPECIES.length()) {
190 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
191 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
192 av.mul(bv, vmask).intoArray(r, i);
193 }
194 }
195
196 bh.consume(r);
197 }
198
199
200 @Benchmark
201 public void and(Blackhole bh) {
202 byte[] a = fa.apply(SPECIES.length());
203 byte[] b = fb.apply(SPECIES.length());
204 byte[] r = fr.apply(SPECIES.length());
205
206 for (int ic = 0; ic < INVOC_COUNT; ic++) {
207 for (int i = 0; i < a.length; i += SPECIES.length()) {
208 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
209 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
210 av.and(bv).intoArray(r, i);
211 }
212 }
213
214 bh.consume(r);
215 }
216
217
218
219 @Benchmark
220 public void andMasked(Blackhole bh) {
221 byte[] a = fa.apply(SPECIES.length());
222 byte[] b = fb.apply(SPECIES.length());
223 byte[] r = fr.apply(SPECIES.length());
224 boolean[] mask = fm.apply(SPECIES.length());
225 VectorMask<Byte> vmask = VectorMask.fromValues(SPECIES, mask);
226
227 for (int ic = 0; ic < INVOC_COUNT; ic++) {
228 for (int i = 0; i < a.length; i += SPECIES.length()) {
229 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
230 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
231 av.and(bv, vmask).intoArray(r, i);
232 }
233 }
234
235 bh.consume(r);
236 }
237
238
239
240 @Benchmark
241 public void or(Blackhole bh) {
242 byte[] a = fa.apply(SPECIES.length());
243 byte[] b = fb.apply(SPECIES.length());
244 byte[] r = fr.apply(SPECIES.length());
245
246 for (int ic = 0; ic < INVOC_COUNT; ic++) {
247 for (int i = 0; i < a.length; i += SPECIES.length()) {
248 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
249 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
250 av.or(bv).intoArray(r, i);
251 }
252 }
253
254 bh.consume(r);
255 }
256
257
258
259 @Benchmark
260 public void orMasked(Blackhole bh) {
261 byte[] a = fa.apply(SPECIES.length());
262 byte[] b = fb.apply(SPECIES.length());
263 byte[] r = fr.apply(SPECIES.length());
264 boolean[] mask = fm.apply(SPECIES.length());
265 VectorMask<Byte> vmask = VectorMask.fromValues(SPECIES, mask);
266
267 for (int ic = 0; ic < INVOC_COUNT; ic++) {
268 for (int i = 0; i < a.length; i += SPECIES.length()) {
269 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
270 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
271 av.or(bv, vmask).intoArray(r, i);
272 }
273 }
274
275 bh.consume(r);
276 }
277
278
279
280 @Benchmark
281 public void xor(Blackhole bh) {
282 byte[] a = fa.apply(SPECIES.length());
283 byte[] b = fb.apply(SPECIES.length());
284 byte[] r = fr.apply(SPECIES.length());
285
286 for (int ic = 0; ic < INVOC_COUNT; ic++) {
287 for (int i = 0; i < a.length; i += SPECIES.length()) {
288 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
289 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
290 av.xor(bv).intoArray(r, i);
291 }
292 }
293
294 bh.consume(r);
295 }
296
297
298
299 @Benchmark
300 public void xorMasked(Blackhole bh) {
301 byte[] a = fa.apply(SPECIES.length());
302 byte[] b = fb.apply(SPECIES.length());
303 byte[] r = fr.apply(SPECIES.length());
304 boolean[] mask = fm.apply(SPECIES.length());
305 VectorMask<Byte> vmask = VectorMask.fromValues(SPECIES, mask);
306
307 for (int ic = 0; ic < INVOC_COUNT; ic++) {
308 for (int i = 0; i < a.length; i += SPECIES.length()) {
309 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
310 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
311 av.xor(bv, vmask).intoArray(r, i);
312 }
313 }
314
315 bh.consume(r);
316 }
317
318
319
320
321
322
323
324
325
336 byte[] r = fr.apply(SPECIES.length());
337
338 for (int ic = 0; ic < INVOC_COUNT; ic++) {
339 for (int i = 0; i < a.length; i += SPECIES.length()) {
340 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
341 av.aShiftR((int)b[i]).intoArray(r, i);
342 }
343 }
344
345 bh.consume(r);
346 }
347
348
349
350 @Benchmark
351 public void aShiftRMaskedShift(Blackhole bh) {
352 byte[] a = fa.apply(SPECIES.length());
353 byte[] b = fb.apply(SPECIES.length());
354 byte[] r = fr.apply(SPECIES.length());
355 boolean[] mask = fm.apply(SPECIES.length());
356 VectorMask<Byte> vmask = VectorMask.fromValues(SPECIES, mask);
357
358 for (int ic = 0; ic < INVOC_COUNT; ic++) {
359 for (int i = 0; i < a.length; i += SPECIES.length()) {
360 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
361 av.aShiftR((int)b[i], vmask).intoArray(r, i);
362 }
363 }
364
365 bh.consume(r);
366 }
367
368
369
370 @Benchmark
371 public void shiftLShift(Blackhole bh) {
372 byte[] a = fa.apply(SPECIES.length());
373 byte[] b = fb.apply(SPECIES.length());
374 byte[] r = fr.apply(SPECIES.length());
375
376 for (int ic = 0; ic < INVOC_COUNT; ic++) {
377 for (int i = 0; i < a.length; i += SPECIES.length()) {
378 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
379 av.shiftL((int)b[i]).intoArray(r, i);
380 }
381 }
382
383 bh.consume(r);
384 }
385
386
387
388 @Benchmark
389 public void shiftLMaskedShift(Blackhole bh) {
390 byte[] a = fa.apply(SPECIES.length());
391 byte[] b = fb.apply(SPECIES.length());
392 byte[] r = fr.apply(SPECIES.length());
393 boolean[] mask = fm.apply(SPECIES.length());
394 VectorMask<Byte> vmask = VectorMask.fromValues(SPECIES, mask);
395
396 for (int ic = 0; ic < INVOC_COUNT; ic++) {
397 for (int i = 0; i < a.length; i += SPECIES.length()) {
398 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
399 av.shiftL((int)b[i], vmask).intoArray(r, i);
400 }
401 }
402
403 bh.consume(r);
404 }
405
406
407
408 @Benchmark
409 public void shiftRShift(Blackhole bh) {
410 byte[] a = fa.apply(SPECIES.length());
411 byte[] b = fb.apply(SPECIES.length());
412 byte[] r = fr.apply(SPECIES.length());
413
414 for (int ic = 0; ic < INVOC_COUNT; ic++) {
415 for (int i = 0; i < a.length; i += SPECIES.length()) {
416 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
417 av.shiftR((int)b[i]).intoArray(r, i);
418 }
419 }
420
421 bh.consume(r);
422 }
423
424
425
426 @Benchmark
427 public void shiftRMaskedShift(Blackhole bh) {
428 byte[] a = fa.apply(SPECIES.length());
429 byte[] b = fb.apply(SPECIES.length());
430 byte[] r = fr.apply(SPECIES.length());
431 boolean[] mask = fm.apply(SPECIES.length());
432 VectorMask<Byte> vmask = VectorMask.fromValues(SPECIES, mask);
433
434 for (int ic = 0; ic < INVOC_COUNT; ic++) {
435 for (int i = 0; i < a.length; i += SPECIES.length()) {
436 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
437 av.shiftR((int)b[i], vmask).intoArray(r, i);
438 }
439 }
440
441 bh.consume(r);
442 }
443
444
445
446
447
448
449
450
451 @Benchmark
452 public void max(Blackhole bh) {
584 byte ra = Byte.MIN_VALUE;
585
586 for (int ic = 0; ic < INVOC_COUNT; ic++) {
587 ra = Byte.MIN_VALUE;
588 for (int i = 0; i < a.length; i += SPECIES.length()) {
589 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
590 ra = (byte)Math.max(ra, av.maxAll());
591 }
592 }
593 bh.consume(ra);
594 }
595
596
597 @Benchmark
598 public void anyTrue(Blackhole bh) {
599 boolean[] mask = fm.apply(SPECIES.length());
600 boolean[] r = fmr.apply(SPECIES.length());
601
602 for (int ic = 0; ic < INVOC_COUNT; ic++) {
603 for (int i = 0; i < mask.length; i += SPECIES.length()) {
604 VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, i);
605 r[i] = vmask.anyTrue();
606 }
607 }
608
609 bh.consume(r);
610 }
611
612
613
614 @Benchmark
615 public void allTrue(Blackhole bh) {
616 boolean[] mask = fm.apply(SPECIES.length());
617 boolean[] r = fmr.apply(SPECIES.length());
618
619 for (int ic = 0; ic < INVOC_COUNT; ic++) {
620 for (int i = 0; i < mask.length; i += SPECIES.length()) {
621 VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, i);
622 r[i] = vmask.allTrue();
623 }
624 }
625
626 bh.consume(r);
627 }
628
629
630 @Benchmark
631 public void with(Blackhole bh) {
632 byte[] a = fa.apply(SPECIES.length());
633 byte[] r = fr.apply(SPECIES.length());
634
635 for (int ic = 0; ic < INVOC_COUNT; ic++) {
636 for (int i = 0; i < a.length; i += SPECIES.length()) {
637 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
638 av.with(0, (byte)4).intoArray(r, i);
639 }
640 }
641
642 bh.consume(r);
643 }
644
645 @Benchmark
646 public Object lessThan() {
647 byte[] a = fa.apply(size);
648 byte[] b = fb.apply(size);
649 boolean[] ms = fm.apply(size);
650 VectorMask<Byte> m = VectorMask.maskFromArray(SPECIES, ms, 0);
651
652 for (int ic = 0; ic < INVOC_COUNT; ic++) {
653 for (int i = 0; i < a.length; i += SPECIES.length()) {
654 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
655 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
656 VectorMask<Byte> mv = av.lessThan(bv);
657
658 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
659 }
660 }
661 return m;
662 }
663
664
665 @Benchmark
666 public Object greaterThan() {
667 byte[] a = fa.apply(size);
668 byte[] b = fb.apply(size);
669 boolean[] ms = fm.apply(size);
670 VectorMask<Byte> m = VectorMask.maskFromArray(SPECIES, ms, 0);
671
672 for (int ic = 0; ic < INVOC_COUNT; ic++) {
673 for (int i = 0; i < a.length; i += SPECIES.length()) {
674 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
675 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
676 VectorMask<Byte> mv = av.greaterThan(bv);
677
678 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
679 }
680 }
681 return m;
682 }
683
684
685 @Benchmark
686 public Object equal() {
687 byte[] a = fa.apply(size);
688 byte[] b = fb.apply(size);
689 boolean[] ms = fm.apply(size);
690 VectorMask<Byte> m = VectorMask.maskFromArray(SPECIES, ms, 0);
691
692 for (int ic = 0; ic < INVOC_COUNT; ic++) {
693 for (int i = 0; i < a.length; i += SPECIES.length()) {
694 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
695 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
696 VectorMask<Byte> mv = av.equal(bv);
697
698 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
699 }
700 }
701 return m;
702 }
703
704
705 @Benchmark
706 public Object notEqual() {
707 byte[] a = fa.apply(size);
708 byte[] b = fb.apply(size);
709 boolean[] ms = fm.apply(size);
710 VectorMask<Byte> m = VectorMask.maskFromArray(SPECIES, ms, 0);
711
712 for (int ic = 0; ic < INVOC_COUNT; ic++) {
713 for (int i = 0; i < a.length; i += SPECIES.length()) {
714 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
715 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
716 VectorMask<Byte> mv = av.notEqual(bv);
717
718 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
719 }
720 }
721 return m;
722 }
723
724
725 @Benchmark
726 public Object lessThanEq() {
727 byte[] a = fa.apply(size);
728 byte[] b = fb.apply(size);
729 boolean[] ms = fm.apply(size);
730 VectorMask<Byte> m = VectorMask.maskFromArray(SPECIES, ms, 0);
731
732 for (int ic = 0; ic < INVOC_COUNT; ic++) {
733 for (int i = 0; i < a.length; i += SPECIES.length()) {
734 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
735 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
736 VectorMask<Byte> mv = av.lessThanEq(bv);
737
738 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
739 }
740 }
741 return m;
742 }
743
744
745 @Benchmark
746 public Object greaterThanEq() {
747 byte[] a = fa.apply(size);
748 byte[] b = fb.apply(size);
749 boolean[] ms = fm.apply(size);
750 VectorMask<Byte> m = VectorMask.maskFromArray(SPECIES, ms, 0);
751
752 for (int ic = 0; ic < INVOC_COUNT; ic++) {
753 for (int i = 0; i < a.length; i += SPECIES.length()) {
754 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
755 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
756 VectorMask<Byte> mv = av.greaterThanEq(bv);
757
758 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
759 }
760 }
761 return m;
762 }
763
764
765 @Benchmark
766 public void blend(Blackhole bh) {
767 byte[] a = fa.apply(SPECIES.length());
768 byte[] b = fb.apply(SPECIES.length());
769 byte[] r = fr.apply(SPECIES.length());
770 boolean[] mask = fm.apply(SPECIES.length());
771 VectorMask<Byte> vmask = VectorMask.fromValues(SPECIES, mask);
772
773 for (int ic = 0; ic < INVOC_COUNT; ic++) {
774 for (int i = 0; i < a.length; i += SPECIES.length()) {
775 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
776 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
777 av.blend(bv, vmask).intoArray(r, i);
778 }
779 }
780
781 bh.consume(r);
782 }
783
784 @Benchmark
785 public void rearrange(Blackhole bh) {
786 byte[] a = fa.apply(SPECIES.length());
787 int[] order = fs.apply(a.length, SPECIES.length());
788 byte[] r = fr.apply(SPECIES.length());
789
790 for (int ic = 0; ic < INVOC_COUNT; ic++) {
791 for (int i = 0; i < a.length; i += SPECIES.length()) {
792 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
793 av.rearrange(VectorShuffle.fromArray(SPECIES, order, i)).intoArray(r, i);
794 }
795 }
796
797 bh.consume(r);
798 }
799
800 @Benchmark
801 public void extract(Blackhole bh) {
802 byte[] a = fa.apply(SPECIES.length());
803 byte[] r = fr.apply(SPECIES.length());
804
805 for (int ic = 0; ic < INVOC_COUNT; ic++) {
806 for (int i = 0; i < a.length; i += SPECIES.length()) {
807 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
808 int num_lanes = SPECIES.length();
809 // Manually unroll because full unroll happens after intrinsification.
810 // Unroll is needed because get intrinsic requires for index to be a known constant.
811 if (num_lanes == 1) {
812 r[i]=av.get(0);
813 } else if (num_lanes == 2) {
976 @Benchmark
977 public void neg(Blackhole bh) {
978 byte[] a = fa.apply(SPECIES.length());
979 byte[] r = fr.apply(SPECIES.length());
980
981 for (int ic = 0; ic < INVOC_COUNT; ic++) {
982 for (int i = 0; i < a.length; i += SPECIES.length()) {
983 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
984 av.neg().intoArray(r, i);
985 }
986 }
987
988 bh.consume(r);
989 }
990
991 @Benchmark
992 public void negMasked(Blackhole bh) {
993 byte[] a = fa.apply(SPECIES.length());
994 byte[] r = fr.apply(SPECIES.length());
995 boolean[] mask = fm.apply(SPECIES.length());
996 VectorMask<Byte> vmask = VectorMask.fromValues(SPECIES, mask);
997
998 for (int ic = 0; ic < INVOC_COUNT; ic++) {
999 for (int i = 0; i < a.length; i += SPECIES.length()) {
1000 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1001 av.neg(vmask).intoArray(r, i);
1002 }
1003 }
1004
1005 bh.consume(r);
1006 }
1007
1008 @Benchmark
1009 public void abs(Blackhole bh) {
1010 byte[] a = fa.apply(SPECIES.length());
1011 byte[] r = fr.apply(SPECIES.length());
1012
1013 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1014 for (int i = 0; i < a.length; i += SPECIES.length()) {
1015 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1016 av.abs().intoArray(r, i);
1017 }
1018 }
1019
1020 bh.consume(r);
1021 }
1022
1023 @Benchmark
1024 public void absMasked(Blackhole bh) {
1025 byte[] a = fa.apply(SPECIES.length());
1026 byte[] r = fr.apply(SPECIES.length());
1027 boolean[] mask = fm.apply(SPECIES.length());
1028 VectorMask<Byte> vmask = VectorMask.fromValues(SPECIES, mask);
1029
1030 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1031 for (int i = 0; i < a.length; i += SPECIES.length()) {
1032 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1033 av.abs(vmask).intoArray(r, i);
1034 }
1035 }
1036
1037 bh.consume(r);
1038 }
1039
1040
1041 @Benchmark
1042 public void not(Blackhole bh) {
1043 byte[] a = fa.apply(SPECIES.length());
1044 byte[] r = fr.apply(SPECIES.length());
1045
1046 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1047 for (int i = 0; i < a.length; i += SPECIES.length()) {
1048 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1049 av.not().intoArray(r, i);
1050 }
1051 }
1052
1053 bh.consume(r);
1054 }
1055
1056
1057
1058 @Benchmark
1059 public void notMasked(Blackhole bh) {
1060 byte[] a = fa.apply(SPECIES.length());
1061 byte[] r = fr.apply(SPECIES.length());
1062 boolean[] mask = fm.apply(SPECIES.length());
1063 VectorMask<Byte> vmask = VectorMask.fromValues(SPECIES, mask);
1064
1065 for (int ic = 0; ic < INVOC_COUNT; ic++) {
1066 for (int i = 0; i < a.length; i += SPECIES.length()) {
1067 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1068 av.not(vmask).intoArray(r, i);
1069 }
1070 }
1071
1072 bh.consume(r);
1073 }
1074
1075
1076
1077
1078
1079 }
1080
|