1 /*
2 * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have
21 * questions.
22 */
23
24 package benchmark.jdk.incubator.vector;
25
26 import java.util.concurrent.TimeUnit;
27 import java.util.function.IntFunction;
28
29 import org.openjdk.jmh.annotations.*;
30
31 @BenchmarkMode(Mode.Throughput)
32 @OutputTimeUnit(TimeUnit.MILLISECONDS)
33 @State(Scope.Benchmark)
34 @Warmup(iterations = 3, time = 1)
35 @Measurement(iterations = 5, time = 1)
36 @Fork(value = 1, jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"})
37 public class ByteScalar extends AbstractVectorBenchmark {
38 @Param("1024")
39 int size;
40
41 byte[] fill(IntFunction<Byte> f) {
42 byte[] array = new byte[size];
43 for (int i = 0; i < array.length; i++) {
44 array[i] = f.apply(i);
45 }
46 return array;
47 }
48
49 byte[] as, bs, cs, rs;
50 boolean[] ms, rms;
51 int[] ss;
52
53 @Setup
54 public void init() {
55 as = fill(i -> (byte)(2*i));
56 bs = fill(i -> (byte)(i+1));
57 cs = fill(i -> (byte)(i+5));
58 rs = fill(i -> (byte)0);
59 ms = fillMask(size, i -> (i % 2) == 0);
60 rms = fillMask(size, i -> false);
61
62 ss = fillInt(size, i -> RANDOM.nextInt(Math.max(i,1)));
63 }
64
65 final IntFunction<byte[]> fa = vl -> as;
66 final IntFunction<byte[]> fb = vl -> bs;
67 final IntFunction<byte[]> fc = vl -> cs;
68 final IntFunction<byte[]> fr = vl -> rs;
69 final IntFunction<boolean[]> fm = vl -> ms;
70 final IntFunction<boolean[]> fmr = vl -> rms;
71 final IntFunction<int[]> fs = vl -> ss;
72
73
74 @Benchmark
75 public Object add() {
76 byte[] as = fa.apply(size);
77 byte[] bs = fb.apply(size);
78 byte[] rs = fr.apply(size);
79
80 for (int i = 0; i < as.length; i++) {
81 byte a = as[i];
82 byte b = bs[i];
83 rs[i] = (byte)(a + b);
84 }
85
86 return rs;
87 }
88
89 @Benchmark
90 public Object addMasked() {
91 byte[] as = fa.apply(size);
92 byte[] bs = fb.apply(size);
93 byte[] rs = fr.apply(size);
94 boolean[] ms = fm.apply(size);
95
96 for (int i = 0; i < as.length; i++) {
97 byte a = as[i];
98 byte b = bs[i];
99 if (ms[i % ms.length]) {
100 rs[i] = (byte)(a + b);
101 } else {
102 rs[i] = a;
103 }
104 }
105 return rs;
106 }
107
108 @Benchmark
109 public Object sub() {
110 byte[] as = fa.apply(size);
111 byte[] bs = fb.apply(size);
112 byte[] rs = fr.apply(size);
113
114 for (int i = 0; i < as.length; i++) {
115 byte a = as[i];
116 byte b = bs[i];
117 rs[i] = (byte)(a - b);
118 }
119
120 return rs;
121 }
122
123 @Benchmark
124 public Object subMasked() {
125 byte[] as = fa.apply(size);
126 byte[] bs = fb.apply(size);
127 byte[] rs = fr.apply(size);
128 boolean[] ms = fm.apply(size);
129
130 for (int i = 0; i < as.length; i++) {
131 byte a = as[i];
132 byte b = bs[i];
133 if (ms[i % ms.length]) {
134 rs[i] = (byte)(a - b);
135 } else {
136 rs[i] = a;
137 }
138 }
139 return rs;
140 }
141
142
143
144 @Benchmark
145 public Object mul() {
146 byte[] as = fa.apply(size);
147 byte[] bs = fb.apply(size);
148 byte[] rs = fr.apply(size);
149
150 for (int i = 0; i < as.length; i++) {
151 byte a = as[i];
152 byte b = bs[i];
153 rs[i] = (byte)(a * b);
154 }
155
156 return rs;
157 }
158
159 @Benchmark
160 public Object mulMasked() {
161 byte[] as = fa.apply(size);
162 byte[] bs = fb.apply(size);
163 byte[] rs = fr.apply(size);
164 boolean[] ms = fm.apply(size);
165
166 for (int i = 0; i < as.length; i++) {
167 byte a = as[i];
168 byte b = bs[i];
169 if (ms[i % ms.length]) {
170 rs[i] = (byte)(a * b);
171 } else {
172 rs[i] = a;
173 }
174 }
175 return rs;
176 }
177
178
179 @Benchmark
180 public Object and() {
181 byte[] as = fa.apply(size);
182 byte[] bs = fb.apply(size);
183 byte[] rs = fr.apply(size);
184
185 for (int i = 0; i < as.length; i++) {
186 byte a = as[i];
187 byte b = bs[i];
188 rs[i] = (byte)(a & b);
189 }
190
191 return rs;
192 }
193
194
195
196 @Benchmark
197 public Object andMasked() {
198 byte[] as = fa.apply(size);
199 byte[] bs = fb.apply(size);
200 byte[] rs = fr.apply(size);
201 boolean[] ms = fm.apply(size);
202
203 for (int i = 0; i < as.length; i++) {
204 byte a = as[i];
205 byte b = bs[i];
206 if (ms[i % ms.length]) {
207 rs[i] = (byte)(a & b);
208 } else {
209 rs[i] = a;
210 }
211 }
212 return rs;
213 }
214
215
216
217 @Benchmark
218 public Object or() {
219 byte[] as = fa.apply(size);
220 byte[] bs = fb.apply(size);
221 byte[] rs = fr.apply(size);
222
223 for (int i = 0; i < as.length; i++) {
224 byte a = as[i];
225 byte b = bs[i];
226 rs[i] = (byte)(a | b);
227 }
228
229 return rs;
230 }
231
232
233
234 @Benchmark
235 public Object orMasked() {
236 byte[] as = fa.apply(size);
237 byte[] bs = fb.apply(size);
238 byte[] rs = fr.apply(size);
239 boolean[] ms = fm.apply(size);
240
241 for (int i = 0; i < as.length; i++) {
242 byte a = as[i];
243 byte b = bs[i];
244 if (ms[i % ms.length]) {
245 rs[i] = (byte)(a | b);
246 } else {
247 rs[i] = a;
248 }
249 }
250 return rs;
251 }
252
253
254
255 @Benchmark
256 public Object xor() {
257 byte[] as = fa.apply(size);
258 byte[] bs = fb.apply(size);
259 byte[] rs = fr.apply(size);
260
261 for (int i = 0; i < as.length; i++) {
262 byte a = as[i];
263 byte b = bs[i];
264 rs[i] = (byte)(a ^ b);
265 }
266
267 return rs;
268 }
269
270
271
272 @Benchmark
273 public Object xorMasked() {
274 byte[] as = fa.apply(size);
275 byte[] bs = fb.apply(size);
276 byte[] rs = fr.apply(size);
277 boolean[] ms = fm.apply(size);
278
279 for (int i = 0; i < as.length; i++) {
280 byte a = as[i];
281 byte b = bs[i];
282 if (ms[i % ms.length]) {
283 rs[i] = (byte)(a ^ b);
284 } else {
285 rs[i] = a;
286 }
287 }
288 return rs;
289 }
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305 @Benchmark
306 public Object aShiftRShift() {
307 byte[] as = fa.apply(size);
308 byte[] bs = fb.apply(size);
309 byte[] rs = fr.apply(size);
310
311 for (int i = 0; i < as.length; i++) {
312 byte a = as[i];
313 byte b = bs[i];
314 rs[i] = (byte)((a >> (b & 7)));
315 }
316
317 return rs;
318 }
319
320
321
322 @Benchmark
323 public Object aShiftRMaskedShift() {
324 byte[] as = fa.apply(size);
325 byte[] bs = fb.apply(size);
326 byte[] rs = fr.apply(size);
327 boolean[] ms = fm.apply(size);
328
329 for (int i = 0; i < as.length; i++) {
330 byte a = as[i];
331 byte b = bs[i];
332 boolean m = ms[i % ms.length];
333 rs[i] = (m ? (byte)((a >> (b & 7))) : a);
334 }
335
336 return rs;
337 }
338
339
340
341 @Benchmark
342 public Object shiftLShift() {
343 byte[] as = fa.apply(size);
344 byte[] bs = fb.apply(size);
345 byte[] rs = fr.apply(size);
346
347 for (int i = 0; i < as.length; i++) {
348 byte a = as[i];
349 byte b = bs[i];
350 rs[i] = (byte)((a << (b & 7)));
351 }
352
353 return rs;
354 }
355
356
357
358 @Benchmark
359 public Object shiftLMaskedShift() {
360 byte[] as = fa.apply(size);
361 byte[] bs = fb.apply(size);
362 byte[] rs = fr.apply(size);
363 boolean[] ms = fm.apply(size);
364
365 for (int i = 0; i < as.length; i++) {
366 byte a = as[i];
367 byte b = bs[i];
368 boolean m = ms[i % ms.length];
369 rs[i] = (m ? (byte)((a << (b & 7))) : a);
370 }
371
372 return rs;
373 }
374
375
376
377 @Benchmark
378 public Object shiftRShift() {
379 byte[] as = fa.apply(size);
380 byte[] bs = fb.apply(size);
381 byte[] rs = fr.apply(size);
382
383 for (int i = 0; i < as.length; i++) {
384 byte a = as[i];
385 byte b = bs[i];
386 rs[i] = (byte)(((a & 0xFF) >>> (b & 7)));
387 }
388
389 return rs;
390 }
391
392
393
394 @Benchmark
395 public Object shiftRMaskedShift() {
396 byte[] as = fa.apply(size);
397 byte[] bs = fb.apply(size);
398 byte[] rs = fr.apply(size);
399 boolean[] ms = fm.apply(size);
400
401 for (int i = 0; i < as.length; i++) {
402 byte a = as[i];
403 byte b = bs[i];
404 boolean m = ms[i % ms.length];
405 rs[i] = (m ? (byte)(((a & 0xFF) >>> (b & 7))) : a);
406 }
407
408 return rs;
409 }
410
411
412
413
414
415
416
417
418 @Benchmark
419 public Object max() {
420 byte[] as = fa.apply(size);
421 byte[] bs = fb.apply(size);
422 byte[] rs = fr.apply(size);
423
424 for (int i = 0; i < as.length; i++) {
425 byte a = as[i];
426 byte b = bs[i];
427 rs[i] = (byte)(Math.max(a, b));
428 }
429
430 return rs;
431 }
432
433 @Benchmark
434 public Object min() {
435 byte[] as = fa.apply(size);
436 byte[] bs = fb.apply(size);
437 byte[] rs = fr.apply(size);
438
439 for (int i = 0; i < as.length; i++) {
440 byte a = as[i];
441 byte b = bs[i];
442 rs[i] = (byte)(Math.min(a, b));
443 }
444
445 return rs;
446 }
447
448
449 @Benchmark
450 public byte andAll() {
451 byte[] as = fa.apply(size);
452 byte r = -1;
453 for (int i = 0; i < as.length; i++) {
454 r &= as[i];
455 }
456 return r;
457 }
458
459
460
461 @Benchmark
462 public byte orAll() {
463 byte[] as = fa.apply(size);
464 byte r = 0;
465 for (int i = 0; i < as.length; i++) {
466 r |= as[i];
467 }
468 return r;
469 }
470
471
472
473 @Benchmark
474 public byte xorAll() {
475 byte[] as = fa.apply(size);
476 byte r = 0;
477 for (int i = 0; i < as.length; i++) {
478 r ^= as[i];
479 }
480 return r;
481 }
482
483
484 @Benchmark
485 public byte addAll() {
486 byte[] as = fa.apply(size);
487 byte r = 0;
488 for (int i = 0; i < as.length; i++) {
489 r += as[i];
490 }
491 return r;
492 }
493
494 @Benchmark
495 public byte mulAll() {
496 byte[] as = fa.apply(size);
497 byte r = 1;
498 for (int i = 0; i < as.length; i++) {
499 r *= as[i];
500 }
501 return r;
502 }
503
504 @Benchmark
505 public byte minAll() {
506 byte[] as = fa.apply(size);
507 byte r = Byte.MAX_VALUE;
508 for (int i = 0; i < as.length; i++) {
509 r = (byte)Math.min(r, as[i]);
510 }
511 return r;
512 }
513
514 @Benchmark
515 public byte maxAll() {
516 byte[] as = fa.apply(size);
517 byte r = Byte.MIN_VALUE;
518 for (int i = 0; i < as.length; i++) {
519 r = (byte)Math.max(r, as[i]);
520 }
521 return r;
522 }
523
524
525 @Benchmark
526 public boolean anyTrue() {
527 boolean[] ms = fm.apply(size);
528 boolean r = false;
529 for (int i = 0; i < ms.length; i++) {
530 r |= ms[i];
531 }
532 return r;
533 }
534
535
536
537 @Benchmark
538 public boolean allTrue() {
539 boolean[] ms = fm.apply(size);
540 boolean r = true;
541 for (int i = 0; i < ms.length; i++) {
542 r &= ms[i];
543 }
544 return r;
545 }
546
547
548 @Benchmark
549 public boolean lessThan() {
550 byte[] as = fa.apply(size);
551 byte[] bs = fb.apply(size);
552
553 boolean r = false;
554 for (int i = 0; i < as.length; i++) {
555 boolean m = (as[i] < bs[i]);
556 r |= m; // accumulate so JIT can't eliminate the computation
557 }
558
559 return r;
560 }
561
562 @Benchmark
563 public boolean greaterThan() {
564 byte[] as = fa.apply(size);
565 byte[] bs = fb.apply(size);
566
567 boolean r = false;
568 for (int i = 0; i < as.length; i++) {
569 boolean m = (as[i] > bs[i]);
570 r |= m; // accumulate so JIT can't eliminate the computation
571 }
572
573 return r;
574 }
575
576 @Benchmark
577 public boolean equal() {
578 byte[] as = fa.apply(size);
579 byte[] bs = fb.apply(size);
580
581 boolean r = false;
582 for (int i = 0; i < as.length; i++) {
583 boolean m = (as[i] == bs[i]);
584 r |= m; // accumulate so JIT can't eliminate the computation
585 }
586
587 return r;
588 }
589
590 @Benchmark
591 public boolean notEqual() {
592 byte[] as = fa.apply(size);
593 byte[] bs = fb.apply(size);
594
595 boolean r = false;
596 for (int i = 0; i < as.length; i++) {
597 boolean m = (as[i] != bs[i]);
598 r |= m; // accumulate so JIT can't eliminate the computation
599 }
600
601 return r;
602 }
603
604 @Benchmark
605 public boolean lessThanEq() {
606 byte[] as = fa.apply(size);
607 byte[] bs = fb.apply(size);
608
609 boolean r = false;
610 for (int i = 0; i < as.length; i++) {
611 boolean m = (as[i] <= bs[i]);
612 r |= m; // accumulate so JIT can't eliminate the computation
613 }
614
615 return r;
616 }
617
618 @Benchmark
619 public boolean greaterThanEq() {
620 byte[] as = fa.apply(size);
621 byte[] bs = fb.apply(size);
622
623 boolean r = false;
624 for (int i = 0; i < as.length; i++) {
625 boolean m = (as[i] >= bs[i]);
626 r |= m; // accumulate so JIT can't eliminate the computation
627 }
628
629 return r;
630 }
631
632 @Benchmark
633 public Object blend() {
634 byte[] as = fa.apply(size);
635 byte[] bs = fb.apply(size);
636 byte[] rs = fr.apply(size);
637 boolean[] ms = fm.apply(size);
638
639 for (int i = 0; i < as.length; i++) {
640 byte a = as[i];
641 byte b = bs[i];
642 boolean m = ms[i % ms.length];
643 rs[i] = (m ? b : a);
644 }
645
646 return rs;
647 }
648 Object rearrangeShared(int window) {
649 byte[] as = fa.apply(size);
650 int[] order = fs.apply(size);
651 byte[] rs = fr.apply(size);
652
653 for (int i = 0; i < as.length; i += window) {
654 for (int j = 0; j < window; j++) {
655 byte a = as[i+j];
656 int pos = order[j];
657 rs[i + pos] = a;
658 }
659 }
660
661 return rs;
662 }
663
664 @Benchmark
665 public Object rearrange064() {
666 int window = 64 / Byte.SIZE;
667 return rearrangeShared(window);
668 }
669
670 @Benchmark
671 public Object rearrange128() {
672 int window = 128 / Byte.SIZE;
673 return rearrangeShared(window);
674 }
675
676 @Benchmark
677 public Object rearrange256() {
678 int window = 256 / Byte.SIZE;
679 return rearrangeShared(window);
680 }
681
682 @Benchmark
683 public Object rearrange512() {
684 int window = 512 / Byte.SIZE;
685 return rearrangeShared(window);
686 }
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708 @Benchmark
709 public Object neg() {
710 byte[] as = fa.apply(size);
711 byte[] rs = fr.apply(size);
712
713 for (int i = 0; i < as.length; i++) {
714 byte a = as[i];
715 rs[i] = (byte)(-((byte)a));
716 }
717
718 return rs;
719 }
720
721 @Benchmark
722 public Object negMasked() {
723 byte[] as = fa.apply(size);
724 byte[] rs = fr.apply(size);
725 boolean[] ms = fm.apply(size);
726
727 for (int i = 0; i < as.length; i++) {
728 byte a = as[i];
729 boolean m = ms[i % ms.length];
730 rs[i] = (m ? (byte)(-((byte)a)) : a);
731 }
732
733 return rs;
734 }
735
736 @Benchmark
737 public Object abs() {
738 byte[] as = fa.apply(size);
739 byte[] rs = fr.apply(size);
740
741 for (int i = 0; i < as.length; i++) {
742 byte a = as[i];
743 rs[i] = (byte)(Math.abs((byte)a));
744 }
745
746 return rs;
747 }
748
749 @Benchmark
750 public Object absMasked() {
751 byte[] as = fa.apply(size);
752 byte[] rs = fr.apply(size);
753 boolean[] ms = fm.apply(size);
754
755 for (int i = 0; i < as.length; i++) {
756 byte a = as[i];
757 boolean m = ms[i % ms.length];
758 rs[i] = (m ? (byte)(Math.abs((byte)a)) : a);
759 }
760
761 return rs;
762 }
763
764
765 @Benchmark
766 public Object not() {
767 byte[] as = fa.apply(size);
768 byte[] rs = fr.apply(size);
769
770 for (int i = 0; i < as.length; i++) {
771 byte a = as[i];
772 rs[i] = (byte)(~((byte)a));
773 }
774
775 return rs;
776 }
777
778
779
780 @Benchmark
781 public Object notMasked() {
782 byte[] as = fa.apply(size);
783 byte[] rs = fr.apply(size);
784 boolean[] ms = fm.apply(size);
785
786 for (int i = 0; i < as.length; i++) {
787 byte a = as[i];
788 boolean m = ms[i % ms.length];
789 rs[i] = (m ? (byte)(~((byte)a)) : a);
790 }
791
792 return rs;
793 }
794
795
796
797
798
799 }
800
|
1 /*
2 * Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have
21 * questions.
22 */
23
24 package benchmark.jdk.incubator.vector;
25
26 import java.util.concurrent.TimeUnit;
27 import java.util.function.IntFunction;
28
29 import org.openjdk.jmh.annotations.*;
30 import org.openjdk.jmh.infra.Blackhole;
31
32 @BenchmarkMode(Mode.Throughput)
33 @OutputTimeUnit(TimeUnit.MILLISECONDS)
34 @State(Scope.Benchmark)
35 @Warmup(iterations = 3, time = 1)
36 @Measurement(iterations = 5, time = 1)
37 @Fork(value = 1, jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"})
38 public class ByteScalar extends AbstractVectorBenchmark {
39 static final int INVOC_COUNT = 1; // To align with vector benchmarks.
40
41 @Param("1024")
42 int size;
43
44 byte[] fill(IntFunction<Byte> f) {
45 byte[] array = new byte[size];
46 for (int i = 0; i < array.length; i++) {
47 array[i] = f.apply(i);
48 }
49 return array;
50 }
51
52 byte[] as, bs, cs, rs;
53 boolean[] ms, rms;
54 int[] ss;
55
56 @Setup
57 public void init() {
58 as = fill(i -> (byte)(2*i));
59 bs = fill(i -> (byte)(i+1));
60 cs = fill(i -> (byte)(i+5));
61 rs = fill(i -> (byte)0);
62 ms = fillMask(size, i -> (i % 2) == 0);
63 rms = fillMask(size, i -> false);
64
65 ss = fillInt(size, i -> RANDOM.nextInt(Math.max(i,1)));
66 }
67
68 final IntFunction<byte[]> fa = vl -> as;
69 final IntFunction<byte[]> fb = vl -> bs;
70 final IntFunction<byte[]> fc = vl -> cs;
71 final IntFunction<byte[]> fr = vl -> rs;
72 final IntFunction<boolean[]> fm = vl -> ms;
73 final IntFunction<boolean[]> fmr = vl -> rms;
74 final IntFunction<int[]> fs = vl -> ss;
75
76
77 @Benchmark
78 public void add(Blackhole bh) {
79 byte[] as = fa.apply(size);
80 byte[] bs = fb.apply(size);
81 byte[] rs = fr.apply(size);
82
83 for (int ic = 0; ic < INVOC_COUNT; ic++) {
84 for (int i = 0; i < as.length; i++) {
85 byte a = as[i];
86 byte b = bs[i];
87 rs[i] = (byte)(a + b);
88 }
89 }
90
91 bh.consume(rs);
92 }
93
94 @Benchmark
95 public void addMasked(Blackhole bh) {
96 byte[] as = fa.apply(size);
97 byte[] bs = fb.apply(size);
98 byte[] rs = fr.apply(size);
99 boolean[] ms = fm.apply(size);
100
101 for (int ic = 0; ic < INVOC_COUNT; ic++) {
102 for (int i = 0; i < as.length; i++) {
103 byte a = as[i];
104 byte b = bs[i];
105 if (ms[i % ms.length]) {
106 rs[i] = (byte)(a + b);
107 } else {
108 rs[i] = a;
109 }
110 }
111 }
112 bh.consume(rs);
113 }
114
115 @Benchmark
116 public void sub(Blackhole bh) {
117 byte[] as = fa.apply(size);
118 byte[] bs = fb.apply(size);
119 byte[] rs = fr.apply(size);
120
121 for (int ic = 0; ic < INVOC_COUNT; ic++) {
122 for (int i = 0; i < as.length; i++) {
123 byte a = as[i];
124 byte b = bs[i];
125 rs[i] = (byte)(a - b);
126 }
127 }
128
129 bh.consume(rs);
130 }
131
132 @Benchmark
133 public void subMasked(Blackhole bh) {
134 byte[] as = fa.apply(size);
135 byte[] bs = fb.apply(size);
136 byte[] rs = fr.apply(size);
137 boolean[] ms = fm.apply(size);
138
139 for (int ic = 0; ic < INVOC_COUNT; ic++) {
140 for (int i = 0; i < as.length; i++) {
141 byte a = as[i];
142 byte b = bs[i];
143 if (ms[i % ms.length]) {
144 rs[i] = (byte)(a - b);
145 } else {
146 rs[i] = a;
147 }
148 }
149 }
150 bh.consume(rs);
151 }
152
153
154
155 @Benchmark
156 public void mul(Blackhole bh) {
157 byte[] as = fa.apply(size);
158 byte[] bs = fb.apply(size);
159 byte[] rs = fr.apply(size);
160
161 for (int ic = 0; ic < INVOC_COUNT; ic++) {
162 for (int i = 0; i < as.length; i++) {
163 byte a = as[i];
164 byte b = bs[i];
165 rs[i] = (byte)(a * b);
166 }
167 }
168
169 bh.consume(rs);
170 }
171
172 @Benchmark
173 public void mulMasked(Blackhole bh) {
174 byte[] as = fa.apply(size);
175 byte[] bs = fb.apply(size);
176 byte[] rs = fr.apply(size);
177 boolean[] ms = fm.apply(size);
178
179 for (int ic = 0; ic < INVOC_COUNT; ic++) {
180 for (int i = 0; i < as.length; i++) {
181 byte a = as[i];
182 byte b = bs[i];
183 if (ms[i % ms.length]) {
184 rs[i] = (byte)(a * b);
185 } else {
186 rs[i] = a;
187 }
188 }
189 }
190 bh.consume(rs);
191 }
192
193
194 @Benchmark
195 public void and(Blackhole bh) {
196 byte[] as = fa.apply(size);
197 byte[] bs = fb.apply(size);
198 byte[] rs = fr.apply(size);
199
200 for (int ic = 0; ic < INVOC_COUNT; ic++) {
201 for (int i = 0; i < as.length; i++) {
202 byte a = as[i];
203 byte b = bs[i];
204 rs[i] = (byte)(a & b);
205 }
206 }
207
208 bh.consume(rs);
209 }
210
211
212
213 @Benchmark
214 public void andMasked(Blackhole bh) {
215 byte[] as = fa.apply(size);
216 byte[] bs = fb.apply(size);
217 byte[] rs = fr.apply(size);
218 boolean[] ms = fm.apply(size);
219
220 for (int ic = 0; ic < INVOC_COUNT; ic++) {
221 for (int i = 0; i < as.length; i++) {
222 byte a = as[i];
223 byte b = bs[i];
224 if (ms[i % ms.length]) {
225 rs[i] = (byte)(a & b);
226 } else {
227 rs[i] = a;
228 }
229 }
230 }
231 bh.consume(rs);
232 }
233
234
235
236 @Benchmark
237 public void or(Blackhole bh) {
238 byte[] as = fa.apply(size);
239 byte[] bs = fb.apply(size);
240 byte[] rs = fr.apply(size);
241
242 for (int ic = 0; ic < INVOC_COUNT; ic++) {
243 for (int i = 0; i < as.length; i++) {
244 byte a = as[i];
245 byte b = bs[i];
246 rs[i] = (byte)(a | b);
247 }
248 }
249
250 bh.consume(rs);
251 }
252
253
254
255 @Benchmark
256 public void orMasked(Blackhole bh) {
257 byte[] as = fa.apply(size);
258 byte[] bs = fb.apply(size);
259 byte[] rs = fr.apply(size);
260 boolean[] ms = fm.apply(size);
261
262 for (int ic = 0; ic < INVOC_COUNT; ic++) {
263 for (int i = 0; i < as.length; i++) {
264 byte a = as[i];
265 byte b = bs[i];
266 if (ms[i % ms.length]) {
267 rs[i] = (byte)(a | b);
268 } else {
269 rs[i] = a;
270 }
271 }
272 }
273 bh.consume(rs);
274 }
275
276
277
278 @Benchmark
279 public void xor(Blackhole bh) {
280 byte[] as = fa.apply(size);
281 byte[] bs = fb.apply(size);
282 byte[] rs = fr.apply(size);
283
284 for (int ic = 0; ic < INVOC_COUNT; ic++) {
285 for (int i = 0; i < as.length; i++) {
286 byte a = as[i];
287 byte b = bs[i];
288 rs[i] = (byte)(a ^ b);
289 }
290 }
291
292 bh.consume(rs);
293 }
294
295
296
297 @Benchmark
298 public void xorMasked(Blackhole bh) {
299 byte[] as = fa.apply(size);
300 byte[] bs = fb.apply(size);
301 byte[] rs = fr.apply(size);
302 boolean[] ms = fm.apply(size);
303
304 for (int ic = 0; ic < INVOC_COUNT; ic++) {
305 for (int i = 0; i < as.length; i++) {
306 byte a = as[i];
307 byte b = bs[i];
308 if (ms[i % ms.length]) {
309 rs[i] = (byte)(a ^ b);
310 } else {
311 rs[i] = a;
312 }
313 }
314 }
315 bh.consume(rs);
316 }
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332 @Benchmark
333 public void aShiftRShift(Blackhole bh) {
334 byte[] as = fa.apply(size);
335 byte[] bs = fb.apply(size);
336 byte[] rs = fr.apply(size);
337
338 for (int ic = 0; ic < INVOC_COUNT; ic++) {
339 for (int i = 0; i < as.length; i++) {
340 byte a = as[i];
341 byte b = bs[i];
342 rs[i] = (byte)((a >> (b & 7)));
343 }
344 }
345
346 bh.consume(rs);
347 }
348
349
350
351 @Benchmark
352 public void aShiftRMaskedShift(Blackhole bh) {
353 byte[] as = fa.apply(size);
354 byte[] bs = fb.apply(size);
355 byte[] rs = fr.apply(size);
356 boolean[] ms = fm.apply(size);
357
358 for (int ic = 0; ic < INVOC_COUNT; ic++) {
359 for (int i = 0; i < as.length; i++) {
360 byte a = as[i];
361 byte b = bs[i];
362 boolean m = ms[i % ms.length];
363 rs[i] = (m ? (byte)((a >> (b & 7))) : a);
364 }
365 }
366
367 bh.consume(rs);
368 }
369
370
371
372 @Benchmark
373 public void shiftLShift(Blackhole bh) {
374 byte[] as = fa.apply(size);
375 byte[] bs = fb.apply(size);
376 byte[] rs = fr.apply(size);
377
378 for (int ic = 0; ic < INVOC_COUNT; ic++) {
379 for (int i = 0; i < as.length; i++) {
380 byte a = as[i];
381 byte b = bs[i];
382 rs[i] = (byte)((a << (b & 7)));
383 }
384 }
385
386 bh.consume(rs);
387 }
388
389
390
391 @Benchmark
392 public void shiftLMaskedShift(Blackhole bh) {
393 byte[] as = fa.apply(size);
394 byte[] bs = fb.apply(size);
395 byte[] rs = fr.apply(size);
396 boolean[] ms = fm.apply(size);
397
398 for (int ic = 0; ic < INVOC_COUNT; ic++) {
399 for (int i = 0; i < as.length; i++) {
400 byte a = as[i];
401 byte b = bs[i];
402 boolean m = ms[i % ms.length];
403 rs[i] = (m ? (byte)((a << (b & 7))) : a);
404 }
405 }
406
407 bh.consume(rs);
408 }
409
410
411
412 @Benchmark
413 public void shiftRShift(Blackhole bh) {
414 byte[] as = fa.apply(size);
415 byte[] bs = fb.apply(size);
416 byte[] rs = fr.apply(size);
417
418 for (int ic = 0; ic < INVOC_COUNT; ic++) {
419 for (int i = 0; i < as.length; i++) {
420 byte a = as[i];
421 byte b = bs[i];
422 rs[i] = (byte)(((a & 0xFF) >>> (b & 7)));
423 }
424 }
425
426 bh.consume(rs);
427 }
428
429
430
431 @Benchmark
432 public void shiftRMaskedShift(Blackhole bh) {
433 byte[] as = fa.apply(size);
434 byte[] bs = fb.apply(size);
435 byte[] rs = fr.apply(size);
436 boolean[] ms = fm.apply(size);
437
438 for (int ic = 0; ic < INVOC_COUNT; ic++) {
439 for (int i = 0; i < as.length; i++) {
440 byte a = as[i];
441 byte b = bs[i];
442 boolean m = ms[i % ms.length];
443 rs[i] = (m ? (byte)(((a & 0xFF) >>> (b & 7))) : a);
444 }
445 }
446
447 bh.consume(rs);
448 }
449
450
451
452
453
454
455
456
457 @Benchmark
458 public void max(Blackhole bh) {
459 byte[] as = fa.apply(size);
460 byte[] bs = fb.apply(size);
461 byte[] rs = fr.apply(size);
462
463 for (int ic = 0; ic < INVOC_COUNT; ic++) {
464 for (int i = 0; i < as.length; i++) {
465 byte a = as[i];
466 byte b = bs[i];
467 rs[i] = (byte)(Math.max(a, b));
468 }
469 }
470
471 bh.consume(rs);
472 }
473
474 @Benchmark
475 public void min(Blackhole bh) {
476 byte[] as = fa.apply(size);
477 byte[] bs = fb.apply(size);
478 byte[] rs = fr.apply(size);
479
480 for (int ic = 0; ic < INVOC_COUNT; ic++) {
481 for (int i = 0; i < as.length; i++) {
482 byte a = as[i];
483 byte b = bs[i];
484 rs[i] = (byte)(Math.min(a, b));
485 }
486 }
487
488 bh.consume(rs);
489 }
490
491
492 @Benchmark
493 public void andAll(Blackhole bh) {
494 byte[] as = fa.apply(size);
495 byte r = -1;
496 for (int ic = 0; ic < INVOC_COUNT; ic++) {
497 r = -1;
498 for (int i = 0; i < as.length; i++) {
499 r &= as[i];
500 }
501 }
502 bh.consume(r);
503 }
504
505
506
507 @Benchmark
508 public void orAll(Blackhole bh) {
509 byte[] as = fa.apply(size);
510 byte r = 0;
511 for (int ic = 0; ic < INVOC_COUNT; ic++) {
512 r = 0;
513 for (int i = 0; i < as.length; i++) {
514 r |= as[i];
515 }
516 }
517 bh.consume(r);
518 }
519
520
521
522 @Benchmark
523 public void xorAll(Blackhole bh) {
524 byte[] as = fa.apply(size);
525 byte r = 0;
526 for (int ic = 0; ic < INVOC_COUNT; ic++) {
527 r = 0;
528 for (int i = 0; i < as.length; i++) {
529 r ^= as[i];
530 }
531 }
532 bh.consume(r);
533 }
534
535
536 @Benchmark
537 public void addAll(Blackhole bh) {
538 byte[] as = fa.apply(size);
539 byte r = 0;
540 for (int ic = 0; ic < INVOC_COUNT; ic++) {
541 r = 0;
542 for (int i = 0; i < as.length; i++) {
543 r += as[i];
544 }
545 }
546 bh.consume(r);
547 }
548
549 @Benchmark
550 public void mulAll(Blackhole bh) {
551 byte[] as = fa.apply(size);
552 byte r = 1;
553 for (int ic = 0; ic < INVOC_COUNT; ic++) {
554 r = 1;
555 for (int i = 0; i < as.length; i++) {
556 r *= as[i];
557 }
558 }
559 bh.consume(r);
560 }
561
562 @Benchmark
563 public void minAll(Blackhole bh) {
564 byte[] as = fa.apply(size);
565 byte r = Byte.MAX_VALUE;
566 for (int ic = 0; ic < INVOC_COUNT; ic++) {
567 r = Byte.MAX_VALUE;
568 for (int i = 0; i < as.length; i++) {
569 r = (byte)Math.min(r, as[i]);
570 }
571 }
572 bh.consume(r);
573 }
574
575 @Benchmark
576 public void maxAll(Blackhole bh) {
577 byte[] as = fa.apply(size);
578 byte r = Byte.MIN_VALUE;
579 for (int ic = 0; ic < INVOC_COUNT; ic++) {
580 r = Byte.MIN_VALUE;
581 for (int i = 0; i < as.length; i++) {
582 r = (byte)Math.max(r, as[i]);
583 }
584 }
585 bh.consume(r);
586 }
587
588
589 @Benchmark
590 public void anyTrue(Blackhole bh) {
591 boolean[] ms = fm.apply(size);
592 boolean r = false;
593 for (int ic = 0; ic < INVOC_COUNT; ic++) {
594 r = false;
595 for (int i = 0; i < ms.length; i++) {
596 r |= ms[i];
597 }
598 }
599 bh.consume(r);
600 }
601
602
603
604 @Benchmark
605 public void allTrue(Blackhole bh) {
606 boolean[] ms = fm.apply(size);
607 boolean r = true;
608 for (int ic = 0; ic < INVOC_COUNT; ic++) {
609 r = true;
610 for (int i = 0; i < ms.length; i++) {
611 r &= ms[i];
612 }
613 }
614 bh.consume(r);
615 }
616
617
618 @Benchmark
619 public void lessThan(Blackhole bh) {
620 byte[] as = fa.apply(size);
621 byte[] bs = fb.apply(size);
622
623 boolean r = false;
624 for (int ic = 0; ic < INVOC_COUNT; ic++) {
625 r = false;
626 for (int i = 0; i < as.length; i++) {
627 boolean m = (as[i] < bs[i]);
628 r |= m; // accumulate so JIT can't eliminate the computation
629 }
630 }
631
632 bh.consume(r);
633 }
634
635 @Benchmark
636 public void greaterThan(Blackhole bh) {
637 byte[] as = fa.apply(size);
638 byte[] bs = fb.apply(size);
639
640 boolean r = false;
641 for (int ic = 0; ic < INVOC_COUNT; ic++) {
642 r = false;
643 for (int i = 0; i < as.length; i++) {
644 boolean m = (as[i] > bs[i]);
645 r |= m; // accumulate so JIT can't eliminate the computation
646 }
647 }
648
649 bh.consume(r);
650 }
651
652 @Benchmark
653 public void equal(Blackhole bh) {
654 byte[] as = fa.apply(size);
655 byte[] bs = fb.apply(size);
656
657 boolean r = false;
658 for (int ic = 0; ic < INVOC_COUNT; ic++) {
659 r = false;
660 for (int i = 0; i < as.length; i++) {
661 boolean m = (as[i] == bs[i]);
662 r |= m; // accumulate so JIT can't eliminate the computation
663 }
664 }
665
666 bh.consume(r);
667 }
668
669 @Benchmark
670 public void notEqual(Blackhole bh) {
671 byte[] as = fa.apply(size);
672 byte[] bs = fb.apply(size);
673
674 boolean r = false;
675 for (int ic = 0; ic < INVOC_COUNT; ic++) {
676 r = false;
677 for (int i = 0; i < as.length; i++) {
678 boolean m = (as[i] != bs[i]);
679 r |= m; // accumulate so JIT can't eliminate the computation
680 }
681 }
682
683 bh.consume(r);
684 }
685
686 @Benchmark
687 public void lessThanEq(Blackhole bh) {
688 byte[] as = fa.apply(size);
689 byte[] bs = fb.apply(size);
690
691 boolean r = false;
692 for (int ic = 0; ic < INVOC_COUNT; ic++) {
693 r = false;
694 for (int i = 0; i < as.length; i++) {
695 boolean m = (as[i] <= bs[i]);
696 r |= m; // accumulate so JIT can't eliminate the computation
697 }
698 }
699
700 bh.consume(r);
701 }
702
703 @Benchmark
704 public void greaterThanEq(Blackhole bh) {
705 byte[] as = fa.apply(size);
706 byte[] bs = fb.apply(size);
707
708 boolean r = false;
709 for (int ic = 0; ic < INVOC_COUNT; ic++) {
710 r = false;
711 for (int i = 0; i < as.length; i++) {
712 boolean m = (as[i] >= bs[i]);
713 r |= m; // accumulate so JIT can't eliminate the computation
714 }
715 }
716
717 bh.consume(r);
718 }
719
720 @Benchmark
721 public void blend(Blackhole bh) {
722 byte[] as = fa.apply(size);
723 byte[] bs = fb.apply(size);
724 byte[] rs = fr.apply(size);
725 boolean[] ms = fm.apply(size);
726
727 for (int ic = 0; ic < INVOC_COUNT; ic++) {
728 for (int i = 0; i < as.length; i++) {
729 byte a = as[i];
730 byte b = bs[i];
731 boolean m = ms[i % ms.length];
732 rs[i] = (m ? b : a);
733 }
734 }
735
736 bh.consume(rs);
737 }
738 void rearrangeShared(int window, Blackhole bh) {
739 byte[] as = fa.apply(size);
740 int[] order = fs.apply(size);
741 byte[] rs = fr.apply(size);
742
743 for (int ic = 0; ic < INVOC_COUNT; ic++) {
744 for (int i = 0; i < as.length; i += window) {
745 for (int j = 0; j < window; j++) {
746 byte a = as[i+j];
747 int pos = order[j];
748 rs[i + pos] = a;
749 }
750 }
751 }
752
753 bh.consume(rs);
754 }
755
756 @Benchmark
757 public void rearrange064(Blackhole bh) {
758 int window = 64 / Byte.SIZE;
759 rearrangeShared(window, bh);
760 }
761
762 @Benchmark
763 public void rearrange128(Blackhole bh) {
764 int window = 128 / Byte.SIZE;
765 rearrangeShared(window, bh);
766 }
767
768 @Benchmark
769 public void rearrange256(Blackhole bh) {
770 int window = 256 / Byte.SIZE;
771 rearrangeShared(window, bh);
772 }
773
774 @Benchmark
775 public void rearrange512(Blackhole bh) {
776 int window = 512 / Byte.SIZE;
777 rearrangeShared(window, bh);
778 }
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800 @Benchmark
801 public void neg(Blackhole bh) {
802 byte[] as = fa.apply(size);
803 byte[] rs = fr.apply(size);
804
805 for (int ic = 0; ic < INVOC_COUNT; ic++) {
806 for (int i = 0; i < as.length; i++) {
807 byte a = as[i];
808 rs[i] = (byte)(-((byte)a));
809 }
810 }
811
812 bh.consume(rs);
813 }
814
815 @Benchmark
816 public void negMasked(Blackhole bh) {
817 byte[] as = fa.apply(size);
818 byte[] rs = fr.apply(size);
819 boolean[] ms = fm.apply(size);
820
821 for (int ic = 0; ic < INVOC_COUNT; ic++) {
822 for (int i = 0; i < as.length; i++) {
823 byte a = as[i];
824 boolean m = ms[i % ms.length];
825 rs[i] = (m ? (byte)(-((byte)a)) : a);
826 }
827 }
828
829 bh.consume(rs);
830 }
831
832 @Benchmark
833 public void abs(Blackhole bh) {
834 byte[] as = fa.apply(size);
835 byte[] rs = fr.apply(size);
836
837 for (int ic = 0; ic < INVOC_COUNT; ic++) {
838 for (int i = 0; i < as.length; i++) {
839 byte a = as[i];
840 rs[i] = (byte)(Math.abs((byte)a));
841 }
842 }
843
844 bh.consume(rs);
845 }
846
847 @Benchmark
848 public void absMasked(Blackhole bh) {
849 byte[] as = fa.apply(size);
850 byte[] rs = fr.apply(size);
851 boolean[] ms = fm.apply(size);
852
853 for (int ic = 0; ic < INVOC_COUNT; ic++) {
854 for (int i = 0; i < as.length; i++) {
855 byte a = as[i];
856 boolean m = ms[i % ms.length];
857 rs[i] = (m ? (byte)(Math.abs((byte)a)) : a);
858 }
859 }
860
861 bh.consume(rs);
862 }
863
864
865 @Benchmark
866 public void not(Blackhole bh) {
867 byte[] as = fa.apply(size);
868 byte[] rs = fr.apply(size);
869
870 for (int ic = 0; ic < INVOC_COUNT; ic++) {
871 for (int i = 0; i < as.length; i++) {
872 byte a = as[i];
873 rs[i] = (byte)(~((byte)a));
874 }
875 }
876
877 bh.consume(rs);
878 }
879
880
881
882 @Benchmark
883 public void notMasked(Blackhole bh) {
884 byte[] as = fa.apply(size);
885 byte[] rs = fr.apply(size);
886 boolean[] ms = fm.apply(size);
887
888 for (int ic = 0; ic < INVOC_COUNT; ic++) {
889 for (int i = 0; i < as.length; i++) {
890 byte a = as[i];
891 boolean m = ms[i % ms.length];
892 rs[i] = (m ? (byte)(~((byte)a)) : a);
893 }
894 }
895
896 bh.consume(rs);
897 }
898
899
900
901
902
903 }
904
|