/*
 * Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have
 * questions.
 */

package benchmark.jdk.incubator.vector;

import java.util.concurrent.TimeUnit;
import java.util.function.IntFunction;

import org.openjdk.jmh.annotations.*;
import org.openjdk.jmh.infra.Blackhole;

/**
 * JMH throughput benchmarks for element-wise, reduction, comparison and
 * permutation operations written as plain scalar loops over {@code byte[]}.
 *
 * <p>Every benchmark reads its operands through the {@code fa}/{@code fb}/...
 * accessors, runs the loop {@link #INVOC_COUNT} times and hands the result to
 * the {@link Blackhole} so the computation cannot be discarded.
 *
 * <p>NOTE(review): {@code fillMask}, {@code fillInt} and {@code RANDOM} are not
 * defined in this file; presumably they are inherited from
 * {@code AbstractVectorBenchmark} - confirm there.
 */
@BenchmarkMode(Mode.Throughput)
@OutputTimeUnit(TimeUnit.MILLISECONDS)
@State(Scope.Benchmark)
@Warmup(iterations = 3, time = 1)
@Measurement(iterations = 5, time = 1)
@Fork(value = 1, jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"})
public class ByteScalar extends AbstractVectorBenchmark {
    static final int INVOC_COUNT = 1; // To align with vector benchmarks.

    /** Number of elements in every operand array. */
    @Param("1024")
    int size;

    /**
     * Builds a {@code byte[]} of length {@link #size} whose element {@code i}
     * is {@code f.apply(i)}.
     *
     * @param f generator invoked once per index
     * @return the freshly allocated, populated array
     */
    byte[] fill(IntFunction<Byte> f) {
        byte[] array = new byte[size];
        for (int i = 0; i < array.length; i++) {
            array[i] = f.apply(i);
        }
        return array;
    }

    // Operand arrays (as, bs, cs), result array (rs), masks and shuffle indexes.
    byte[] as, bs, cs, rs;
    boolean[] ms, rms;
    int[] ss;

    /** Populates the operand, result, mask and index arrays before the benchmarks run. */
    @Setup
    public void init() {
        as = fill(i -> (byte)(2*i));
        bs = fill(i -> (byte)(i+1));
        cs = fill(i -> (byte)(i+5));
        rs = fill(i -> (byte)0);
        ms = fillMask(size, i -> (i % 2) == 0);
        rms = fillMask(size, i -> false);

        ss = fillInt(size, i -> RANDOM.nextInt(Math.max(i,1)));
    }

    // Accessors: each ignores its argument and returns the corresponding field.
    final IntFunction<byte[]> fa = vl -> as;
    final IntFunction<byte[]> fb = vl -> bs;
    final IntFunction<byte[]> fc = vl -> cs;
    final IntFunction<byte[]> fr = vl -> rs;
    final IntFunction<boolean[]> fm = vl -> ms;
    final IntFunction<boolean[]> fmr = vl -> rms;
    final IntFunction<int[]> fs = vl -> ss;


    /** {@code rs[i] = (byte)(as[i] + bs[i])}. */
    @Benchmark
    public void add(Blackhole bh) {
        byte[] as = fa.apply(size);
        byte[] bs = fb.apply(size);
        byte[] rs = fr.apply(size);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < as.length; i++) {
                byte a = as[i];
                byte b = bs[i];
                rs[i] = (byte)(a + b);
            }
        }

        bh.consume(rs);
    }

    /** Like {@link #add}, but lanes whose mask is {@code false} receive {@code as[i]}. */
    @Benchmark
    public void addMasked(Blackhole bh) {
        byte[] as = fa.apply(size);
        byte[] bs = fb.apply(size);
        byte[] rs = fr.apply(size);
        boolean[] ms = fm.apply(size);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < as.length; i++) {
                byte a = as[i];
                byte b = bs[i];
                if (ms[i % ms.length]) {
                    rs[i] = (byte)(a + b);
                } else {
                    rs[i] = a;
                }
            }
        }
        bh.consume(rs);
    }

    /** {@code rs[i] = (byte)(as[i] - bs[i])}. */
    @Benchmark
    public void sub(Blackhole bh) {
        byte[] as = fa.apply(size);
        byte[] bs = fb.apply(size);
        byte[] rs = fr.apply(size);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < as.length; i++) {
                byte a = as[i];
                byte b = bs[i];
                rs[i] = (byte)(a - b);
            }
        }

        bh.consume(rs);
    }

    /** Like {@link #sub}, but lanes whose mask is {@code false} receive {@code as[i]}. */
    @Benchmark
    public void subMasked(Blackhole bh) {
        byte[] as = fa.apply(size);
        byte[] bs = fb.apply(size);
        byte[] rs = fr.apply(size);
        boolean[] ms = fm.apply(size);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < as.length; i++) {
                byte a = as[i];
                byte b = bs[i];
                if (ms[i % ms.length]) {
                    rs[i] = (byte)(a - b);
                } else {
                    rs[i] = a;
                }
            }
        }
        bh.consume(rs);
    }

    /** {@code rs[i] = (byte)(as[i] * bs[i])}. */
    @Benchmark
    public void mul(Blackhole bh) {
        byte[] as = fa.apply(size);
        byte[] bs = fb.apply(size);
        byte[] rs = fr.apply(size);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < as.length; i++) {
                byte a = as[i];
                byte b = bs[i];
                rs[i] = (byte)(a * b);
            }
        }

        bh.consume(rs);
    }

    /** Like {@link #mul}, but lanes whose mask is {@code false} receive {@code as[i]}. */
    @Benchmark
    public void mulMasked(Blackhole bh) {
        byte[] as = fa.apply(size);
        byte[] bs = fb.apply(size);
        byte[] rs = fr.apply(size);
        boolean[] ms = fm.apply(size);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < as.length; i++) {
                byte a = as[i];
                byte b = bs[i];
                if (ms[i % ms.length]) {
                    rs[i] = (byte)(a * b);
                } else {
                    rs[i] = a;
                }
            }
        }
        bh.consume(rs);
    }

    /** {@code rs[i] = (byte)(as[i] & bs[i])}. */
    @Benchmark
    public void and(Blackhole bh) {
        byte[] as = fa.apply(size);
        byte[] bs = fb.apply(size);
        byte[] rs = fr.apply(size);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < as.length; i++) {
                byte a = as[i];
                byte b = bs[i];
                rs[i] = (byte)(a & b);
            }
        }

        bh.consume(rs);
    }

    /** Like {@link #and}, but lanes whose mask is {@code false} receive {@code as[i]}. */
    @Benchmark
    public void andMasked(Blackhole bh) {
        byte[] as = fa.apply(size);
        byte[] bs = fb.apply(size);
        byte[] rs = fr.apply(size);
        boolean[] ms = fm.apply(size);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < as.length; i++) {
                byte a = as[i];
                byte b = bs[i];
                if (ms[i % ms.length]) {
                    rs[i] = (byte)(a & b);
                } else {
                    rs[i] = a;
                }
            }
        }
        bh.consume(rs);
    }

    /** {@code rs[i] = (byte)(as[i] | bs[i])}. */
    @Benchmark
    public void or(Blackhole bh) {
        byte[] as = fa.apply(size);
        byte[] bs = fb.apply(size);
        byte[] rs = fr.apply(size);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < as.length; i++) {
                byte a = as[i];
                byte b = bs[i];
                rs[i] = (byte)(a | b);
            }
        }

        bh.consume(rs);
    }

    /** Like {@link #or}, but lanes whose mask is {@code false} receive {@code as[i]}. */
    @Benchmark
    public void orMasked(Blackhole bh) {
        byte[] as = fa.apply(size);
        byte[] bs = fb.apply(size);
        byte[] rs = fr.apply(size);
        boolean[] ms = fm.apply(size);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < as.length; i++) {
                byte a = as[i];
                byte b = bs[i];
                if (ms[i % ms.length]) {
                    rs[i] = (byte)(a | b);
                } else {
                    rs[i] = a;
                }
            }
        }
        bh.consume(rs);
    }

    /** {@code rs[i] = (byte)(as[i] ^ bs[i])}. */
    @Benchmark
    public void xor(Blackhole bh) {
        byte[] as = fa.apply(size);
        byte[] bs = fb.apply(size);
        byte[] rs = fr.apply(size);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < as.length; i++) {
                byte a = as[i];
                byte b = bs[i];
                rs[i] = (byte)(a ^ b);
            }
        }

        bh.consume(rs);
    }

    /** Like {@link #xor}, but lanes whose mask is {@code false} receive {@code as[i]}. */
    @Benchmark
    public void xorMasked(Blackhole bh) {
        byte[] as = fa.apply(size);
        byte[] bs = fb.apply(size);
        byte[] rs = fr.apply(size);
        boolean[] ms = fm.apply(size);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < as.length; i++) {
                byte a = as[i];
                byte b = bs[i];
                if (ms[i % ms.length]) {
                    rs[i] = (byte)(a ^ b);
                } else {
                    rs[i] = a;
                }
            }
        }
        bh.consume(rs);
    }

    /** {@code rs[i] = (byte)(as[i] << (bs[i] & 0x7))}. */
    @Benchmark
    public void shiftLeft(Blackhole bh) {
        byte[] as = fa.apply(size);
        byte[] bs = fb.apply(size);
        byte[] rs = fr.apply(size);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < as.length; i++) {
                byte a = as[i];
                byte b = bs[i];
                rs[i] = (byte)((a << (b & 0x7)));
            }
        }

        bh.consume(rs);
    }

    /** Like {@link #shiftLeft}, but lanes whose mask is {@code false} receive {@code as[i]}. */
    @Benchmark
    public void shiftLeftMasked(Blackhole bh) {
        byte[] as = fa.apply(size);
        byte[] bs = fb.apply(size);
        byte[] rs = fr.apply(size);
        boolean[] ms = fm.apply(size);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < as.length; i++) {
                byte a = as[i];
                byte b = bs[i];
                if (ms[i % ms.length]) {
                    rs[i] = (byte)((a << (b & 0x7)));
                } else {
                    rs[i] = a;
                }
            }
        }
        bh.consume(rs);
    }

    /**
     * Logical (zero-filling) right shift of each byte by {@code bs[i] & 0x7}.
     *
     * <p>The operand is masked with {@code 0xFF} first: a bare {@code byte} is
     * sign-extended to {@code int} before {@code >>>}, so for shift counts
     * 0..7 the low eight bits would be filled with copies of the sign bit and
     * the result would equal the arithmetic shift.
     */
    @Benchmark
    public void shiftRight(Blackhole bh) {
        byte[] as = fa.apply(size);
        byte[] bs = fb.apply(size);
        byte[] rs = fr.apply(size);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < as.length; i++) {
                byte a = as[i];
                byte b = bs[i];
                rs[i] = (byte)(((a & 0xFF) >>> (b & 0x7)));
            }
        }

        bh.consume(rs);
    }

    /**
     * Like {@link #shiftRight} (zero-filling, operand masked with {@code 0xFF}
     * to undo sign extension), but lanes whose mask is {@code false} receive
     * {@code as[i]}.
     */
    @Benchmark
    public void shiftRightMasked(Blackhole bh) {
        byte[] as = fa.apply(size);
        byte[] bs = fb.apply(size);
        byte[] rs = fr.apply(size);
        boolean[] ms = fm.apply(size);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < as.length; i++) {
                byte a = as[i];
                byte b = bs[i];
                if (ms[i % ms.length]) {
                    rs[i] = (byte)(((a & 0xFF) >>> (b & 0x7)));
                } else {
                    rs[i] = a;
                }
            }
        }
        bh.consume(rs);
    }

    /** Arithmetic (sign-preserving) right shift: {@code rs[i] = (byte)(as[i] >> (bs[i] & 0x7))}. */
    @Benchmark
    public void shiftArithmeticRight(Blackhole bh) {
        byte[] as = fa.apply(size);
        byte[] bs = fb.apply(size);
        byte[] rs = fr.apply(size);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < as.length; i++) {
                byte a = as[i];
                byte b = bs[i];
                rs[i] = (byte)((a >> (b & 0x7)));
            }
        }

        bh.consume(rs);
    }

    /** Like {@link #shiftArithmeticRight}, but lanes whose mask is {@code false} receive {@code as[i]}. */
    @Benchmark
    public void shiftArithmeticRightMasked(Blackhole bh) {
        byte[] as = fa.apply(size);
        byte[] bs = fb.apply(size);
        byte[] rs = fr.apply(size);
        boolean[] ms = fm.apply(size);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < as.length; i++) {
                byte a = as[i];
                byte b = bs[i];
                if (ms[i % ms.length]) {
                    rs[i] = (byte)((a >> (b & 0x7)));
                } else {
                    rs[i] = a;
                }
            }
        }
        bh.consume(rs);
    }

    /** {@code rs[i] = (byte)(as[i] << (bs[i] & 7))}. */
    @Benchmark
    public void shiftLeftShift(Blackhole bh) {
        byte[] as = fa.apply(size);
        byte[] bs = fb.apply(size);
        byte[] rs = fr.apply(size);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < as.length; i++) {
                byte a = as[i];
                byte b = bs[i];
                rs[i] = (byte)((a << (b & 7)));
            }
        }

        bh.consume(rs);
    }

    /** Like {@link #shiftLeftShift}, selecting {@code as[i]} where the mask is {@code false}. */
    @Benchmark
    public void shiftLeftMaskedShift(Blackhole bh) {
        byte[] as = fa.apply(size);
        byte[] bs = fb.apply(size);
        byte[] rs = fr.apply(size);
        boolean[] ms = fm.apply(size);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < as.length; i++) {
                byte a = as[i];
                byte b = bs[i];
                boolean m = ms[i % ms.length];
                rs[i] = (m ? (byte)((a << (b & 7))) : a);
            }
        }

        bh.consume(rs);
    }

    /** Zero-filling right shift: {@code rs[i] = (byte)((as[i] & 0xFF) >>> (bs[i] & 7))}. */
    @Benchmark
    public void shiftRightShift(Blackhole bh) {
        byte[] as = fa.apply(size);
        byte[] bs = fb.apply(size);
        byte[] rs = fr.apply(size);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < as.length; i++) {
                byte a = as[i];
                byte b = bs[i];
                rs[i] = (byte)(((a & 0xFF) >>> (b & 7)));
            }
        }

        bh.consume(rs);
    }

    /** Like {@link #shiftRightShift}, selecting {@code as[i]} where the mask is {@code false}. */
    @Benchmark
    public void shiftRightMaskedShift(Blackhole bh) {
        byte[] as = fa.apply(size);
        byte[] bs = fb.apply(size);
        byte[] rs = fr.apply(size);
        boolean[] ms = fm.apply(size);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < as.length; i++) {
                byte a = as[i];
                byte b = bs[i];
                boolean m = ms[i % ms.length];
                rs[i] = (m ? (byte)(((a & 0xFF) >>> (b & 7))) : a);
            }
        }

        bh.consume(rs);
    }

    /** Sign-preserving right shift: {@code rs[i] = (byte)(as[i] >> (bs[i] & 7))}. */
    @Benchmark
    public void shiftArithmeticRightShift(Blackhole bh) {
        byte[] as = fa.apply(size);
        byte[] bs = fb.apply(size);
        byte[] rs = fr.apply(size);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < as.length; i++) {
                byte a = as[i];
                byte b = bs[i];
                rs[i] = (byte)((a >> (b & 7)));
            }
        }

        bh.consume(rs);
    }

    /** Like {@link #shiftArithmeticRightShift}, selecting {@code as[i]} where the mask is {@code false}. */
    @Benchmark
    public void shiftArithmeticRightMaskedShift(Blackhole bh) {
        byte[] as = fa.apply(size);
        byte[] bs = fb.apply(size);
        byte[] rs = fr.apply(size);
        boolean[] ms = fm.apply(size);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < as.length; i++) {
                byte a = as[i];
                byte b = bs[i];
                boolean m = ms[i % ms.length];
                rs[i] = (m ? (byte)((a >> (b & 7))) : a);
            }
        }

        bh.consume(rs);
    }

    /** {@code rs[i] = max(as[i], bs[i])}. */
    @Benchmark
    public void max(Blackhole bh) {
        byte[] as = fa.apply(size);
        byte[] bs = fb.apply(size);
        byte[] rs = fr.apply(size);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < as.length; i++) {
                byte a = as[i];
                byte b = bs[i];
                rs[i] = (byte)(Math.max(a, b));
            }
        }

        bh.consume(rs);
    }

    /** {@code rs[i] = min(as[i], bs[i])}. */
    @Benchmark
    public void min(Blackhole bh) {
        byte[] as = fa.apply(size);
        byte[] bs = fb.apply(size);
        byte[] rs = fr.apply(size);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < as.length; i++) {
                byte a = as[i];
                byte b = bs[i];
                rs[i] = (byte)(Math.min(a, b));
            }
        }

        bh.consume(rs);
    }

    /** Bitwise-AND reduction of {@code as}, starting from {@code -1} (all bits set). */
    @Benchmark
    public void andLanes(Blackhole bh) {
        byte[] as = fa.apply(size);
        byte r = -1;
        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            r = -1;
            for (int i = 0; i < as.length; i++) {
                r &= as[i];
            }
        }
        bh.consume(r);
    }

    /** Bitwise-OR reduction of {@code as}, starting from {@code 0}. */
    @Benchmark
    public void orLanes(Blackhole bh) {
        byte[] as = fa.apply(size);
        byte r = 0;
        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            r = 0;
            for (int i = 0; i < as.length; i++) {
                r |= as[i];
            }
        }
        bh.consume(r);
    }

    /** Bitwise-XOR reduction of {@code as}, starting from {@code 0}. */
    @Benchmark
    public void xorLanes(Blackhole bh) {
        byte[] as = fa.apply(size);
        byte r = 0;
        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            r = 0;
            for (int i = 0; i < as.length; i++) {
                r ^= as[i];
            }
        }
        bh.consume(r);
    }

    /** Sum reduction of {@code as} with wrap-around {@code byte} arithmetic. */
    @Benchmark
    public void addLanes(Blackhole bh) {
        byte[] as = fa.apply(size);
        byte r = 0;
        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            r = 0;
            for (int i = 0; i < as.length; i++) {
                r += as[i];
            }
        }
        bh.consume(r);
    }

    /** Product reduction of {@code as} with wrap-around {@code byte} arithmetic. */
    @Benchmark
    public void mulLanes(Blackhole bh) {
        byte[] as = fa.apply(size);
        byte r = 1;
        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            r = 1;
            for (int i = 0; i < as.length; i++) {
                r *= as[i];
            }
        }
        bh.consume(r);
    }

    /** Minimum reduction of {@code as}, starting from {@link Byte#MAX_VALUE}. */
    @Benchmark
    public void minLanes(Blackhole bh) {
        byte[] as = fa.apply(size);
        byte r = Byte.MAX_VALUE;
        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            r = Byte.MAX_VALUE;
            for (int i = 0; i < as.length; i++) {
                r = (byte)Math.min(r, as[i]);
            }
        }
        bh.consume(r);
    }

    /** Maximum reduction of {@code as}, starting from {@link Byte#MIN_VALUE}. */
    @Benchmark
    public void maxLanes(Blackhole bh) {
        byte[] as = fa.apply(size);
        byte r = Byte.MIN_VALUE;
        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            r = Byte.MIN_VALUE;
            for (int i = 0; i < as.length; i++) {
                r = (byte)Math.max(r, as[i]);
            }
        }
        bh.consume(r);
    }

    /** OR-reduction of the mask: {@code true} if any element of {@code ms} is set. */
    @Benchmark
    public void anyTrue(Blackhole bh) {
        boolean[] ms = fm.apply(size);
        boolean r = false;
        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            r = false;
            for (int i = 0; i < ms.length; i++) {
                r |= ms[i];
            }
        }
        bh.consume(r);
    }

    /** AND-reduction of the mask: {@code true} only if every element of {@code ms} is set. */
    @Benchmark
    public void allTrue(Blackhole bh) {
        boolean[] ms = fm.apply(size);
        boolean r = true;
        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            r = true;
            for (int i = 0; i < ms.length; i++) {
                r &= ms[i];
            }
        }
        bh.consume(r);
    }

    /** Compares {@code as[i] < bs[i]} for every lane and ORs the outcomes together. */
    @Benchmark
    public void lessThan(Blackhole bh) {
        byte[] as = fa.apply(size);
        byte[] bs = fb.apply(size);

        boolean r = false;
        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            r = false;
            for (int i = 0; i < as.length; i++) {
                boolean m = (as[i] < bs[i]);
                r |= m; // accumulate so JIT can't eliminate the computation
            }
        }

        bh.consume(r);
    }

    /** Compares {@code as[i] > bs[i]} for every lane and ORs the outcomes together. */
    @Benchmark
    public void greaterThan(Blackhole bh) {
        byte[] as = fa.apply(size);
        byte[] bs = fb.apply(size);

        boolean r = false;
        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            r = false;
            for (int i = 0; i < as.length; i++) {
                boolean m = (as[i] > bs[i]);
                r |= m; // accumulate so JIT can't eliminate the computation
            }
        }

        bh.consume(r);
    }

    /** Compares {@code as[i] == bs[i]} for every lane and ORs the outcomes together. */
    @Benchmark
    public void equal(Blackhole bh) {
        byte[] as = fa.apply(size);
        byte[] bs = fb.apply(size);

        boolean r = false;
        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            r = false;
            for (int i = 0; i < as.length; i++) {
                boolean m = (as[i] == bs[i]);
                r |= m; // accumulate so JIT can't eliminate the computation
            }
        }

        bh.consume(r);
    }

    /** Compares {@code as[i] != bs[i]} for every lane and ORs the outcomes together. */
    @Benchmark
    public void notEqual(Blackhole bh) {
        byte[] as = fa.apply(size);
        byte[] bs = fb.apply(size);

        boolean r = false;
        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            r = false;
            for (int i = 0; i < as.length; i++) {
                boolean m = (as[i] != bs[i]);
                r |= m; // accumulate so JIT can't eliminate the computation
            }
        }

        bh.consume(r);
    }

    /** Compares {@code as[i] <= bs[i]} for every lane and ORs the outcomes together. */
    @Benchmark
    public void lessThanEq(Blackhole bh) {
        byte[] as = fa.apply(size);
        byte[] bs = fb.apply(size);

        boolean r = false;
        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            r = false;
            for (int i = 0; i < as.length; i++) {
                boolean m = (as[i] <= bs[i]);
                r |= m; // accumulate so JIT can't eliminate the computation
            }
        }

        bh.consume(r);
    }

    /** Compares {@code as[i] >= bs[i]} for every lane and ORs the outcomes together. */
    @Benchmark
    public void greaterThanEq(Blackhole bh) {
        byte[] as = fa.apply(size);
        byte[] bs = fb.apply(size);

        boolean r = false;
        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            r = false;
            for (int i = 0; i < as.length; i++) {
                boolean m = (as[i] >= bs[i]);
                r |= m; // accumulate so JIT can't eliminate the computation
            }
        }

        bh.consume(r);
    }

    /** Per-lane select: {@code rs[i] = mask ? bs[i] : as[i]}. */
    @Benchmark
    public void blend(Blackhole bh) {
        byte[] as = fa.apply(size);
        byte[] bs = fb.apply(size);
        byte[] rs = fr.apply(size);
        boolean[] ms = fm.apply(size);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < as.length; i++) {
                byte a = as[i];
                byte b = bs[i];
                boolean m = ms[i % ms.length];
                rs[i] = (m ? b : a);
            }
        }

        bh.consume(rs);
    }

    /**
     * Shared body of the {@code rearrangeNNN} benchmarks: walks {@code as} in
     * chunks of {@code window} elements and scatters element {@code j} of each
     * chunk to offset {@code order[j]} of the same chunk in {@code rs}.
     *
     * <p>The inner loop reads {@code as[i + j]} for every {@code j < window}
     * without a bounds guard, so {@code as.length} has to be a multiple of
     * {@code window}; that holds for the default size of 1024 and the windows
     * (8, 16, 32, 64) used by the callers in this class.
     * NOTE(review): the write index relies on {@code order[j] < window};
     * presumably guaranteed by how {@code ss} is generated in {@link #init()}
     * - confirm against {@code fillInt}/{@code RANDOM}.
     *
     * @param window number of elements per chunk
     * @param bh     sink for the result array
     */
    void rearrangeShared(int window, Blackhole bh) {
        byte[] as = fa.apply(size);
        int[] order = fs.apply(size);
        byte[] rs = fr.apply(size);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < as.length; i += window) {
                for (int j = 0; j < window; j++) {
                    byte a = as[i+j];
                    int pos = order[j];
                    rs[i + pos] = a;
                }
            }
        }

        bh.consume(rs);
    }

    /** Rearrange with a window of {@code 64 / Byte.SIZE} elements. */
    @Benchmark
    public void rearrange064(Blackhole bh) {
        int window = 64 / Byte.SIZE;
        rearrangeShared(window, bh);
    }

    /** Rearrange with a window of {@code 128 / Byte.SIZE} elements. */
    @Benchmark
    public void rearrange128(Blackhole bh) {
        int window = 128 / Byte.SIZE;
        rearrangeShared(window, bh);
    }

    /** Rearrange with a window of {@code 256 / Byte.SIZE} elements. */
    @Benchmark
    public void rearrange256(Blackhole bh) {
        int window = 256 / Byte.SIZE;
        rearrangeShared(window, bh);
    }

    /** Rearrange with a window of {@code 512 / Byte.SIZE} elements. */
    @Benchmark
    public void rearrange512(Blackhole bh) {
        int window = 512 / Byte.SIZE;
        rearrangeShared(window, bh);
    }

    /** {@code rs[i] = (byte)(-as[i])}. */
    @Benchmark
    public void neg(Blackhole bh) {
        byte[] as = fa.apply(size);
        byte[] rs = fr.apply(size);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < as.length; i++) {
                byte a = as[i];
                rs[i] = (byte)(-((byte)a));
            }
        }

        bh.consume(rs);
    }

    /** Like {@link #neg}, selecting {@code as[i]} where the mask is {@code false}. */
    @Benchmark
    public void negMasked(Blackhole bh) {
        byte[] as = fa.apply(size);
        byte[] rs = fr.apply(size);
        boolean[] ms = fm.apply(size);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < as.length; i++) {
                byte a = as[i];
                boolean m = ms[i % ms.length];
                rs[i] = (m ? (byte)(-((byte)a)) : a);
            }
        }

        bh.consume(rs);
    }

    /** {@code rs[i] = (byte)Math.abs(as[i])}. */
    @Benchmark
    public void abs(Blackhole bh) {
        byte[] as = fa.apply(size);
        byte[] rs = fr.apply(size);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < as.length; i++) {
                byte a = as[i];
                rs[i] = (byte)(Math.abs((byte)a));
            }
        }

        bh.consume(rs);
    }

    /** Like {@link #abs}, selecting {@code as[i]} where the mask is {@code false}. */
    @Benchmark
    public void absMasked(Blackhole bh) {
        byte[] as = fa.apply(size);
        byte[] rs = fr.apply(size);
        boolean[] ms = fm.apply(size);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < as.length; i++) {
                byte a = as[i];
                boolean m = ms[i % ms.length];
                rs[i] = (m ? (byte)(Math.abs((byte)a)) : a);
            }
        }

        bh.consume(rs);
    }

    /** {@code rs[i] = (byte)(~as[i])}. */
    @Benchmark
    public void not(Blackhole bh) {
        byte[] as = fa.apply(size);
        byte[] rs = fr.apply(size);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < as.length; i++) {
                byte a = as[i];
                rs[i] = (byte)(~((byte)a));
            }
        }

        bh.consume(rs);
    }

    /** Like {@link #not}, selecting {@code as[i]} where the mask is {@code false}. */
    @Benchmark
    public void notMasked(Blackhole bh) {
        byte[] as = fa.apply(size);
        byte[] rs = fr.apply(size);
        boolean[] ms = fm.apply(size);

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < as.length; i++) {
                byte a = as[i];
                boolean m = ms[i % ms.length];
                rs[i] = (m ? (byte)(~((byte)a)) : a);
            }
        }

        bh.consume(rs);
    }

}