1 /* 2 * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have 21 * questions. 22 */ 23 24 package benchmark.jdk.incubator.vector; 25 26 import java.util.concurrent.TimeUnit; 27 import java.util.function.IntFunction; 28 29 import org.openjdk.jmh.annotations.*; 30 31 @BenchmarkMode(Mode.Throughput) 32 @OutputTimeUnit(TimeUnit.MILLISECONDS) 33 @State(Scope.Benchmark) 34 @Warmup(iterations = 3, time = 1) 35 @Measurement(iterations = 5, time = 1) 36 @Fork(value = 1, jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"}) 37 public class ByteScalar extends AbstractVectorBenchmark { 38 @Param("1024") 39 int size; 40 41 byte[] fill(IntFunction<Byte> f) { 42 byte[] array = new byte[size]; 43 for (int i = 0; i < array.length; i++) { 44 array[i] = f.apply(i); 45 } 46 return array; 47 } 48 49 byte[] as, bs, cs, rs; 50 boolean[] ms, rms; 51 int[] ss; 52 53 @Setup 54 public void init() { 55 as = fill(i -> (byte)(2*i)); 56 bs = fill(i -> (byte)(i+1)); 57 cs = fill(i -> (byte)(i+5)); 58 rs = fill(i -> (byte)0); 59 ms = fillMask(size, i -> (i % 2) == 0); 60 rms = fillMask(size, i -> false); 61 62 ss = fillInt(size, i -> RANDOM.nextInt(Math.max(i,1))); 63 } 64 65 final IntFunction<byte[]> fa = vl -> as; 66 final IntFunction<byte[]> fb = vl -> bs; 67 final IntFunction<byte[]> fc = vl -> cs; 68 final IntFunction<byte[]> fr = vl -> rs; 69 final IntFunction<boolean[]> fm = vl -> ms; 70 final IntFunction<boolean[]> fmr = vl -> rms; 71 final IntFunction<int[]> fs = vl -> ss; 72 73 74 @Benchmark 75 public Object add() { 76 byte[] as = fa.apply(size); 77 byte[] bs = fb.apply(size); 78 byte[] rs = fr.apply(size); 79 80 for (int i = 0; i < as.length; i++) { 81 byte a = as[i]; 82 byte b = bs[i]; 83 rs[i] = (byte)(a + b); 84 } 85 86 return rs; 87 } 88 89 @Benchmark 90 public Object addMasked() { 91 byte[] as = fa.apply(size); 92 byte[] bs = fb.apply(size); 93 byte[] rs = fr.apply(size); 94 boolean[] ms = fm.apply(size); 95 96 for (int i = 0; i < as.length; i++) { 97 byte a = as[i]; 98 byte b = bs[i]; 99 if (ms[i % ms.length]) { 100 rs[i] = (byte)(a + b); 101 } else { 102 rs[i] = a; 103 } 104 } 105 return rs; 106 } 107 108 @Benchmark 109 public Object sub() { 110 byte[] as = fa.apply(size); 111 byte[] bs = fb.apply(size); 112 byte[] rs = fr.apply(size); 113 114 for (int i = 0; i < as.length; i++) { 115 byte a = as[i]; 116 byte b = bs[i]; 117 rs[i] = (byte)(a - b); 118 } 119 120 return rs; 121 } 122 123 @Benchmark 124 public Object subMasked() { 125 byte[] as = fa.apply(size); 126 byte[] bs = fb.apply(size); 127 byte[] rs = fr.apply(size); 128 boolean[] ms = fm.apply(size); 129 130 for (int i = 0; i < as.length; i++) { 131 byte a = as[i]; 132 byte b = bs[i]; 133 if (ms[i % ms.length]) { 134 rs[i] = (byte)(a - b); 135 } else { 136 rs[i] = a; 137 } 138 } 139 return rs; 140 } 141 142 143 144 @Benchmark 145 public Object mul() { 146 byte[] as = fa.apply(size); 147 byte[] bs = fb.apply(size); 148 byte[] rs = fr.apply(size); 149 150 for (int i = 0; i < as.length; i++) { 151 byte a = as[i]; 152 byte b = bs[i]; 153 rs[i] = (byte)(a * b); 154 } 155 156 return rs; 157 } 158 159 @Benchmark 160 public Object mulMasked() { 161 byte[] as = fa.apply(size); 162 byte[] bs = fb.apply(size); 163 byte[] rs = fr.apply(size); 164 boolean[] ms = fm.apply(size); 165 166 for (int i = 0; i < as.length; i++) { 167 byte a = as[i]; 168 byte b = bs[i]; 169 if (ms[i % ms.length]) { 170 rs[i] = (byte)(a * b); 171 } else { 172 rs[i] = a; 173 } 174 } 175 return rs; 176 } 177 178 179 @Benchmark 180 public Object and() { 181 byte[] as = fa.apply(size); 182 byte[] bs = fb.apply(size); 183 byte[] rs = fr.apply(size); 184 185 for (int i = 0; i < as.length; i++) { 186 byte a = as[i]; 187 byte b = bs[i]; 188 rs[i] = (byte)(a & b); 189 } 190 191 return rs; 192 } 193 194 195 196 @Benchmark 197 public Object andMasked() { 198 byte[] as = fa.apply(size); 199 byte[] bs = fb.apply(size); 200 byte[] rs = fr.apply(size); 201 boolean[] ms = fm.apply(size); 202 203 for (int i = 0; i < as.length; i++) { 204 byte a = as[i]; 205 byte b = bs[i]; 206 if (ms[i % ms.length]) { 207 rs[i] = (byte)(a & b); 208 } else { 209 rs[i] = a; 210 } 211 } 212 return rs; 213 } 214 215 216 217 @Benchmark 218 public Object or() { 219 byte[] as = fa.apply(size); 220 byte[] bs = fb.apply(size); 221 byte[] rs = fr.apply(size); 222 223 for (int i = 0; i < as.length; i++) { 224 byte a = as[i]; 225 byte b = bs[i]; 226 rs[i] = (byte)(a | b); 227 } 228 229 return rs; 230 } 231 232 233 234 @Benchmark 235 public Object orMasked() { 236 byte[] as = fa.apply(size); 237 byte[] bs = fb.apply(size); 238 byte[] rs = fr.apply(size); 239 boolean[] ms = fm.apply(size); 240 241 for (int i = 0; i < as.length; i++) { 242 byte a = as[i]; 243 byte b = bs[i]; 244 if (ms[i % ms.length]) { 245 rs[i] = (byte)(a | b); 246 } else { 247 rs[i] = a; 248 } 249 } 250 return rs; 251 } 252 253 254 255 @Benchmark 256 public Object xor() { 257 byte[] as = fa.apply(size); 258 byte[] bs = fb.apply(size); 259 byte[] rs = fr.apply(size); 260 261 for (int i = 0; i < as.length; i++) { 262 byte a = as[i]; 263 byte b = bs[i]; 264 rs[i] = (byte)(a ^ b); 265 } 266 267 return rs; 268 } 269 270 271 272 @Benchmark 273 public Object xorMasked() { 274 byte[] as = fa.apply(size); 275 byte[] bs = fb.apply(size); 276 byte[] rs = fr.apply(size); 277 boolean[] ms = fm.apply(size); 278 279 for (int i = 0; i < as.length; i++) { 280 byte a = as[i]; 281 byte b = bs[i]; 282 if (ms[i % ms.length]) { 283 rs[i] = (byte)(a ^ b); 284 } else { 285 rs[i] = a; 286 } 287 } 288 return rs; 289 } 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 @Benchmark 306 public Object aShiftRShift() { 307 byte[] as = fa.apply(size); 308 byte[] bs = fb.apply(size); 309 byte[] rs = fr.apply(size); 310 311 for (int i = 0; i < as.length; i++) { 312 byte a = as[i]; 313 byte b = bs[i]; 314 rs[i] = (byte)((a >> (b & 7))); 315 } 316 317 return rs; 318 } 319 320 321 322 @Benchmark 323 public Object aShiftRMaskedShift() { 324 byte[] as = fa.apply(size); 325 byte[] bs = fb.apply(size); 326 byte[] rs = fr.apply(size); 327 boolean[] ms = fm.apply(size); 328 329 for (int i = 0; i < as.length; i++) { 330 byte a = as[i]; 331 byte b = bs[i]; 332 boolean m = ms[i % ms.length]; 333 rs[i] = (m ? (byte)((a >> (b & 7))) : a); 334 } 335 336 return rs; 337 } 338 339 340 341 @Benchmark 342 public Object shiftLShift() { 343 byte[] as = fa.apply(size); 344 byte[] bs = fb.apply(size); 345 byte[] rs = fr.apply(size); 346 347 for (int i = 0; i < as.length; i++) { 348 byte a = as[i]; 349 byte b = bs[i]; 350 rs[i] = (byte)((a << (b & 7))); 351 } 352 353 return rs; 354 } 355 356 357 358 @Benchmark 359 public Object shiftLMaskedShift() { 360 byte[] as = fa.apply(size); 361 byte[] bs = fb.apply(size); 362 byte[] rs = fr.apply(size); 363 boolean[] ms = fm.apply(size); 364 365 for (int i = 0; i < as.length; i++) { 366 byte a = as[i]; 367 byte b = bs[i]; 368 boolean m = ms[i % ms.length]; 369 rs[i] = (m ? (byte)((a << (b & 7))) : a); 370 } 371 372 return rs; 373 } 374 375 376 377 @Benchmark 378 public Object shiftRShift() { 379 byte[] as = fa.apply(size); 380 byte[] bs = fb.apply(size); 381 byte[] rs = fr.apply(size); 382 383 for (int i = 0; i < as.length; i++) { 384 byte a = as[i]; 385 byte b = bs[i]; 386 rs[i] = (byte)(((a & 0xFF) >>> (b & 7))); 387 } 388 389 return rs; 390 } 391 392 393 394 @Benchmark 395 public Object shiftRMaskedShift() { 396 byte[] as = fa.apply(size); 397 byte[] bs = fb.apply(size); 398 byte[] rs = fr.apply(size); 399 boolean[] ms = fm.apply(size); 400 401 for (int i = 0; i < as.length; i++) { 402 byte a = as[i]; 403 byte b = bs[i]; 404 boolean m = ms[i % ms.length]; 405 rs[i] = (m ? (byte)(((a & 0xFF) >>> (b & 7))) : a); 406 } 407 408 return rs; 409 } 410 411 412 413 414 415 416 417 418 @Benchmark 419 public Object max() { 420 byte[] as = fa.apply(size); 421 byte[] bs = fb.apply(size); 422 byte[] rs = fr.apply(size); 423 424 for (int i = 0; i < as.length; i++) { 425 byte a = as[i]; 426 byte b = bs[i]; 427 rs[i] = (byte)(Math.max(a, b)); 428 } 429 430 return rs; 431 } 432 433 @Benchmark 434 public Object min() { 435 byte[] as = fa.apply(size); 436 byte[] bs = fb.apply(size); 437 byte[] rs = fr.apply(size); 438 439 for (int i = 0; i < as.length; i++) { 440 byte a = as[i]; 441 byte b = bs[i]; 442 rs[i] = (byte)(Math.min(a, b)); 443 } 444 445 return rs; 446 } 447 448 449 @Benchmark 450 public byte andAll() { 451 byte[] as = fa.apply(size); 452 byte r = -1; 453 for (int i = 0; i < as.length; i++) { 454 r &= as[i]; 455 } 456 return r; 457 } 458 459 460 461 @Benchmark 462 public byte orAll() { 463 byte[] as = fa.apply(size); 464 byte r = 0; 465 for (int i = 0; i < as.length; i++) { 466 r |= as[i]; 467 } 468 return r; 469 } 470 471 472 473 @Benchmark 474 public byte xorAll() { 475 byte[] as = fa.apply(size); 476 byte r = 0; 477 for (int i = 0; i < as.length; i++) { 478 r ^= as[i]; 479 } 480 return r; 481 } 482 483 484 @Benchmark 485 public byte addAll() { 486 byte[] as = fa.apply(size); 487 byte r = 0; 488 for (int i = 0; i < as.length; i++) { 489 r += as[i]; 490 } 491 return r; 492 } 493 494 @Benchmark 495 public byte mulAll() { 496 byte[] as = fa.apply(size); 497 byte r = 1; 498 for (int i = 0; i < as.length; i++) { 499 r *= as[i]; 500 } 501 return r; 502 } 503 504 @Benchmark 505 public byte minAll() { 506 byte[] as = fa.apply(size); 507 byte r = Byte.MAX_VALUE; 508 for (int i = 0; i < as.length; i++) { 509 r = (byte)Math.min(r, as[i]); 510 } 511 return r; 512 } 513 514 @Benchmark 515 public byte maxAll() { 516 byte[] as = fa.apply(size); 517 byte r = Byte.MIN_VALUE; 518 for (int i = 0; i < as.length; i++) { 519 r = (byte)Math.max(r, as[i]); 520 } 521 return r; 522 } 523 524 525 @Benchmark 526 public boolean anyTrue() { 527 boolean[] ms = fm.apply(size); 528 boolean r = false; 529 for (int i = 0; i < ms.length; i++) { 530 r |= ms[i]; 531 } 532 return r; 533 } 534 535 536 537 @Benchmark 538 public boolean allTrue() { 539 boolean[] ms = fm.apply(size); 540 boolean r = true; 541 for (int i = 0; i < ms.length; i++) { 542 r &= ms[i]; 543 } 544 return r; 545 } 546 547 548 @Benchmark 549 public boolean lessThan() { 550 byte[] as = fa.apply(size); 551 byte[] bs = fb.apply(size); 552 553 boolean r = false; 554 for (int i = 0; i < as.length; i++) { 555 boolean m = (as[i] < bs[i]); 556 r |= m; // accumulate so JIT can't eliminate the computation 557 } 558 559 return r; 560 } 561 562 @Benchmark 563 public boolean greaterThan() { 564 byte[] as = fa.apply(size); 565 byte[] bs = fb.apply(size); 566 567 boolean r = false; 568 for (int i = 0; i < as.length; i++) { 569 boolean m = (as[i] > bs[i]); 570 r |= m; // accumulate so JIT can't eliminate the computation 571 } 572 573 return r; 574 } 575 576 @Benchmark 577 public boolean equal() { 578 byte[] as = fa.apply(size); 579 byte[] bs = fb.apply(size); 580 581 boolean r = false; 582 for (int i = 0; i < as.length; i++) { 583 boolean m = (as[i] == bs[i]); 584 r |= m; // accumulate so JIT can't eliminate the computation 585 } 586 587 return r; 588 } 589 590 @Benchmark 591 public boolean notEqual() { 592 byte[] as = fa.apply(size); 593 byte[] bs = fb.apply(size); 594 595 boolean r = false; 596 for (int i = 0; i < as.length; i++) { 597 boolean m = (as[i] != bs[i]); 598 r |= m; // accumulate so JIT can't eliminate the computation 599 } 600 601 return r; 602 } 603 604 @Benchmark 605 public boolean lessThanEq() { 606 byte[] as = fa.apply(size); 607 byte[] bs = fb.apply(size); 608 609 boolean r = false; 610 for (int i = 0; i < as.length; i++) { 611 boolean m = (as[i] <= bs[i]); 612 r |= m; // accumulate so JIT can't eliminate the computation 613 } 614 615 return r; 616 } 617 618 @Benchmark 619 public boolean greaterThanEq() { 620 byte[] as = fa.apply(size); 621 byte[] bs = fb.apply(size); 622 623 boolean r = false; 624 for (int i = 0; i < as.length; i++) { 625 boolean m = (as[i] >= bs[i]); 626 r |= m; // accumulate so JIT can't eliminate the computation 627 } 628 629 return r; 630 } 631 632 @Benchmark 633 public Object blend() { 634 byte[] as = fa.apply(size); 635 byte[] bs = fb.apply(size); 636 byte[] rs = fr.apply(size); 637 boolean[] ms = fm.apply(size); 638 639 for (int i = 0; i < as.length; i++) { 640 byte a = as[i]; 641 byte b = bs[i]; 642 boolean m = ms[i % ms.length]; 643 rs[i] = (m ? b : a); 644 } 645 646 return rs; 647 } 648 Object rearrangeShared(int window) { 649 byte[] as = fa.apply(size); 650 int[] order = fs.apply(size); 651 byte[] rs = fr.apply(size); 652 653 for (int i = 0; i < as.length; i += window) { 654 for (int j = 0; j < window; j++) { 655 byte a = as[i+j]; 656 int pos = order[j]; 657 rs[i + pos] = a; 658 } 659 } 660 661 return rs; 662 } 663 664 @Benchmark 665 public Object rearrange064() { 666 int window = 64 / Byte.SIZE; 667 return rearrangeShared(window); 668 } 669 670 @Benchmark 671 public Object rearrange128() { 672 int window = 128 / Byte.SIZE; 673 return rearrangeShared(window); 674 } 675 676 @Benchmark 677 public Object rearrange256() { 678 int window = 256 / Byte.SIZE; 679 return rearrangeShared(window); 680 } 681 682 @Benchmark 683 public Object rearrange512() { 684 int window = 512 / Byte.SIZE; 685 return rearrangeShared(window); 686 } 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 @Benchmark 709 public Object neg() { 710 byte[] as = fa.apply(size); 711 byte[] rs = fr.apply(size); 712 713 for (int i = 0; i < as.length; i++) { 714 byte a = as[i]; 715 rs[i] = (byte)(-((byte)a)); 716 } 717 718 return rs; 719 } 720 721 @Benchmark 722 public Object negMasked() { 723 byte[] as = fa.apply(size); 724 byte[] rs = fr.apply(size); 725 boolean[] ms = fm.apply(size); 726 727 for (int i = 0; i < as.length; i++) { 728 byte a = as[i]; 729 boolean m = ms[i % ms.length]; 730 rs[i] = (m ? (byte)(-((byte)a)) : a); 731 } 732 733 return rs; 734 } 735 736 @Benchmark 737 public Object abs() { 738 byte[] as = fa.apply(size); 739 byte[] rs = fr.apply(size); 740 741 for (int i = 0; i < as.length; i++) { 742 byte a = as[i]; 743 rs[i] = (byte)(Math.abs((byte)a)); 744 } 745 746 return rs; 747 } 748 749 @Benchmark 750 public Object absMasked() { 751 byte[] as = fa.apply(size); 752 byte[] rs = fr.apply(size); 753 boolean[] ms = fm.apply(size); 754 755 for (int i = 0; i < as.length; i++) { 756 byte a = as[i]; 757 boolean m = ms[i % ms.length]; 758 rs[i] = (m ? (byte)(Math.abs((byte)a)) : a); 759 } 760 761 return rs; 762 } 763 764 765 @Benchmark 766 public Object not() { 767 byte[] as = fa.apply(size); 768 byte[] rs = fr.apply(size); 769 770 for (int i = 0; i < as.length; i++) { 771 byte a = as[i]; 772 rs[i] = (byte)(~((byte)a)); 773 } 774 775 return rs; 776 } 777 778 779 780 @Benchmark 781 public Object notMasked() { 782 byte[] as = fa.apply(size); 783 byte[] rs = fr.apply(size); 784 boolean[] ms = fm.apply(size); 785 786 for (int i = 0; i < as.length; i++) { 787 byte a = as[i]; 788 boolean m = ms[i % ms.length]; 789 rs[i] = (m ? (byte)(~((byte)a)) : a); 790 } 791 792 return rs; 793 } 794 795 796 797 798 799 } 800