1 /*
   2  * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have
  21  * questions.
  22  */
  23 
  24 package benchmark.jdk.incubator.vector;
  25 
  26 import java.util.concurrent.TimeUnit;
  27 import java.util.function.IntFunction;
  28 
  29 import org.openjdk.jmh.annotations.*;
  30 
  31 @BenchmarkMode(Mode.Throughput)
  32 @OutputTimeUnit(TimeUnit.MILLISECONDS)
  33 @State(Scope.Benchmark)
  34 @Warmup(iterations = 3, time = 1)
  35 @Measurement(iterations = 5, time = 1)
  36 @Fork(value = 1, jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"})
  37 public class ByteScalar extends AbstractVectorBenchmark {
  38     @Param("1024")
  39     int size;
  40 
  41     byte[] fill(IntFunction<Byte> f) {
  42         byte[] array = new byte[size];
  43         for (int i = 0; i < array.length; i++) {
  44             array[i] = f.apply(i);
  45         }
  46         return array;
  47     }
  48 
  49     byte[] as, bs, cs, rs;
  50     boolean[] ms, rms;
  51     int[] ss;
  52 
  53     @Setup
  54     public void init() {
  55         as = fill(i -> (byte)(2*i));
  56         bs = fill(i -> (byte)(i+1));
  57         cs = fill(i -> (byte)(i+5));
  58         rs = fill(i -> (byte)0);
  59         ms = fillMask(size, i -> (i % 2) == 0);
  60         rms = fillMask(size, i -> false);
  61 
  62         ss = fillInt(size, i -> RANDOM.nextInt(Math.max(i,1)));
  63     }
  64 
  65     final IntFunction<byte[]> fa = vl -> as;
  66     final IntFunction<byte[]> fb = vl -> bs;
  67     final IntFunction<byte[]> fc = vl -> cs;
  68     final IntFunction<byte[]> fr = vl -> rs;
  69     final IntFunction<boolean[]> fm = vl -> ms;
  70     final IntFunction<boolean[]> fmr = vl -> rms;
  71     final IntFunction<int[]> fs = vl -> ss;
  72 
  73 
  74     @Benchmark
  75     public Object add() {
  76         byte[] as = fa.apply(size);
  77         byte[] bs = fb.apply(size);
  78         byte[] rs = fr.apply(size);
  79 
  80         for (int i = 0; i < as.length; i++) {
  81             byte a = as[i];
  82             byte b = bs[i];
  83             rs[i] = (byte)(a + b);
  84         }
  85 
  86         return rs;
  87     }
  88 
  89     @Benchmark
  90     public Object addMasked() {
  91         byte[] as = fa.apply(size);
  92         byte[] bs = fb.apply(size);
  93         byte[] rs = fr.apply(size);
  94         boolean[] ms = fm.apply(size);
  95 
  96         for (int i = 0; i < as.length; i++) {
  97             byte a = as[i];
  98             byte b = bs[i];
  99             if (ms[i % ms.length]) {
 100                 rs[i] = (byte)(a + b);
 101             } else {
 102                 rs[i] = a;
 103             }
 104         }
 105         return rs;
 106     }
 107 
 108     @Benchmark
 109     public Object sub() {
 110         byte[] as = fa.apply(size);
 111         byte[] bs = fb.apply(size);
 112         byte[] rs = fr.apply(size);
 113 
 114         for (int i = 0; i < as.length; i++) {
 115             byte a = as[i];
 116             byte b = bs[i];
 117             rs[i] = (byte)(a - b);
 118         }
 119 
 120         return rs;
 121     }
 122 
 123     @Benchmark
 124     public Object subMasked() {
 125         byte[] as = fa.apply(size);
 126         byte[] bs = fb.apply(size);
 127         byte[] rs = fr.apply(size);
 128         boolean[] ms = fm.apply(size);
 129 
 130         for (int i = 0; i < as.length; i++) {
 131             byte a = as[i];
 132             byte b = bs[i];
 133             if (ms[i % ms.length]) {
 134                 rs[i] = (byte)(a - b);
 135             } else {
 136                 rs[i] = a;
 137             }
 138         }
 139         return rs;
 140     }
 141 
 142 
 143 
 144     @Benchmark
 145     public Object mul() {
 146         byte[] as = fa.apply(size);
 147         byte[] bs = fb.apply(size);
 148         byte[] rs = fr.apply(size);
 149 
 150         for (int i = 0; i < as.length; i++) {
 151             byte a = as[i];
 152             byte b = bs[i];
 153             rs[i] = (byte)(a * b);
 154         }
 155 
 156         return rs;
 157     }
 158 
 159     @Benchmark
 160     public Object mulMasked() {
 161         byte[] as = fa.apply(size);
 162         byte[] bs = fb.apply(size);
 163         byte[] rs = fr.apply(size);
 164         boolean[] ms = fm.apply(size);
 165 
 166         for (int i = 0; i < as.length; i++) {
 167             byte a = as[i];
 168             byte b = bs[i];
 169             if (ms[i % ms.length]) {
 170                 rs[i] = (byte)(a * b);
 171             } else {
 172                 rs[i] = a;
 173             }
 174         }
 175         return rs;
 176     }
 177 
 178 
 179     @Benchmark
 180     public Object and() {
 181         byte[] as = fa.apply(size);
 182         byte[] bs = fb.apply(size);
 183         byte[] rs = fr.apply(size);
 184 
 185         for (int i = 0; i < as.length; i++) {
 186             byte a = as[i];
 187             byte b = bs[i];
 188             rs[i] = (byte)(a & b);
 189         }
 190 
 191         return rs;
 192     }
 193 
 194 
 195 
 196     @Benchmark
 197     public Object andMasked() {
 198         byte[] as = fa.apply(size);
 199         byte[] bs = fb.apply(size);
 200         byte[] rs = fr.apply(size);
 201         boolean[] ms = fm.apply(size);
 202 
 203         for (int i = 0; i < as.length; i++) {
 204             byte a = as[i];
 205             byte b = bs[i];
 206             if (ms[i % ms.length]) {
 207                 rs[i] = (byte)(a & b);
 208             } else {
 209                 rs[i] = a;
 210             }
 211         }
 212         return rs;
 213     }
 214 
 215 
 216 
 217     @Benchmark
 218     public Object or() {
 219         byte[] as = fa.apply(size);
 220         byte[] bs = fb.apply(size);
 221         byte[] rs = fr.apply(size);
 222 
 223         for (int i = 0; i < as.length; i++) {
 224             byte a = as[i];
 225             byte b = bs[i];
 226             rs[i] = (byte)(a | b);
 227         }
 228 
 229         return rs;
 230     }
 231 
 232 
 233 
 234     @Benchmark
 235     public Object orMasked() {
 236         byte[] as = fa.apply(size);
 237         byte[] bs = fb.apply(size);
 238         byte[] rs = fr.apply(size);
 239         boolean[] ms = fm.apply(size);
 240 
 241         for (int i = 0; i < as.length; i++) {
 242             byte a = as[i];
 243             byte b = bs[i];
 244             if (ms[i % ms.length]) {
 245                 rs[i] = (byte)(a | b);
 246             } else {
 247                 rs[i] = a;
 248             }
 249         }
 250         return rs;
 251     }
 252 
 253 
 254 
 255     @Benchmark
 256     public Object xor() {
 257         byte[] as = fa.apply(size);
 258         byte[] bs = fb.apply(size);
 259         byte[] rs = fr.apply(size);
 260 
 261         for (int i = 0; i < as.length; i++) {
 262             byte a = as[i];
 263             byte b = bs[i];
 264             rs[i] = (byte)(a ^ b);
 265         }
 266 
 267         return rs;
 268     }
 269 
 270 
 271 
 272     @Benchmark
 273     public Object xorMasked() {
 274         byte[] as = fa.apply(size);
 275         byte[] bs = fb.apply(size);
 276         byte[] rs = fr.apply(size);
 277         boolean[] ms = fm.apply(size);
 278 
 279         for (int i = 0; i < as.length; i++) {
 280             byte a = as[i];
 281             byte b = bs[i];
 282             if (ms[i % ms.length]) {
 283                 rs[i] = (byte)(a ^ b);
 284             } else {
 285                 rs[i] = a;
 286             }
 287         }
 288         return rs;
 289     }
 290 
 291 
 292 
 293 
 294 
 295 
 296 
 297 
 298 
 299 
 300 
 301 
 302 
 303 
 304 
 305     @Benchmark
 306     public Object aShiftRShift() {
 307         byte[] as = fa.apply(size);
 308         byte[] bs = fb.apply(size);
 309         byte[] rs = fr.apply(size);
 310 
 311         for (int i = 0; i < as.length; i++) {
 312             byte a = as[i];
 313             byte b = bs[i];
 314             rs[i] = (byte)((a >> (b & 7)));
 315         }
 316 
 317         return rs;
 318     }
 319 
 320 
 321 
 322     @Benchmark
 323     public Object aShiftRMaskedShift() {
 324         byte[] as = fa.apply(size);
 325         byte[] bs = fb.apply(size);
 326         byte[] rs = fr.apply(size);
 327         boolean[] ms = fm.apply(size);
 328 
 329         for (int i = 0; i < as.length; i++) {
 330             byte a = as[i];
 331             byte b = bs[i];
 332             boolean m = ms[i % ms.length];
 333             rs[i] = (m ? (byte)((a >> (b & 7))) : a);
 334         }
 335 
 336         return rs;
 337     }
 338 
 339 
 340 
 341     @Benchmark
 342     public Object shiftLShift() {
 343         byte[] as = fa.apply(size);
 344         byte[] bs = fb.apply(size);
 345         byte[] rs = fr.apply(size);
 346 
 347         for (int i = 0; i < as.length; i++) {
 348             byte a = as[i];
 349             byte b = bs[i];
 350             rs[i] = (byte)((a << (b & 7)));
 351         }
 352 
 353         return rs;
 354     }
 355 
 356 
 357 
 358     @Benchmark
 359     public Object shiftLMaskedShift() {
 360         byte[] as = fa.apply(size);
 361         byte[] bs = fb.apply(size);
 362         byte[] rs = fr.apply(size);
 363         boolean[] ms = fm.apply(size);
 364 
 365         for (int i = 0; i < as.length; i++) {
 366             byte a = as[i];
 367             byte b = bs[i];
 368             boolean m = ms[i % ms.length];
 369             rs[i] = (m ? (byte)((a << (b & 7))) : a);
 370         }
 371 
 372         return rs;
 373     }
 374 
 375 
 376 
 377     @Benchmark
 378     public Object shiftRShift() {
 379         byte[] as = fa.apply(size);
 380         byte[] bs = fb.apply(size);
 381         byte[] rs = fr.apply(size);
 382 
 383         for (int i = 0; i < as.length; i++) {
 384             byte a = as[i];
 385             byte b = bs[i];
 386             rs[i] = (byte)(((a & 0xFF) >>> (b & 7)));
 387         }
 388 
 389         return rs;
 390     }
 391 
 392 
 393 
 394     @Benchmark
 395     public Object shiftRMaskedShift() {
 396         byte[] as = fa.apply(size);
 397         byte[] bs = fb.apply(size);
 398         byte[] rs = fr.apply(size);
 399         boolean[] ms = fm.apply(size);
 400 
 401         for (int i = 0; i < as.length; i++) {
 402             byte a = as[i];
 403             byte b = bs[i];
 404             boolean m = ms[i % ms.length];
 405             rs[i] = (m ? (byte)(((a & 0xFF) >>> (b & 7))) : a);
 406         }
 407 
 408         return rs;
 409     }
 410 
 411 
 412 
 413 
 414 
 415 
 416 
 417 
 418     @Benchmark
 419     public Object max() {
 420         byte[] as = fa.apply(size);
 421         byte[] bs = fb.apply(size);
 422         byte[] rs = fr.apply(size);
 423 
 424         for (int i = 0; i < as.length; i++) {
 425             byte a = as[i];
 426             byte b = bs[i];
 427             rs[i] = (byte)(Math.max(a, b));
 428         }
 429 
 430         return rs;
 431     }
 432 
 433     @Benchmark
 434     public Object min() {
 435         byte[] as = fa.apply(size);
 436         byte[] bs = fb.apply(size);
 437         byte[] rs = fr.apply(size);
 438 
 439         for (int i = 0; i < as.length; i++) {
 440             byte a = as[i];
 441             byte b = bs[i];
 442             rs[i] = (byte)(Math.min(a, b));
 443         }
 444 
 445         return rs;
 446     }
 447 
 448 
 449     @Benchmark
 450     public byte andAll() {
 451         byte[] as = fa.apply(size);
 452         byte r = -1;
 453         for (int i = 0; i < as.length; i++) {
 454             r &= as[i];
 455         }
 456         return r;
 457     }
 458 
 459 
 460 
 461     @Benchmark
 462     public byte orAll() {
 463         byte[] as = fa.apply(size);
 464         byte r = 0;
 465         for (int i = 0; i < as.length; i++) {
 466             r |= as[i];
 467         }
 468         return r;
 469     }
 470 
 471 
 472 
 473     @Benchmark
 474     public byte xorAll() {
 475         byte[] as = fa.apply(size);
 476         byte r = 0;
 477         for (int i = 0; i < as.length; i++) {
 478             r ^= as[i];
 479         }
 480         return r;
 481     }
 482 
 483 
 484     @Benchmark
 485     public byte addAll() {
 486         byte[] as = fa.apply(size);
 487         byte r = 0;
 488         for (int i = 0; i < as.length; i++) {
 489             r += as[i];
 490         }
 491         return r;
 492     }
 493 
 494     @Benchmark
 495     public byte mulAll() {
 496         byte[] as = fa.apply(size);
 497         byte r = 1;
 498         for (int i = 0; i < as.length; i++) {
 499             r *= as[i];
 500         }
 501         return r;
 502     }
 503 
 504     @Benchmark
 505     public byte minAll() {
 506         byte[] as = fa.apply(size);
 507         byte r = Byte.MAX_VALUE;
 508         for (int i = 0; i < as.length; i++) {
 509             r = (byte)Math.min(r, as[i]);
 510         }
 511         return r;
 512     }
 513 
 514     @Benchmark
 515     public byte maxAll() {
 516         byte[] as = fa.apply(size);
 517         byte r = Byte.MIN_VALUE;
 518         for (int i = 0; i < as.length; i++) {
 519             r = (byte)Math.max(r, as[i]);
 520         }
 521         return r;
 522     }
 523 
 524 
 525     @Benchmark
 526     public boolean anyTrue() {
 527         boolean[] ms = fm.apply(size);
 528         boolean r = false;
 529         for (int i = 0; i < ms.length; i++) {
 530             r |= ms[i];
 531         }
 532         return r;
 533     }
 534 
 535 
 536 
 537     @Benchmark
 538     public boolean allTrue() {
 539         boolean[] ms = fm.apply(size);
 540         boolean r = true;
 541         for (int i = 0; i < ms.length; i++) {
 542             r &= ms[i];
 543         }
 544         return r;
 545     }
 546 
 547 
 548     @Benchmark
 549     public boolean lessThan() {
 550         byte[] as = fa.apply(size);
 551         byte[] bs = fb.apply(size);
 552 
 553         boolean r = false;
 554         for (int i = 0; i < as.length; i++) {
 555             boolean m = (as[i] < bs[i]);
 556             r |= m; // accumulate so JIT can't eliminate the computation
 557         }
 558 
 559         return r;
 560     }
 561 
 562     @Benchmark
 563     public boolean greaterThan() {
 564         byte[] as = fa.apply(size);
 565         byte[] bs = fb.apply(size);
 566 
 567         boolean r = false;
 568         for (int i = 0; i < as.length; i++) {
 569             boolean m = (as[i] > bs[i]);
 570             r |= m; // accumulate so JIT can't eliminate the computation
 571         }
 572 
 573         return r;
 574     }
 575 
 576     @Benchmark
 577     public boolean equal() {
 578         byte[] as = fa.apply(size);
 579         byte[] bs = fb.apply(size);
 580 
 581         boolean r = false;
 582         for (int i = 0; i < as.length; i++) {
 583             boolean m = (as[i] == bs[i]);
 584             r |= m; // accumulate so JIT can't eliminate the computation
 585         }
 586 
 587         return r;
 588     }
 589 
 590     @Benchmark
 591     public boolean notEqual() {
 592         byte[] as = fa.apply(size);
 593         byte[] bs = fb.apply(size);
 594 
 595         boolean r = false;
 596         for (int i = 0; i < as.length; i++) {
 597             boolean m = (as[i] != bs[i]);
 598             r |= m; // accumulate so JIT can't eliminate the computation
 599         }
 600 
 601         return r;
 602     }
 603 
 604     @Benchmark
 605     public boolean lessThanEq() {
 606         byte[] as = fa.apply(size);
 607         byte[] bs = fb.apply(size);
 608 
 609         boolean r = false;
 610         for (int i = 0; i < as.length; i++) {
 611             boolean m = (as[i] <= bs[i]);
 612             r |= m; // accumulate so JIT can't eliminate the computation
 613         }
 614 
 615         return r;
 616     }
 617 
 618     @Benchmark
 619     public boolean greaterThanEq() {
 620         byte[] as = fa.apply(size);
 621         byte[] bs = fb.apply(size);
 622 
 623         boolean r = false;
 624         for (int i = 0; i < as.length; i++) {
 625             boolean m = (as[i] >= bs[i]);
 626             r |= m; // accumulate so JIT can't eliminate the computation
 627         }
 628 
 629         return r;
 630     }
 631 
 632     @Benchmark
 633     public Object blend() {
 634         byte[] as = fa.apply(size);
 635         byte[] bs = fb.apply(size);
 636         byte[] rs = fr.apply(size);
 637         boolean[] ms = fm.apply(size);
 638 
 639         for (int i = 0; i < as.length; i++) {
 640             byte a = as[i];
 641             byte b = bs[i];
 642             boolean m = ms[i % ms.length];
 643             rs[i] = (m ? b : a);
 644         }
 645 
 646         return rs;
 647     }
 648     Object rearrangeShared(int window) {
 649         byte[] as = fa.apply(size);
 650         int[] order = fs.apply(size);
 651         byte[] rs = fr.apply(size);
 652 
 653         for (int i = 0; i < as.length; i += window) {
 654             for (int j = 0; j < window; j++) {
 655                 byte a = as[i+j];
 656                 int pos = order[j];
 657                 rs[i + pos] = a;
 658             }
 659         }
 660 
 661         return rs;
 662     }
 663 
 664     @Benchmark
 665     public Object rearrange064() {
 666         int window = 64 / Byte.SIZE;
 667         return rearrangeShared(window);
 668     }
 669 
 670     @Benchmark
 671     public Object rearrange128() {
 672         int window = 128 / Byte.SIZE;
 673         return rearrangeShared(window);
 674     }
 675 
 676     @Benchmark
 677     public Object rearrange256() {
 678         int window = 256 / Byte.SIZE;
 679         return rearrangeShared(window);
 680     }
 681 
 682     @Benchmark
 683     public Object rearrange512() {
 684         int window = 512 / Byte.SIZE;
 685         return rearrangeShared(window);
 686     }
 687 
 688 
 689 
 690 
 691 
 692 
 693 
 694 
 695 
 696 
 697 
 698 
 699 
 700 
 701 
 702 
 703 
 704 
 705 
 706 
 707 
 708     @Benchmark
 709     public Object neg() {
 710         byte[] as = fa.apply(size);
 711         byte[] rs = fr.apply(size);
 712 
 713         for (int i = 0; i < as.length; i++) {
 714             byte a = as[i];
 715             rs[i] = (byte)(-((byte)a));
 716         }
 717 
 718         return rs;
 719     }
 720 
 721     @Benchmark
 722     public Object negMasked() {
 723         byte[] as = fa.apply(size);
 724         byte[] rs = fr.apply(size);
 725         boolean[] ms = fm.apply(size);
 726 
 727         for (int i = 0; i < as.length; i++) {
 728             byte a = as[i];
 729             boolean m = ms[i % ms.length];
 730             rs[i] = (m ? (byte)(-((byte)a)) : a);
 731         }
 732 
 733         return rs;
 734     }
 735 
 736     @Benchmark
 737     public Object abs() {
 738         byte[] as = fa.apply(size);
 739         byte[] rs = fr.apply(size);
 740 
 741         for (int i = 0; i < as.length; i++) {
 742             byte a = as[i];
 743             rs[i] = (byte)(Math.abs((byte)a));
 744         }
 745 
 746         return rs;
 747     }
 748 
 749     @Benchmark
 750     public Object absMasked() {
 751         byte[] as = fa.apply(size);
 752         byte[] rs = fr.apply(size);
 753         boolean[] ms = fm.apply(size);
 754 
 755         for (int i = 0; i < as.length; i++) {
 756             byte a = as[i];
 757             boolean m = ms[i % ms.length];
 758             rs[i] = (m ? (byte)(Math.abs((byte)a)) : a);
 759         }
 760 
 761         return rs;
 762     }
 763 
 764 
 765     @Benchmark
 766     public Object not() {
 767         byte[] as = fa.apply(size);
 768         byte[] rs = fr.apply(size);
 769 
 770         for (int i = 0; i < as.length; i++) {
 771             byte a = as[i];
 772             rs[i] = (byte)(~((byte)a));
 773         }
 774 
 775         return rs;
 776     }
 777 
 778 
 779 
 780     @Benchmark
 781     public Object notMasked() {
 782         byte[] as = fa.apply(size);
 783         byte[] rs = fr.apply(size);
 784         boolean[] ms = fm.apply(size);
 785 
 786         for (int i = 0; i < as.length; i++) {
 787             byte a = as[i];
 788             boolean m = ms[i % ms.length];
 789             rs[i] = (m ? (byte)(~((byte)a)) : a);
 790         }
 791 
 792         return rs;
 793     }
 794 
 795 
 796 
 797 
 798 
 799 }
 800