1 /*
   2  * Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have
  21  * questions.
  22  */
  23 
  24 package benchmark.jdk.incubator.vector;
  25 
  26 import jdk.incubator.vector.Vector;
  27 import jdk.incubator.vector.Vector.Shape;
  28 import jdk.incubator.vector.Vector.Species;
  29 import jdk.incubator.vector.ByteVector;
  30 
  31 import java.util.concurrent.TimeUnit;
  32 import java.util.function.BiFunction;
  33 import java.util.function.IntFunction;
  34 
  35 import org.openjdk.jmh.annotations.*;
  36 import org.openjdk.jmh.infra.Blackhole;
  37 
  38 @BenchmarkMode(Mode.Throughput)
  39 @OutputTimeUnit(TimeUnit.MILLISECONDS)
  40 @State(Scope.Benchmark)
  41 @Warmup(iterations = 3, time = 1)
  42 @Measurement(iterations = 5, time = 1)
  43 @Fork(value = 1, jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"})
  44 public class ByteMaxVector extends AbstractVectorBenchmark {
    // Vector species used by every benchmark in this class.
    // NOTE(review): SPECIES_MAX is presumably the widest byte species the
    // running platform supports -- confirm against the ByteVector API.
    static final Species<Byte> SPECIES = ByteVector.SPECIES_MAX;

    // Upper bound of the outer "ic" repetition loop found in each benchmark;
    // with a value of 1 that loop body runs exactly once.
    static final int INVOC_COUNT = 1; // get rid of outer loop

    // Number of elements in each benchmark array (JMH parameter, default 1024).
    // init() may adjust this value before the arrays are allocated.
    @Param("1024")
    int size;
  51 
  52     byte[] fill(IntFunction<Byte> f) {
  53         byte[] array = new byte[size];
  54         for (int i = 0; i < array.length; i++) {
  55             array[i] = f.apply(i);
  56         }
  57         return array;
  58     }
  59 
    // Benchmark data, allocated in init():
    //   a, b, c - input arrays; r - output array written by the benchmarks.
    byte[] a, b, c, r;
    // m - input mask bits; rm - output array for the mask-reduction benchmarks.
    boolean[] m, rm;
    // s - per-element indexes, each drawn from [0, SPECIES.length()) in init().
    int[] s;
  63 
  64     @Setup
  65     public void init() {
  66         size += size % SPECIES.length(); // FIXME: add post-loops
  67 
  68         a = fill(i -> (byte)(2*i));
  69         b = fill(i -> (byte)(i+1));
  70         c = fill(i -> (byte)(i+5));
  71         r = fill(i -> (byte)0);
  72 
  73         m = fillMask(size, i -> (i % 2) == 0);
  74         rm = fillMask(size, i -> false);
  75 
  76         s = fillInt(size, i -> RANDOM.nextInt(SPECIES.length()));
  77     }
  78 
    // Accessors through which the benchmarks obtain their arrays. Each one
    // ignores its argument(s) and returns the current value of the matching
    // field (read when the lambda is invoked, not when it is created).
    final IntFunction<byte[]> fa = vl -> a;
    final IntFunction<byte[]> fb = vl -> b;
    final IntFunction<byte[]> fc = vl -> c;
    final IntFunction<byte[]> fr = vl -> r;
    final IntFunction<boolean[]> fm = vl -> m;
    final IntFunction<boolean[]> fmr = vl -> rm;
    final BiFunction<Integer,Integer,int[]> fs = (i,j) -> s;
  86 
  87 
  88     @Benchmark
  89     public void add(Blackhole bh) {
  90         byte[] a = fa.apply(SPECIES.length());
  91         byte[] b = fb.apply(SPECIES.length());
  92         byte[] r = fr.apply(SPECIES.length());
  93 
  94         for (int ic = 0; ic < INVOC_COUNT; ic++) {
  95             for (int i = 0; i < a.length; i += SPECIES.length()) {
  96                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
  97                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
  98                 av.add(bv).intoArray(r, i);
  99             }
 100         }
 101 
 102         bh.consume(r);
 103     }
 104 
 105     @Benchmark
 106     public void addMasked(Blackhole bh) {
 107         byte[] a = fa.apply(SPECIES.length());
 108         byte[] b = fb.apply(SPECIES.length());
 109         byte[] r = fr.apply(SPECIES.length());
 110         boolean[] mask = fm.apply(SPECIES.length());
 111         Vector.Mask<Byte> vmask = ByteVector.maskFromValues(SPECIES, mask);
 112 
 113         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 114             for (int i = 0; i < a.length; i += SPECIES.length()) {
 115                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 116                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 117                 av.add(bv, vmask).intoArray(r, i);
 118             }
 119         }
 120 
 121         bh.consume(r);
 122     }
 123 
 124     @Benchmark
 125     public void sub(Blackhole bh) {
 126         byte[] a = fa.apply(SPECIES.length());
 127         byte[] b = fb.apply(SPECIES.length());
 128         byte[] r = fr.apply(SPECIES.length());
 129 
 130         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 131             for (int i = 0; i < a.length; i += SPECIES.length()) {
 132                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 133                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 134                 av.sub(bv).intoArray(r, i);
 135             }
 136         }
 137 
 138         bh.consume(r);
 139     }
 140 
 141     @Benchmark
 142     public void subMasked(Blackhole bh) {
 143         byte[] a = fa.apply(SPECIES.length());
 144         byte[] b = fb.apply(SPECIES.length());
 145         byte[] r = fr.apply(SPECIES.length());
 146         boolean[] mask = fm.apply(SPECIES.length());
 147         Vector.Mask<Byte> vmask = ByteVector.maskFromValues(SPECIES, mask);
 148 
 149         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 150             for (int i = 0; i < a.length; i += SPECIES.length()) {
 151                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 152                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 153                 av.sub(bv, vmask).intoArray(r, i);
 154             }
 155         }
 156 
 157         bh.consume(r);
 158     }
 159 
 160 
 161 
 162     @Benchmark
 163     public void mul(Blackhole bh) {
 164         byte[] a = fa.apply(SPECIES.length());
 165         byte[] b = fb.apply(SPECIES.length());
 166         byte[] r = fr.apply(SPECIES.length());
 167 
 168         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 169             for (int i = 0; i < a.length; i += SPECIES.length()) {
 170                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 171                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 172                 av.mul(bv).intoArray(r, i);
 173             }
 174         }
 175 
 176         bh.consume(r);
 177     }
 178 
 179     @Benchmark
 180     public void mulMasked(Blackhole bh) {
 181         byte[] a = fa.apply(SPECIES.length());
 182         byte[] b = fb.apply(SPECIES.length());
 183         byte[] r = fr.apply(SPECIES.length());
 184         boolean[] mask = fm.apply(SPECIES.length());
 185         Vector.Mask<Byte> vmask = ByteVector.maskFromValues(SPECIES, mask);
 186 
 187         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 188             for (int i = 0; i < a.length; i += SPECIES.length()) {
 189                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 190                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 191                 av.mul(bv, vmask).intoArray(r, i);
 192             }
 193         }
 194 
 195         bh.consume(r);
 196     }
 197 
 198 
 199     @Benchmark
 200     public void and(Blackhole bh) {
 201         byte[] a = fa.apply(SPECIES.length());
 202         byte[] b = fb.apply(SPECIES.length());
 203         byte[] r = fr.apply(SPECIES.length());
 204 
 205         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 206             for (int i = 0; i < a.length; i += SPECIES.length()) {
 207                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 208                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 209                 av.and(bv).intoArray(r, i);
 210             }
 211         }
 212 
 213         bh.consume(r);
 214     }
 215 
 216 
 217 
 218     @Benchmark
 219     public void andMasked(Blackhole bh) {
 220         byte[] a = fa.apply(SPECIES.length());
 221         byte[] b = fb.apply(SPECIES.length());
 222         byte[] r = fr.apply(SPECIES.length());
 223         boolean[] mask = fm.apply(SPECIES.length());
 224         Vector.Mask<Byte> vmask = ByteVector.maskFromValues(SPECIES, mask);
 225 
 226         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 227             for (int i = 0; i < a.length; i += SPECIES.length()) {
 228                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 229                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 230                 av.and(bv, vmask).intoArray(r, i);
 231             }
 232         }
 233 
 234         bh.consume(r);
 235     }
 236 
 237 
 238 
 239     @Benchmark
 240     public void or(Blackhole bh) {
 241         byte[] a = fa.apply(SPECIES.length());
 242         byte[] b = fb.apply(SPECIES.length());
 243         byte[] r = fr.apply(SPECIES.length());
 244 
 245         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 246             for (int i = 0; i < a.length; i += SPECIES.length()) {
 247                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 248                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 249                 av.or(bv).intoArray(r, i);
 250             }
 251         }
 252 
 253         bh.consume(r);
 254     }
 255 
 256 
 257 
 258     @Benchmark
 259     public void orMasked(Blackhole bh) {
 260         byte[] a = fa.apply(SPECIES.length());
 261         byte[] b = fb.apply(SPECIES.length());
 262         byte[] r = fr.apply(SPECIES.length());
 263         boolean[] mask = fm.apply(SPECIES.length());
 264         Vector.Mask<Byte> vmask = ByteVector.maskFromValues(SPECIES, mask);
 265 
 266         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 267             for (int i = 0; i < a.length; i += SPECIES.length()) {
 268                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 269                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 270                 av.or(bv, vmask).intoArray(r, i);
 271             }
 272         }
 273 
 274         bh.consume(r);
 275     }
 276 
 277 
 278 
 279     @Benchmark
 280     public void xor(Blackhole bh) {
 281         byte[] a = fa.apply(SPECIES.length());
 282         byte[] b = fb.apply(SPECIES.length());
 283         byte[] r = fr.apply(SPECIES.length());
 284 
 285         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 286             for (int i = 0; i < a.length; i += SPECIES.length()) {
 287                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 288                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 289                 av.xor(bv).intoArray(r, i);
 290             }
 291         }
 292 
 293         bh.consume(r);
 294     }
 295 
 296 
 297 
 298     @Benchmark
 299     public void xorMasked(Blackhole bh) {
 300         byte[] a = fa.apply(SPECIES.length());
 301         byte[] b = fb.apply(SPECIES.length());
 302         byte[] r = fr.apply(SPECIES.length());
 303         boolean[] mask = fm.apply(SPECIES.length());
 304         Vector.Mask<Byte> vmask = ByteVector.maskFromValues(SPECIES, mask);
 305 
 306         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 307             for (int i = 0; i < a.length; i += SPECIES.length()) {
 308                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 309                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 310                 av.xor(bv, vmask).intoArray(r, i);
 311             }
 312         }
 313 
 314         bh.consume(r);
 315     }
 316 
 317 
 318 
 319 
 320 
 321 
 322 
 323 
 324 
 325 
 326 
 327 
 328 
 329 
 330 
 331     @Benchmark
 332     public void aShiftRShift(Blackhole bh) {
 333         byte[] a = fa.apply(SPECIES.length());
 334         byte[] b = fb.apply(SPECIES.length());
 335         byte[] r = fr.apply(SPECIES.length());
 336 
 337         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 338             for (int i = 0; i < a.length; i += SPECIES.length()) {
 339                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 340                 av.aShiftR((int)b[i]).intoArray(r, i);
 341             }
 342         }
 343 
 344         bh.consume(r);
 345     }
 346 
 347 
 348 
 349     @Benchmark
 350     public void aShiftRMaskedShift(Blackhole bh) {
 351         byte[] a = fa.apply(SPECIES.length());
 352         byte[] b = fb.apply(SPECIES.length());
 353         byte[] r = fr.apply(SPECIES.length());
 354         boolean[] mask = fm.apply(SPECIES.length());
 355         Vector.Mask<Byte> vmask = ByteVector.maskFromValues(SPECIES, mask);
 356 
 357         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 358             for (int i = 0; i < a.length; i += SPECIES.length()) {
 359                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 360                 av.aShiftR((int)b[i], vmask).intoArray(r, i);
 361             }
 362         }
 363 
 364         bh.consume(r);
 365     }
 366 
 367 
 368 
 369     @Benchmark
 370     public void shiftLShift(Blackhole bh) {
 371         byte[] a = fa.apply(SPECIES.length());
 372         byte[] b = fb.apply(SPECIES.length());
 373         byte[] r = fr.apply(SPECIES.length());
 374 
 375         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 376             for (int i = 0; i < a.length; i += SPECIES.length()) {
 377                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 378                 av.shiftL((int)b[i]).intoArray(r, i);
 379             }
 380         }
 381 
 382         bh.consume(r);
 383     }
 384 
 385 
 386 
 387     @Benchmark
 388     public void shiftLMaskedShift(Blackhole bh) {
 389         byte[] a = fa.apply(SPECIES.length());
 390         byte[] b = fb.apply(SPECIES.length());
 391         byte[] r = fr.apply(SPECIES.length());
 392         boolean[] mask = fm.apply(SPECIES.length());
 393         Vector.Mask<Byte> vmask = ByteVector.maskFromValues(SPECIES, mask);
 394 
 395         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 396             for (int i = 0; i < a.length; i += SPECIES.length()) {
 397                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 398                 av.shiftL((int)b[i], vmask).intoArray(r, i);
 399             }
 400         }
 401 
 402         bh.consume(r);
 403     }
 404 
 405 
 406 
 407     @Benchmark
 408     public void shiftRShift(Blackhole bh) {
 409         byte[] a = fa.apply(SPECIES.length());
 410         byte[] b = fb.apply(SPECIES.length());
 411         byte[] r = fr.apply(SPECIES.length());
 412 
 413         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 414             for (int i = 0; i < a.length; i += SPECIES.length()) {
 415                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 416                 av.shiftR((int)b[i]).intoArray(r, i);
 417             }
 418         }
 419 
 420         bh.consume(r);
 421     }
 422 
 423 
 424 
 425     @Benchmark
 426     public void shiftRMaskedShift(Blackhole bh) {
 427         byte[] a = fa.apply(SPECIES.length());
 428         byte[] b = fb.apply(SPECIES.length());
 429         byte[] r = fr.apply(SPECIES.length());
 430         boolean[] mask = fm.apply(SPECIES.length());
 431         Vector.Mask<Byte> vmask = ByteVector.maskFromValues(SPECIES, mask);
 432 
 433         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 434             for (int i = 0; i < a.length; i += SPECIES.length()) {
 435                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 436                 av.shiftR((int)b[i], vmask).intoArray(r, i);
 437             }
 438         }
 439 
 440         bh.consume(r);
 441     }
 442 
 443 
 444 
 445 
 446 
 447 
 448 
 449 
 450     @Benchmark
 451     public void max(Blackhole bh) {
 452         byte[] a = fa.apply(SPECIES.length());
 453         byte[] b = fb.apply(SPECIES.length());
 454         byte[] r = fr.apply(SPECIES.length());
 455 
 456         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 457             for (int i = 0; i < a.length; i += SPECIES.length()) {
 458                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 459                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 460                 av.max(bv).intoArray(r, i);
 461             }
 462         }
 463 
 464         bh.consume(r);
 465     }
 466 
 467     @Benchmark
 468     public void min(Blackhole bh) {
 469         byte[] a = fa.apply(SPECIES.length());
 470         byte[] b = fb.apply(SPECIES.length());
 471         byte[] r = fr.apply(SPECIES.length());
 472 
 473         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 474             for (int i = 0; i < a.length; i += SPECIES.length()) {
 475                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 476                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 477                 av.min(bv).intoArray(r, i);
 478             }
 479         }
 480 
 481         bh.consume(r);
 482     }
 483 
 484 
 485     @Benchmark
 486     public void andAll(Blackhole bh) {
 487         byte[] a = fa.apply(SPECIES.length());
 488         byte ra = -1;
 489 
 490         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 491             ra = -1;
 492             for (int i = 0; i < a.length; i += SPECIES.length()) {
 493                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 494                 ra &= av.andAll();
 495             }
 496         }
 497         bh.consume(ra);
 498     }
 499 
 500 
 501 
 502     @Benchmark
 503     public void orAll(Blackhole bh) {
 504         byte[] a = fa.apply(SPECIES.length());
 505         byte ra = 0;
 506 
 507         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 508             ra = 0;
 509             for (int i = 0; i < a.length; i += SPECIES.length()) {
 510                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 511                 ra |= av.orAll();
 512             }
 513         }
 514         bh.consume(ra);
 515     }
 516 
 517 
 518 
 519     @Benchmark
 520     public void xorAll(Blackhole bh) {
 521         byte[] a = fa.apply(SPECIES.length());
 522         byte ra = 0;
 523 
 524         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 525             ra = 0;
 526             for (int i = 0; i < a.length; i += SPECIES.length()) {
 527                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 528                 ra ^= av.xorAll();
 529             }
 530         }
 531         bh.consume(ra);
 532     }
 533 
 534 
 535     @Benchmark
 536     public void addAll(Blackhole bh) {
 537         byte[] a = fa.apply(SPECIES.length());
 538         byte ra = 0;
 539 
 540         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 541             ra = 0;
 542             for (int i = 0; i < a.length; i += SPECIES.length()) {
 543                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 544                 ra += av.addAll();
 545             }
 546         }
 547         bh.consume(ra);
 548     }
 549 
 550     @Benchmark
 551     public void mulAll(Blackhole bh) {
 552         byte[] a = fa.apply(SPECIES.length());
 553         byte ra = 1;
 554 
 555         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 556             ra = 1;
 557             for (int i = 0; i < a.length; i += SPECIES.length()) {
 558                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 559                 ra *= av.mulAll();
 560             }
 561         }
 562         bh.consume(ra);
 563     }
 564 
 565     @Benchmark
 566     public void minAll(Blackhole bh) {
 567         byte[] a = fa.apply(SPECIES.length());
 568         byte ra = Byte.MAX_VALUE;
 569 
 570         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 571             ra = Byte.MAX_VALUE;
 572             for (int i = 0; i < a.length; i += SPECIES.length()) {
 573                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 574                 ra = (byte)Math.min(ra, av.minAll());
 575             }
 576         }
 577         bh.consume(ra);
 578     }
 579 
 580     @Benchmark
 581     public void maxAll(Blackhole bh) {
 582         byte[] a = fa.apply(SPECIES.length());
 583         byte ra = Byte.MIN_VALUE;
 584 
 585         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 586             ra = Byte.MIN_VALUE;
 587             for (int i = 0; i < a.length; i += SPECIES.length()) {
 588                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 589                 ra = (byte)Math.max(ra, av.maxAll());
 590             }
 591         }
 592         bh.consume(ra);
 593     }
 594 
 595 
 596     @Benchmark
 597     public void anyTrue(Blackhole bh) {
 598         boolean[] mask = fm.apply(SPECIES.length());
 599         boolean[] r = fmr.apply(SPECIES.length());
 600 
 601         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 602             for (int i = 0; i < mask.length; i += SPECIES.length()) {
 603                 Vector.Mask<Byte> vmask = ByteVector.maskFromArray(SPECIES, mask, i);
 604                 r[i] = vmask.anyTrue();
 605             }
 606         }
 607 
 608         bh.consume(r);
 609     }
 610 
 611 
 612 
 613     @Benchmark
 614     public void allTrue(Blackhole bh) {
 615         boolean[] mask = fm.apply(SPECIES.length());
 616         boolean[] r = fmr.apply(SPECIES.length());
 617 
 618         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 619             for (int i = 0; i < mask.length; i += SPECIES.length()) {
 620                 Vector.Mask<Byte> vmask = ByteVector.maskFromArray(SPECIES, mask, i);
 621                 r[i] = vmask.allTrue();
 622             }
 623         }
 624 
 625         bh.consume(r);
 626     }
 627 
 628 
 629     @Benchmark
 630     public void with(Blackhole bh) {
 631         byte[] a = fa.apply(SPECIES.length());
 632         byte[] r = fr.apply(SPECIES.length());
 633 
 634         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 635             for (int i = 0; i < a.length; i += SPECIES.length()) {
 636                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 637                 av.with(0, (byte)4).intoArray(r, i);
 638             }
 639         }
 640 
 641         bh.consume(r);
 642     }
 643 
 644     @Benchmark
 645     public Object lessThan() {
 646         byte[] a = fa.apply(size);
 647         byte[] b = fb.apply(size);
 648         boolean[] ms = fm.apply(size);
 649         Vector.Mask<Byte> m = ByteVector.maskFromArray(SPECIES, ms, 0);
 650 
 651         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 652             for (int i = 0; i < a.length; i += SPECIES.length()) {
 653                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 654                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 655                 Vector.Mask<Byte> mv = av.lessThan(bv);
 656 
 657                 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
 658             }
 659         }
 660         return m;
 661     }
 662 
 663 
 664     @Benchmark
 665     public Object greaterThan() {
 666         byte[] a = fa.apply(size);
 667         byte[] b = fb.apply(size);
 668         boolean[] ms = fm.apply(size);
 669         Vector.Mask<Byte> m = ByteVector.maskFromArray(SPECIES, ms, 0);
 670 
 671         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 672             for (int i = 0; i < a.length; i += SPECIES.length()) {
 673                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 674                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 675                 Vector.Mask<Byte> mv = av.greaterThan(bv);
 676 
 677                 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
 678             }
 679         }
 680         return m;
 681     }
 682 
 683 
 684     @Benchmark
 685     public Object equal() {
 686         byte[] a = fa.apply(size);
 687         byte[] b = fb.apply(size);
 688         boolean[] ms = fm.apply(size);
 689         Vector.Mask<Byte> m = ByteVector.maskFromArray(SPECIES, ms, 0);
 690 
 691         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 692             for (int i = 0; i < a.length; i += SPECIES.length()) {
 693                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 694                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 695                 Vector.Mask<Byte> mv = av.equal(bv);
 696 
 697                 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
 698             }
 699         }
 700         return m;
 701     }
 702 
 703 
 704     @Benchmark
 705     public Object notEqual() {
 706         byte[] a = fa.apply(size);
 707         byte[] b = fb.apply(size);
 708         boolean[] ms = fm.apply(size);
 709         Vector.Mask<Byte> m = ByteVector.maskFromArray(SPECIES, ms, 0);
 710 
 711         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 712             for (int i = 0; i < a.length; i += SPECIES.length()) {
 713                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 714                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 715                 Vector.Mask<Byte> mv = av.notEqual(bv);
 716 
 717                 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
 718             }
 719         }
 720         return m;
 721     }
 722 
 723 
 724     @Benchmark
 725     public Object lessThanEq() {
 726         byte[] a = fa.apply(size);
 727         byte[] b = fb.apply(size);
 728         boolean[] ms = fm.apply(size);
 729         Vector.Mask<Byte> m = ByteVector.maskFromArray(SPECIES, ms, 0);
 730 
 731         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 732             for (int i = 0; i < a.length; i += SPECIES.length()) {
 733                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 734                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 735                 Vector.Mask<Byte> mv = av.lessThanEq(bv);
 736 
 737                 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
 738             }
 739         }
 740         return m;
 741     }
 742 
 743 
 744     @Benchmark
 745     public Object greaterThanEq() {
 746         byte[] a = fa.apply(size);
 747         byte[] b = fb.apply(size);
 748         boolean[] ms = fm.apply(size);
 749         Vector.Mask<Byte> m = ByteVector.maskFromArray(SPECIES, ms, 0);
 750 
 751         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 752             for (int i = 0; i < a.length; i += SPECIES.length()) {
 753                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 754                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 755                 Vector.Mask<Byte> mv = av.greaterThanEq(bv);
 756 
 757                 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
 758             }
 759         }
 760         return m;
 761     }
 762 
 763 
 764     @Benchmark
 765     public void blend(Blackhole bh) {
 766         byte[] a = fa.apply(SPECIES.length());
 767         byte[] b = fb.apply(SPECIES.length());
 768         byte[] r = fr.apply(SPECIES.length());
 769         boolean[] mask = fm.apply(SPECIES.length());
 770         Vector.Mask<Byte> vmask = ByteVector.maskFromValues(SPECIES, mask);
 771 
 772         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 773             for (int i = 0; i < a.length; i += SPECIES.length()) {
 774                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 775                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 776                 av.blend(bv, vmask).intoArray(r, i);
 777             }
 778         }
 779 
 780         bh.consume(r);
 781     }
 782 
 783     @Benchmark
 784     public void rearrange(Blackhole bh) {
 785         byte[] a = fa.apply(SPECIES.length());
 786         int[] order = fs.apply(a.length, SPECIES.length());
 787         byte[] r = fr.apply(SPECIES.length());
 788 
 789         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 790             for (int i = 0; i < a.length; i += SPECIES.length()) {
 791                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 792                 av.rearrange(ByteVector.shuffleFromArray(SPECIES, order, i)).intoArray(r, i);
 793             }
 794         }
 795 
 796         bh.consume(r);
 797     }
 798 
 799     @Benchmark
 800     public void extract(Blackhole bh) {
 801         byte[] a = fa.apply(SPECIES.length());
 802         byte[] r = fr.apply(SPECIES.length());
 803 
 804         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 805             for (int i = 0; i < a.length; i += SPECIES.length()) {
 806                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 807                 int num_lanes = SPECIES.length();
 808                 // Manually unroll because full unroll happens after intrinsification.
 809                 // Unroll is needed because get intrinsic requires for index to be a known constant.
 810                 if (num_lanes == 1) {
 811                     r[i]=av.get(0);
 812                 } else if (num_lanes == 2) {
 813                     r[i]=av.get(0);
 814                     r[i+1]=av.get(1);
 815                 } else if (num_lanes == 4) {
 816                     r[i]=av.get(0);
 817                     r[i+1]=av.get(1);
 818                     r[i+2]=av.get(2);
 819                     r[i+3]=av.get(3);
 820                 } else if (num_lanes == 8) {
 821                     r[i]=av.get(0);
 822                     r[i+1]=av.get(1);
 823                     r[i+2]=av.get(2);
 824                     r[i+3]=av.get(3);
 825                     r[i+4]=av.get(4);
 826                     r[i+5]=av.get(5);
 827                     r[i+6]=av.get(6);
 828                     r[i+7]=av.get(7);
 829                 } else if (num_lanes == 16) {
 830                     r[i]=av.get(0);
 831                     r[i+1]=av.get(1);
 832                     r[i+2]=av.get(2);
 833                     r[i+3]=av.get(3);
 834                     r[i+4]=av.get(4);
 835                     r[i+5]=av.get(5);
 836                     r[i+6]=av.get(6);
 837                     r[i+7]=av.get(7);
 838                     r[i+8]=av.get(8);
 839                     r[i+9]=av.get(9);
 840                     r[i+10]=av.get(10);
 841                     r[i+11]=av.get(11);
 842                     r[i+12]=av.get(12);
 843                     r[i+13]=av.get(13);
 844                     r[i+14]=av.get(14);
 845                     r[i+15]=av.get(15);
 846                 } else if (num_lanes == 32) {
 847                     r[i]=av.get(0);
 848                     r[i+1]=av.get(1);
 849                     r[i+2]=av.get(2);
 850                     r[i+3]=av.get(3);
 851                     r[i+4]=av.get(4);
 852                     r[i+5]=av.get(5);
 853                     r[i+6]=av.get(6);
 854                     r[i+7]=av.get(7);
 855                     r[i+8]=av.get(8);
 856                     r[i+9]=av.get(9);
 857                     r[i+10]=av.get(10);
 858                     r[i+11]=av.get(11);
 859                     r[i+12]=av.get(12);
 860                     r[i+13]=av.get(13);
 861                     r[i+14]=av.get(14);
 862                     r[i+15]=av.get(15);
 863                     r[i+16]=av.get(16);
 864                     r[i+17]=av.get(17);
 865                     r[i+18]=av.get(18);
 866                     r[i+19]=av.get(19);
 867                     r[i+20]=av.get(20);
 868                     r[i+21]=av.get(21);
 869                     r[i+22]=av.get(22);
 870                     r[i+23]=av.get(23);
 871                     r[i+24]=av.get(24);
 872                     r[i+25]=av.get(25);
 873                     r[i+26]=av.get(26);
 874                     r[i+27]=av.get(27);
 875                     r[i+28]=av.get(28);
 876                     r[i+29]=av.get(29);
 877                     r[i+30]=av.get(30);
 878                     r[i+31]=av.get(31);
 879                 } else if (num_lanes == 64) {
 880                     r[i]=av.get(0);
 881                     r[i+1]=av.get(1);
 882                     r[i+2]=av.get(2);
 883                     r[i+3]=av.get(3);
 884                     r[i+4]=av.get(4);
 885                     r[i+5]=av.get(5);
 886                     r[i+6]=av.get(6);
 887                     r[i+7]=av.get(7);
 888                     r[i+8]=av.get(8);
 889                     r[i+9]=av.get(9);
 890                     r[i+10]=av.get(10);
 891                     r[i+11]=av.get(11);
 892                     r[i+12]=av.get(12);
 893                     r[i+13]=av.get(13);
 894                     r[i+14]=av.get(14);
 895                     r[i+15]=av.get(15);
 896                     r[i+16]=av.get(16);
 897                     r[i+17]=av.get(17);
 898                     r[i+18]=av.get(18);
 899                     r[i+19]=av.get(19);
 900                     r[i+20]=av.get(20);
 901                     r[i+21]=av.get(21);
 902                     r[i+22]=av.get(22);
 903                     r[i+23]=av.get(23);
 904                     r[i+24]=av.get(24);
 905                     r[i+25]=av.get(25);
 906                     r[i+26]=av.get(26);
 907                     r[i+27]=av.get(27);
 908                     r[i+28]=av.get(28);
 909                     r[i+29]=av.get(29);
 910                     r[i+30]=av.get(30);
 911                     r[i+31]=av.get(31);
 912                     r[i+32]=av.get(32);
 913                     r[i+33]=av.get(33);
 914                     r[i+34]=av.get(34);
 915                     r[i+35]=av.get(35);
 916                     r[i+36]=av.get(36);
 917                     r[i+37]=av.get(37);
 918                     r[i+38]=av.get(38);
 919                     r[i+39]=av.get(39);
 920                     r[i+40]=av.get(40);
 921                     r[i+41]=av.get(41);
 922                     r[i+42]=av.get(42);
 923                     r[i+43]=av.get(43);
 924                     r[i+44]=av.get(44);
 925                     r[i+45]=av.get(45);
 926                     r[i+46]=av.get(46);
 927                     r[i+47]=av.get(47);
 928                     r[i+48]=av.get(48);
 929                     r[i+49]=av.get(49);
 930                     r[i+50]=av.get(50);
 931                     r[i+51]=av.get(51);
 932                     r[i+52]=av.get(52);
 933                     r[i+53]=av.get(53);
 934                     r[i+54]=av.get(54);
 935                     r[i+55]=av.get(55);
 936                     r[i+56]=av.get(56);
 937                     r[i+57]=av.get(57);
 938                     r[i+58]=av.get(58);
 939                     r[i+59]=av.get(59);
 940                     r[i+60]=av.get(60);
 941                     r[i+61]=av.get(61);
 942                     r[i+62]=av.get(62);
 943                     r[i+63]=av.get(63);
 944                 } else {
 945                     for (int j = 0; j < SPECIES.length(); j++) {
 946                         r[i+j]=av.get(j);
 947                     }
 948                 }
 949             }
 950         }
 951 
 952         bh.consume(r);
 953     }
 954 
 955 
 956 
 957 
 958 
 959 
 960 
 961 
 962 
 963 
 964 
 965 
 966 
 967 
 968 
 969 
 970 
 971 
 972 
 973 
 974 
 975     @Benchmark
 976     public void neg(Blackhole bh) {
 977         byte[] a = fa.apply(SPECIES.length());
 978         byte[] r = fr.apply(SPECIES.length());
 979 
 980         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 981             for (int i = 0; i < a.length; i += SPECIES.length()) {
 982                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 983                 av.neg().intoArray(r, i);
 984             }
 985         }
 986 
 987         bh.consume(r);
 988     }
 989 
 990     @Benchmark
 991     public void negMasked(Blackhole bh) {
 992         byte[] a = fa.apply(SPECIES.length());
 993         byte[] r = fr.apply(SPECIES.length());
 994         boolean[] mask = fm.apply(SPECIES.length());
 995         Vector.Mask<Byte> vmask = ByteVector.maskFromValues(SPECIES, mask);
 996 
 997         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 998             for (int i = 0; i < a.length; i += SPECIES.length()) {
 999                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1000                 av.neg(vmask).intoArray(r, i);
1001             }
1002         }
1003 
1004         bh.consume(r);
1005     }
1006 
1007     @Benchmark
1008     public void abs(Blackhole bh) {
1009         byte[] a = fa.apply(SPECIES.length());
1010         byte[] r = fr.apply(SPECIES.length());
1011 
1012         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1013             for (int i = 0; i < a.length; i += SPECIES.length()) {
1014                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1015                 av.abs().intoArray(r, i);
1016             }
1017         }
1018 
1019         bh.consume(r);
1020     }
1021 
1022     @Benchmark
1023     public void absMasked(Blackhole bh) {
1024         byte[] a = fa.apply(SPECIES.length());
1025         byte[] r = fr.apply(SPECIES.length());
1026         boolean[] mask = fm.apply(SPECIES.length());
1027         Vector.Mask<Byte> vmask = ByteVector.maskFromValues(SPECIES, mask);
1028 
1029         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1030             for (int i = 0; i < a.length; i += SPECIES.length()) {
1031                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1032                 av.abs(vmask).intoArray(r, i);
1033             }
1034         }
1035 
1036         bh.consume(r);
1037     }
1038 
1039 
1040     @Benchmark
1041     public void not(Blackhole bh) {
1042         byte[] a = fa.apply(SPECIES.length());
1043         byte[] r = fr.apply(SPECIES.length());
1044 
1045         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1046             for (int i = 0; i < a.length; i += SPECIES.length()) {
1047                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1048                 av.not().intoArray(r, i);
1049             }
1050         }
1051 
1052         bh.consume(r);
1053     }
1054 
1055 
1056 
1057     @Benchmark
1058     public void notMasked(Blackhole bh) {
1059         byte[] a = fa.apply(SPECIES.length());
1060         byte[] r = fr.apply(SPECIES.length());
1061         boolean[] mask = fm.apply(SPECIES.length());
1062         Vector.Mask<Byte> vmask = ByteVector.maskFromValues(SPECIES, mask);
1063 
1064         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1065             for (int i = 0; i < a.length; i += SPECIES.length()) {
1066                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1067                 av.not(vmask).intoArray(r, i);
1068             }
1069         }
1070 
1071         bh.consume(r);
1072     }
1073 
1074 
1075 
1076 
1077 
1078 }
1079