1 /*
   2  * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have
  21  * questions.
  22  */
  23 
  24 package benchmark.jdk.incubator.vector;
  25 
  26 import jdk.incubator.vector.Vector;
  27 import jdk.incubator.vector.Vector.Shape;
  28 import jdk.incubator.vector.Vector.Species;
  29 import jdk.incubator.vector.ByteVector;
  30 
  31 import java.util.concurrent.TimeUnit;
  32 import java.util.function.BiFunction;
  33 import java.util.function.IntFunction;
  34 
  35 import org.openjdk.jmh.annotations.*;
  36 import org.openjdk.jmh.infra.Blackhole;
  37 
  38 @BenchmarkMode(Mode.Throughput)
  39 @OutputTimeUnit(TimeUnit.MILLISECONDS)
  40 @State(Scope.Benchmark)
  41 @Warmup(iterations = 3, time = 1)
  42 @Measurement(iterations = 5, time = 1)
  43 @Fork(value = 1, jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"})
  44 public class Byte256Vector extends AbstractVectorBenchmark {
   45     static final Species<Byte> SPECIES = ByteVector.SPECIES_256; // vector species used for every load, store and mask in this class
   46 
   47     static final int INVOC_COUNT = 1; // repetition count of the outer benchmark loops; fixed at 1 to get rid of the outer loop
   48 
   49     @Param("1024") // JMH parameter; 1024 is the default value
   50     int size; // element count of the arrays built in init(), which may adjust it first
  51 
  52     byte[] fill(IntFunction<Byte> f) {
  53         byte[] array = new byte[size];
  54         for (int i = 0; i < array.length; i++) {
  55             array[i] = f.apply(i);
  56         }
  57         return array;
  58     }
  59 
   60     byte[] a, b, c, r; // a, b, c: input arrays; r: byte result buffer (zero-filled); all assigned in init()
   61     boolean[] m, rm; // m: mask bits, true at even indices; rm: boolean result buffer, initially all false
   62     int[] s; // index data produced in init() via RANDOM.nextInt(SPECIES.length()); read by rearrange through fs
  63 
  64     @Setup
  65     public void init() {
  66         size += size % SPECIES.length(); // FIXME: add post-loops
  67 
  68         a = fill(i -> (byte)(2*i));
  69         b = fill(i -> (byte)(i+1));
  70         c = fill(i -> (byte)(i+5));
  71         r = fill(i -> (byte)0);
  72 
  73         m = fillMask(size, i -> (i % 2) == 0);
  74         rm = fillMask(size, i -> false);
  75 
  76         s = fillInt(size, i -> RANDOM.nextInt(SPECIES.length()));
  77     }
  78 
   79     final IntFunction<byte[]> fa = vl -> a; // each supplier below ignores its argument(s) and returns the current value of the named field
   80     final IntFunction<byte[]> fb = vl -> b; // second byte input
   81     final IntFunction<byte[]> fc = vl -> c; // third byte input
   82     final IntFunction<byte[]> fr = vl -> r; // byte result buffer
   83     final IntFunction<boolean[]> fm = vl -> m; // mask input bits
   84     final IntFunction<boolean[]> fmr = vl -> rm; // boolean result buffer
   85     final BiFunction<Integer,Integer,int[]> fs = (i,j) -> s; // index array for rearrange
  86 
  87 
  88     @Benchmark
  89     public void add(Blackhole bh) {
  90         byte[] a = fa.apply(SPECIES.length());
  91         byte[] b = fb.apply(SPECIES.length());
  92         byte[] r = fr.apply(SPECIES.length());
  93 
  94         for (int ic = 0; ic < INVOC_COUNT; ic++) {
  95             for (int i = 0; i < a.length; i += SPECIES.length()) {
  96                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
  97                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
  98                 av.add(bv).intoArray(r, i);
  99             }
 100         }
 101 
 102         bh.consume(r);
 103     }
 104 
 105     @Benchmark
 106     public void addMasked(Blackhole bh) {
 107         byte[] a = fa.apply(SPECIES.length());
 108         byte[] b = fb.apply(SPECIES.length());
 109         byte[] r = fr.apply(SPECIES.length());
 110         boolean[] mask = fm.apply(SPECIES.length());
 111         Vector.Mask<Byte> vmask = ByteVector.maskFromValues(SPECIES, mask);
 112 
 113         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 114             for (int i = 0; i < a.length; i += SPECIES.length()) {
 115                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 116                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 117                 av.add(bv, vmask).intoArray(r, i);
 118             }
 119         }
 120 
 121         bh.consume(r);
 122     }
 123 
 124     @Benchmark
 125     public void sub(Blackhole bh) {
 126         byte[] a = fa.apply(SPECIES.length());
 127         byte[] b = fb.apply(SPECIES.length());
 128         byte[] r = fr.apply(SPECIES.length());
 129 
 130         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 131             for (int i = 0; i < a.length; i += SPECIES.length()) {
 132                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 133                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 134                 av.sub(bv).intoArray(r, i);
 135             }
 136         }
 137 
 138         bh.consume(r);
 139     }
 140 
 141     @Benchmark
 142     public void subMasked(Blackhole bh) {
 143         byte[] a = fa.apply(SPECIES.length());
 144         byte[] b = fb.apply(SPECIES.length());
 145         byte[] r = fr.apply(SPECIES.length());
 146         boolean[] mask = fm.apply(SPECIES.length());
 147         Vector.Mask<Byte> vmask = ByteVector.maskFromValues(SPECIES, mask);
 148 
 149         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 150             for (int i = 0; i < a.length; i += SPECIES.length()) {
 151                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 152                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 153                 av.sub(bv, vmask).intoArray(r, i);
 154             }
 155         }
 156 
 157         bh.consume(r);
 158     }
 159 
 160 
 161 
 162     @Benchmark
 163     public void mul(Blackhole bh) {
 164         byte[] a = fa.apply(SPECIES.length());
 165         byte[] b = fb.apply(SPECIES.length());
 166         byte[] r = fr.apply(SPECIES.length());
 167 
 168         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 169             for (int i = 0; i < a.length; i += SPECIES.length()) {
 170                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 171                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 172                 av.mul(bv).intoArray(r, i);
 173             }
 174         }
 175 
 176         bh.consume(r);
 177     }
 178 
 179     @Benchmark
 180     public void mulMasked(Blackhole bh) {
 181         byte[] a = fa.apply(SPECIES.length());
 182         byte[] b = fb.apply(SPECIES.length());
 183         byte[] r = fr.apply(SPECIES.length());
 184         boolean[] mask = fm.apply(SPECIES.length());
 185         Vector.Mask<Byte> vmask = ByteVector.maskFromValues(SPECIES, mask);
 186 
 187         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 188             for (int i = 0; i < a.length; i += SPECIES.length()) {
 189                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 190                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 191                 av.mul(bv, vmask).intoArray(r, i);
 192             }
 193         }
 194 
 195         bh.consume(r);
 196     }
 197 
 198 
 199     @Benchmark
 200     public void and(Blackhole bh) {
 201         byte[] a = fa.apply(SPECIES.length());
 202         byte[] b = fb.apply(SPECIES.length());
 203         byte[] r = fr.apply(SPECIES.length());
 204 
 205         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 206             for (int i = 0; i < a.length; i += SPECIES.length()) {
 207                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 208                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 209                 av.and(bv).intoArray(r, i);
 210             }
 211         }
 212 
 213         bh.consume(r);
 214     }
 215 
 216 
 217 
 218     @Benchmark
 219     public void andMasked(Blackhole bh) {
 220         byte[] a = fa.apply(SPECIES.length());
 221         byte[] b = fb.apply(SPECIES.length());
 222         byte[] r = fr.apply(SPECIES.length());
 223         boolean[] mask = fm.apply(SPECIES.length());
 224         Vector.Mask<Byte> vmask = ByteVector.maskFromValues(SPECIES, mask);
 225 
 226         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 227             for (int i = 0; i < a.length; i += SPECIES.length()) {
 228                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 229                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 230                 av.and(bv, vmask).intoArray(r, i);
 231             }
 232         }
 233 
 234         bh.consume(r);
 235     }
 236 
 237 
 238 
 239     @Benchmark
 240     public void or(Blackhole bh) {
 241         byte[] a = fa.apply(SPECIES.length());
 242         byte[] b = fb.apply(SPECIES.length());
 243         byte[] r = fr.apply(SPECIES.length());
 244 
 245         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 246             for (int i = 0; i < a.length; i += SPECIES.length()) {
 247                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 248                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 249                 av.or(bv).intoArray(r, i);
 250             }
 251         }
 252 
 253         bh.consume(r);
 254     }
 255 
 256 
 257 
 258     @Benchmark
 259     public void orMasked(Blackhole bh) {
 260         byte[] a = fa.apply(SPECIES.length());
 261         byte[] b = fb.apply(SPECIES.length());
 262         byte[] r = fr.apply(SPECIES.length());
 263         boolean[] mask = fm.apply(SPECIES.length());
 264         Vector.Mask<Byte> vmask = ByteVector.maskFromValues(SPECIES, mask);
 265 
 266         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 267             for (int i = 0; i < a.length; i += SPECIES.length()) {
 268                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 269                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 270                 av.or(bv, vmask).intoArray(r, i);
 271             }
 272         }
 273 
 274         bh.consume(r);
 275     }
 276 
 277 
 278 
 279     @Benchmark
 280     public void xor(Blackhole bh) {
 281         byte[] a = fa.apply(SPECIES.length());
 282         byte[] b = fb.apply(SPECIES.length());
 283         byte[] r = fr.apply(SPECIES.length());
 284 
 285         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 286             for (int i = 0; i < a.length; i += SPECIES.length()) {
 287                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 288                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 289                 av.xor(bv).intoArray(r, i);
 290             }
 291         }
 292 
 293         bh.consume(r);
 294     }
 295 
 296 
 297 
 298     @Benchmark
 299     public void xorMasked(Blackhole bh) {
 300         byte[] a = fa.apply(SPECIES.length());
 301         byte[] b = fb.apply(SPECIES.length());
 302         byte[] r = fr.apply(SPECIES.length());
 303         boolean[] mask = fm.apply(SPECIES.length());
 304         Vector.Mask<Byte> vmask = ByteVector.maskFromValues(SPECIES, mask);
 305 
 306         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 307             for (int i = 0; i < a.length; i += SPECIES.length()) {
 308                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 309                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 310                 av.xor(bv, vmask).intoArray(r, i);
 311             }
 312         }
 313 
 314         bh.consume(r);
 315     }
 316 
 317 
 318 
 319 
 320 
 321 
 322 
 323 
 324 
 325 
 326 
 327 
 328 
 329 
 330 
 331     @Benchmark
 332     public void aShiftRShift(Blackhole bh) {
 333         byte[] a = fa.apply(SPECIES.length());
 334         byte[] b = fb.apply(SPECIES.length());
 335         byte[] r = fr.apply(SPECIES.length());
 336 
 337         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 338             for (int i = 0; i < a.length; i += SPECIES.length()) {
 339                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 340                 av.aShiftR((int)b[i]).intoArray(r, i);
 341             }
 342         }
 343 
 344         bh.consume(r);
 345     }
 346 
 347 
 348 
 349     @Benchmark
 350     public void aShiftRMaskedShift(Blackhole bh) {
 351         byte[] a = fa.apply(SPECIES.length());
 352         byte[] b = fb.apply(SPECIES.length());
 353         byte[] r = fr.apply(SPECIES.length());
 354         boolean[] mask = fm.apply(SPECIES.length());
 355         Vector.Mask<Byte> vmask = ByteVector.maskFromValues(SPECIES, mask);
 356 
 357         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 358             for (int i = 0; i < a.length; i += SPECIES.length()) {
 359                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 360                 av.aShiftR((int)b[i], vmask).intoArray(r, i);
 361             }
 362         }
 363 
 364         bh.consume(r);
 365     }
 366 
 367 
 368 
 369     @Benchmark
 370     public void shiftLShift(Blackhole bh) {
 371         byte[] a = fa.apply(SPECIES.length());
 372         byte[] b = fb.apply(SPECIES.length());
 373         byte[] r = fr.apply(SPECIES.length());
 374 
 375         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 376             for (int i = 0; i < a.length; i += SPECIES.length()) {
 377                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 378                 av.shiftL((int)b[i]).intoArray(r, i);
 379             }
 380         }
 381 
 382         bh.consume(r);
 383     }
 384 
 385 
 386 
 387     @Benchmark
 388     public void shiftLMaskedShift(Blackhole bh) {
 389         byte[] a = fa.apply(SPECIES.length());
 390         byte[] b = fb.apply(SPECIES.length());
 391         byte[] r = fr.apply(SPECIES.length());
 392         boolean[] mask = fm.apply(SPECIES.length());
 393         Vector.Mask<Byte> vmask = ByteVector.maskFromValues(SPECIES, mask);
 394 
 395         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 396             for (int i = 0; i < a.length; i += SPECIES.length()) {
 397                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 398                 av.shiftL((int)b[i], vmask).intoArray(r, i);
 399             }
 400         }
 401 
 402         bh.consume(r);
 403     }
 404 
 405 
 406 
 407     @Benchmark
 408     public void shiftRShift(Blackhole bh) {
 409         byte[] a = fa.apply(SPECIES.length());
 410         byte[] b = fb.apply(SPECIES.length());
 411         byte[] r = fr.apply(SPECIES.length());
 412 
 413         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 414             for (int i = 0; i < a.length; i += SPECIES.length()) {
 415                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 416                 av.shiftR((int)b[i]).intoArray(r, i);
 417             }
 418         }
 419 
 420         bh.consume(r);
 421     }
 422 
 423 
 424 
 425     @Benchmark
 426     public void shiftRMaskedShift(Blackhole bh) {
 427         byte[] a = fa.apply(SPECIES.length());
 428         byte[] b = fb.apply(SPECIES.length());
 429         byte[] r = fr.apply(SPECIES.length());
 430         boolean[] mask = fm.apply(SPECIES.length());
 431         Vector.Mask<Byte> vmask = ByteVector.maskFromValues(SPECIES, mask);
 432 
 433         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 434             for (int i = 0; i < a.length; i += SPECIES.length()) {
 435                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 436                 av.shiftR((int)b[i], vmask).intoArray(r, i);
 437             }
 438         }
 439 
 440         bh.consume(r);
 441     }
 442 
 443 
 444 
 445 
 446 
 447 
 448 
 449 
 450     @Benchmark
 451     public void max(Blackhole bh) {
 452         byte[] a = fa.apply(SPECIES.length());
 453         byte[] b = fb.apply(SPECIES.length());
 454         byte[] r = fr.apply(SPECIES.length());
 455 
 456         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 457             for (int i = 0; i < a.length; i += SPECIES.length()) {
 458                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 459                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 460                 av.max(bv).intoArray(r, i);
 461             }
 462         }
 463 
 464         bh.consume(r);
 465     }
 466 
 467     @Benchmark
 468     public void min(Blackhole bh) {
 469         byte[] a = fa.apply(SPECIES.length());
 470         byte[] b = fb.apply(SPECIES.length());
 471         byte[] r = fr.apply(SPECIES.length());
 472 
 473         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 474             for (int i = 0; i < a.length; i += SPECIES.length()) {
 475                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 476                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 477                 av.min(bv).intoArray(r, i);
 478             }
 479         }
 480 
 481         bh.consume(r);
 482     }
 483 
 484 
 485     @Benchmark
 486     public void andAll(Blackhole bh) {
 487         byte[] a = fa.apply(SPECIES.length());
 488         byte[] r = fr.apply(SPECIES.length());
 489         byte ra = -1;
 490 
 491         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 492             for (int i = 0; i < a.length; i += SPECIES.length()) {
 493                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 494                 r[i] = av.andAll();
 495             }
 496         }
 497 
 498         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 499             ra = -1;
 500             for (int i = 0; i < a.length; i += SPECIES.length()) {
 501                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 502                 ra &= av.andAll();
 503             }
 504         }
 505 
 506         bh.consume(ra);
 507         bh.consume(r);
 508     }
 509 
 510 
 511 
 512     @Benchmark
 513     public void orAll(Blackhole bh) {
 514         byte[] a = fa.apply(SPECIES.length());
 515         byte[] r = fr.apply(SPECIES.length());
 516         byte ra = 0;
 517 
 518         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 519             for (int i = 0; i < a.length; i += SPECIES.length()) {
 520                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 521                 r[i] = av.orAll();
 522             }
 523         }
 524 
 525         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 526             ra = 0;
 527             for (int i = 0; i < a.length; i += SPECIES.length()) {
 528                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 529                 ra |= av.orAll();
 530             }
 531         }
 532 
 533         bh.consume(ra);
 534         bh.consume(r);
 535     }
 536 
 537 
 538 
 539     @Benchmark
 540     public void xorAll(Blackhole bh) {
 541         byte[] a = fa.apply(SPECIES.length());
 542         byte[] r = fr.apply(SPECIES.length());
 543         byte ra = 0;
 544 
 545         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 546             for (int i = 0; i < a.length; i += SPECIES.length()) {
 547                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 548                 r[i] = av.xorAll();
 549             }
 550         }
 551 
 552         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 553             ra = 0;
 554             for (int i = 0; i < a.length; i += SPECIES.length()) {
 555                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 556                 ra ^= av.xorAll();
 557             }
 558         }
 559 
 560         bh.consume(ra);
 561         bh.consume(r);
 562     }
 563 
 564 
 565     @Benchmark
 566     public void addAll(Blackhole bh) {
 567         byte[] a = fa.apply(SPECIES.length());
 568         byte[] r = fr.apply(SPECIES.length());
 569         byte ra = 0;
 570 
 571         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 572             for (int i = 0; i < a.length; i += SPECIES.length()) {
 573                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 574                 r[i] = av.addAll();
 575             }
 576         }
 577 
 578         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 579             ra = 0;
 580             for (int i = 0; i < a.length; i += SPECIES.length()) {
 581                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 582                 ra += av.addAll();
 583             }
 584         }
 585 
 586         bh.consume(ra);
 587         bh.consume(r);
 588     }
 589 
 590     @Benchmark
 591     public void mulAll(Blackhole bh) {
 592         byte[] a = fa.apply(SPECIES.length());
 593         byte[] r = fr.apply(SPECIES.length());
 594         byte ra = 1;
 595 
 596         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 597             for (int i = 0; i < a.length; i += SPECIES.length()) {
 598                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 599                 r[i] = av.mulAll();
 600             }
 601         }
 602 
 603         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 604             ra = 1;
 605             for (int i = 0; i < a.length; i += SPECIES.length()) {
 606                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 607                 ra *= av.mulAll();
 608             }
 609         }
 610 
 611         bh.consume(ra);
 612         bh.consume(r);
 613     }
 614 
 615     @Benchmark
 616     public void minAll(Blackhole bh) {
 617         byte[] a = fa.apply(SPECIES.length());
 618         byte[] r = fr.apply(SPECIES.length());
 619         byte ra = Byte.MAX_VALUE;
 620 
 621         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 622             for (int i = 0; i < a.length; i += SPECIES.length()) {
 623                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 624                 r[i] = av.minAll();
 625             }
 626         }
 627 
 628         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 629             ra = Byte.MAX_VALUE;
 630             for (int i = 0; i < a.length; i += SPECIES.length()) {
 631                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 632                 ra = (byte)Math.min(ra, av.minAll());
 633             }
 634         }
 635 
 636         bh.consume(ra);
 637         bh.consume(r);
 638     }
 639 
 640     @Benchmark
 641     public void maxAll(Blackhole bh) {
 642         byte[] a = fa.apply(SPECIES.length());
 643         byte[] r = fr.apply(SPECIES.length());
 644         byte ra = Byte.MIN_VALUE;
 645 
 646         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 647             for (int i = 0; i < a.length; i += SPECIES.length()) {
 648                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 649                 r[i] = av.maxAll();
 650             }
 651         }
 652 
 653         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 654             ra = Byte.MIN_VALUE;
 655             for (int i = 0; i < a.length; i += SPECIES.length()) {
 656                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 657                 ra = (byte)Math.max(ra, av.maxAll());
 658             }
 659         }
 660 
 661         bh.consume(ra);
 662         bh.consume(r);
 663     }
 664 
 665 
 666     @Benchmark
 667     public void anyTrue(Blackhole bh) {
 668         boolean[] mask = fm.apply(SPECIES.length());
 669         boolean[] r = fmr.apply(SPECIES.length());
 670 
 671         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 672             for (int i = 0; i < mask.length; i += SPECIES.length()) {
 673                 Vector.Mask<Byte> vmask = ByteVector.maskFromArray(SPECIES, mask, i);
 674                 r[i] = vmask.anyTrue();
 675             }
 676         }
 677 
 678         bh.consume(r);
 679     }
 680 
 681 
 682 
 683     @Benchmark
 684     public void allTrue(Blackhole bh) {
 685         boolean[] mask = fm.apply(SPECIES.length());
 686         boolean[] r = fmr.apply(SPECIES.length());
 687 
 688         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 689             for (int i = 0; i < mask.length; i += SPECIES.length()) {
 690                 Vector.Mask<Byte> vmask = ByteVector.maskFromArray(SPECIES, mask, i);
 691                 r[i] = vmask.allTrue();
 692             }
 693         }
 694 
 695         bh.consume(r);
 696     }
 697 
 698 
 699     @Benchmark
 700     public void with(Blackhole bh) {
 701         byte[] a = fa.apply(SPECIES.length());
 702         byte[] r = fr.apply(SPECIES.length());
 703 
 704         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 705             for (int i = 0; i < a.length; i += SPECIES.length()) {
 706                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 707                 av.with(0, (byte)4).intoArray(r, i);
 708             }
 709         }
 710 
 711         bh.consume(r);
 712     }
 713 
 714     @Benchmark
 715     public Object lessThan() {
 716         byte[] a = fa.apply(size);
 717         byte[] b = fb.apply(size);
 718         boolean[] ms = fm.apply(size);
 719         Vector.Mask<Byte> m = ByteVector.maskFromArray(SPECIES, ms, 0);
 720 
 721         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 722             for (int i = 0; i < a.length; i += SPECIES.length()) {
 723                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 724                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 725                 Vector.Mask<Byte> mv = av.lessThan(bv);
 726 
 727                 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
 728             }
 729         }
 730         return m;
 731     }
 732 
 733 
 734     @Benchmark
 735     public Object greaterThan() {
 736         byte[] a = fa.apply(size);
 737         byte[] b = fb.apply(size);
 738         boolean[] ms = fm.apply(size);
 739         Vector.Mask<Byte> m = ByteVector.maskFromArray(SPECIES, ms, 0);
 740 
 741         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 742             for (int i = 0; i < a.length; i += SPECIES.length()) {
 743                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 744                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 745                 Vector.Mask<Byte> mv = av.greaterThan(bv);
 746 
 747                 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
 748             }
 749         }
 750         return m;
 751     }
 752 
 753 
 754     @Benchmark
 755     public Object equal() {
 756         byte[] a = fa.apply(size);
 757         byte[] b = fb.apply(size);
 758         boolean[] ms = fm.apply(size);
 759         Vector.Mask<Byte> m = ByteVector.maskFromArray(SPECIES, ms, 0);
 760 
 761         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 762             for (int i = 0; i < a.length; i += SPECIES.length()) {
 763                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 764                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 765                 Vector.Mask<Byte> mv = av.equal(bv);
 766 
 767                 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
 768             }
 769         }
 770         return m;
 771     }
 772 
 773 
 774     @Benchmark
 775     public Object notEqual() {
 776         byte[] a = fa.apply(size);
 777         byte[] b = fb.apply(size);
 778         boolean[] ms = fm.apply(size);
 779         Vector.Mask<Byte> m = ByteVector.maskFromArray(SPECIES, ms, 0);
 780 
 781         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 782             for (int i = 0; i < a.length; i += SPECIES.length()) {
 783                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 784                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 785                 Vector.Mask<Byte> mv = av.notEqual(bv);
 786 
 787                 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
 788             }
 789         }
 790         return m;
 791     }
 792 
 793 
 794     @Benchmark
 795     public Object lessThanEq() {
 796         byte[] a = fa.apply(size);
 797         byte[] b = fb.apply(size);
 798         boolean[] ms = fm.apply(size);
 799         Vector.Mask<Byte> m = ByteVector.maskFromArray(SPECIES, ms, 0);
 800 
 801         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 802             for (int i = 0; i < a.length; i += SPECIES.length()) {
 803                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 804                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 805                 Vector.Mask<Byte> mv = av.lessThanEq(bv);
 806 
 807                 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
 808             }
 809         }
 810         return m;
 811     }
 812 
 813 
 814     @Benchmark
 815     public Object greaterThanEq() {
 816         byte[] a = fa.apply(size);
 817         byte[] b = fb.apply(size);
 818         boolean[] ms = fm.apply(size);
 819         Vector.Mask<Byte> m = ByteVector.maskFromArray(SPECIES, ms, 0);
 820 
 821         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 822             for (int i = 0; i < a.length; i += SPECIES.length()) {
 823                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 824                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 825                 Vector.Mask<Byte> mv = av.greaterThanEq(bv);
 826 
 827                 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
 828             }
 829         }
 830         return m;
 831     }
 832 
 833 
 834     @Benchmark
 835     public void blend(Blackhole bh) {
 836         byte[] a = fa.apply(SPECIES.length());
 837         byte[] b = fb.apply(SPECIES.length());
 838         byte[] r = fr.apply(SPECIES.length());
 839         boolean[] mask = fm.apply(SPECIES.length());
 840         Vector.Mask<Byte> vmask = ByteVector.maskFromValues(SPECIES, mask);
 841 
 842         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 843             for (int i = 0; i < a.length; i += SPECIES.length()) {
 844                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 845                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 846                 av.blend(bv, vmask).intoArray(r, i);
 847             }
 848         }
 849 
 850         bh.consume(r);
 851     }
 852 
 853     @Benchmark
 854     public void rearrange(Blackhole bh) {
 855         byte[] a = fa.apply(SPECIES.length());
 856         int[] order = fs.apply(a.length, SPECIES.length());
 857         byte[] r = fr.apply(SPECIES.length());
 858 
 859         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 860             for (int i = 0; i < a.length; i += SPECIES.length()) {
 861                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 862                 av.rearrange(ByteVector.shuffleFromArray(SPECIES, order, i)).intoArray(r, i);
 863             }
 864         }
 865 
 866         bh.consume(r);
 867     }
 868 
 869     /** Benchmarks {@code get}: copies every lane of each input vector into the result array one element at a time. */
 870     @Benchmark
 871     public void extract(Blackhole bh) {
 872         byte[] src = fa.apply(SPECIES.length());
 873         byte[] dst = fr.apply(SPECIES.length());
 874         final int lanes = SPECIES.length();
 875         for (int round = 0; round < INVOC_COUNT; round++) {
 876             for (int base = 0; base < src.length; base += SPECIES.length()) {
 877                 ByteVector vec = ByteVector.fromArray(SPECIES, src, base);
 878                 // The lane copies are written out by hand: full unrolling only happens after intrinsification,
 879                 // and the get intrinsic requires its index to be a known constant.
 880                 if (lanes == 1) {
 881                     dst[base] = vec.get(0);
 882                 } else if (lanes == 2) {
 883                     dst[base] = vec.get(0);
 884                     dst[base + 1] = vec.get(1);
 885                 } else if (lanes == 4) {
 886                     dst[base] = vec.get(0);
 887                     dst[base + 1] = vec.get(1);
 888                     dst[base + 2] = vec.get(2);
 889                     dst[base + 3] = vec.get(3);
 890                 } else if (lanes == 8) {
 891                     dst[base] = vec.get(0);
 892                     dst[base + 1] = vec.get(1);
 893                     dst[base + 2] = vec.get(2);
 894                     dst[base + 3] = vec.get(3);
 895                     dst[base + 4] = vec.get(4);
 896                     dst[base + 5] = vec.get(5);
 897                     dst[base + 6] = vec.get(6);
 898                     dst[base + 7] = vec.get(7);
 899                 } else if (lanes == 16) {
 900                     dst[base] = vec.get(0);
 901                     dst[base + 1] = vec.get(1);
 902                     dst[base + 2] = vec.get(2);
 903                     dst[base + 3] = vec.get(3);
 904                     dst[base + 4] = vec.get(4);
 905                     dst[base + 5] = vec.get(5);
 906                     dst[base + 6] = vec.get(6);
 907                     dst[base + 7] = vec.get(7);
 908                     dst[base + 8] = vec.get(8);
 909                     dst[base + 9] = vec.get(9);
 910                     dst[base + 10] = vec.get(10);
 911                     dst[base + 11] = vec.get(11);
 912                     dst[base + 12] = vec.get(12);
 913                     dst[base + 13] = vec.get(13);
 914                     dst[base + 14] = vec.get(14);
 915                     dst[base + 15] = vec.get(15);
 916                 } else if (lanes == 32) {
 917                     dst[base] = vec.get(0);
 918                     dst[base + 1] = vec.get(1);
 919                     dst[base + 2] = vec.get(2);
 920                     dst[base + 3] = vec.get(3);
 921                     dst[base + 4] = vec.get(4);
 922                     dst[base + 5] = vec.get(5);
 923                     dst[base + 6] = vec.get(6);
 924                     dst[base + 7] = vec.get(7);
 925                     dst[base + 8] = vec.get(8);
 926                     dst[base + 9] = vec.get(9);
 927                     dst[base + 10] = vec.get(10);
 928                     dst[base + 11] = vec.get(11);
 929                     dst[base + 12] = vec.get(12);
 930                     dst[base + 13] = vec.get(13);
 931                     dst[base + 14] = vec.get(14);
 932                     dst[base + 15] = vec.get(15);
 933                     dst[base + 16] = vec.get(16);
 934                     dst[base + 17] = vec.get(17);
 935                     dst[base + 18] = vec.get(18);
 936                     dst[base + 19] = vec.get(19);
 937                     dst[base + 20] = vec.get(20);
 938                     dst[base + 21] = vec.get(21);
 939                     dst[base + 22] = vec.get(22);
 940                     dst[base + 23] = vec.get(23);
 941                     dst[base + 24] = vec.get(24);
 942                     dst[base + 25] = vec.get(25);
 943                     dst[base + 26] = vec.get(26);
 944                     dst[base + 27] = vec.get(27);
 945                     dst[base + 28] = vec.get(28);
 946                     dst[base + 29] = vec.get(29);
 947                     dst[base + 30] = vec.get(30);
 948                     dst[base + 31] = vec.get(31);
 949                 } else if (lanes == 64) {
 950                     dst[base] = vec.get(0);
 951                     dst[base + 1] = vec.get(1);
 952                     dst[base + 2] = vec.get(2);
 953                     dst[base + 3] = vec.get(3);
 954                     dst[base + 4] = vec.get(4);
 955                     dst[base + 5] = vec.get(5);
 956                     dst[base + 6] = vec.get(6);
 957                     dst[base + 7] = vec.get(7);
 958                     dst[base + 8] = vec.get(8);
 959                     dst[base + 9] = vec.get(9);
 960                     dst[base + 10] = vec.get(10);
 961                     dst[base + 11] = vec.get(11);
 962                     dst[base + 12] = vec.get(12);
 963                     dst[base + 13] = vec.get(13);
 964                     dst[base + 14] = vec.get(14);
 965                     dst[base + 15] = vec.get(15);
 966                     dst[base + 16] = vec.get(16);
 967                     dst[base + 17] = vec.get(17);
 968                     dst[base + 18] = vec.get(18);
 969                     dst[base + 19] = vec.get(19);
 970                     dst[base + 20] = vec.get(20);
 971                     dst[base + 21] = vec.get(21);
 972                     dst[base + 22] = vec.get(22);
 973                     dst[base + 23] = vec.get(23);
 974                     dst[base + 24] = vec.get(24);
 975                     dst[base + 25] = vec.get(25);
 976                     dst[base + 26] = vec.get(26);
 977                     dst[base + 27] = vec.get(27);
 978                     dst[base + 28] = vec.get(28);
 979                     dst[base + 29] = vec.get(29);
 980                     dst[base + 30] = vec.get(30);
 981                     dst[base + 31] = vec.get(31);
 982                     dst[base + 32] = vec.get(32);
 983                     dst[base + 33] = vec.get(33);
 984                     dst[base + 34] = vec.get(34);
 985                     dst[base + 35] = vec.get(35);
 986                     dst[base + 36] = vec.get(36);
 987                     dst[base + 37] = vec.get(37);
 988                     dst[base + 38] = vec.get(38);
 989                     dst[base + 39] = vec.get(39);
 990                     dst[base + 40] = vec.get(40);
 991                     dst[base + 41] = vec.get(41);
 992                     dst[base + 42] = vec.get(42);
 993                     dst[base + 43] = vec.get(43);
 994                     dst[base + 44] = vec.get(44);
 995                     dst[base + 45] = vec.get(45);
 996                     dst[base + 46] = vec.get(46);
 997                     dst[base + 47] = vec.get(47);
 998                     dst[base + 48] = vec.get(48);
 999                     dst[base + 49] = vec.get(49);
1000                     dst[base + 50] = vec.get(50);
1001                     dst[base + 51] = vec.get(51);
1002                     dst[base + 52] = vec.get(52);
1003                     dst[base + 53] = vec.get(53);
1004                     dst[base + 54] = vec.get(54);
1005                     dst[base + 55] = vec.get(55);
1006                     dst[base + 56] = vec.get(56);
1007                     dst[base + 57] = vec.get(57);
1008                     dst[base + 58] = vec.get(58);
1009                     dst[base + 59] = vec.get(59);
1010                     dst[base + 60] = vec.get(60);
1011                     dst[base + 61] = vec.get(61);
1012                     dst[base + 62] = vec.get(62);
1013                     dst[base + 63] = vec.get(63);
1014                 } else {
1015                     for (int lane = 0; lane < lanes; lane++) {
1016                         dst[base + lane] = vec.get(lane);
1017                     }
1018                 }
1019             }
1020         }
1021         // Pass the filled result array to the JMH Blackhole.
1022         bh.consume(dst);
1023     }
1024 
1025 
1026 
1027 
1028 
1029 
1030 
1031 
1032 
1033 
1034 
1035 
1036 
1037 
1038 
1039 
1040 
1041 
1042 
1043 
1044 
1045     /** Benchmarks {@code neg()} applied to each SPECIES-sized slice of the input array. */
1046     @Benchmark
1047     public void neg(Blackhole bh) {
1048         byte[] input = fa.apply(SPECIES.length());
1049         byte[] output = fr.apply(SPECIES.length());
1050         for (int round = 0; round < INVOC_COUNT; round++) {
1051             for (int offset = 0; offset < input.length; offset += SPECIES.length()) {
1052                 ByteVector.fromArray(SPECIES, input, offset)
1053                         .neg()
1054                         .intoArray(output, offset);
1055             }
1056         }
1057         bh.consume(output); // pass the filled result array to the JMH Blackhole
1058     }
1059 
1060     /** Benchmarks {@code neg(mask)}; the mask is built once from the {@code fm} values before the timed loops. */
1061     @Benchmark
1062     public void negMasked(Blackhole bh) {
1063         byte[] input = fa.apply(SPECIES.length());
1064         byte[] output = fr.apply(SPECIES.length());
1065         boolean[] maskBits = fm.apply(SPECIES.length());
1066         Vector.Mask<Byte> laneMask = ByteVector.maskFromValues(SPECIES, maskBits);
1067         for (int round = 0; round < INVOC_COUNT; round++) {
1068             for (int offset = 0; offset < input.length; offset += SPECIES.length()) {
1069                 ByteVector.fromArray(SPECIES, input, offset)
1070                         .neg(laneMask)
1071                         .intoArray(output, offset);
1072             }
1073         }
1074         bh.consume(output); // pass the filled result array to the JMH Blackhole
1075     }
1076 
1077     /** Benchmarks {@code abs()} applied to each SPECIES-sized slice of the input array. */
1078     @Benchmark
1079     public void abs(Blackhole bh) {
1080         byte[] input = fa.apply(SPECIES.length());
1081         byte[] output = fr.apply(SPECIES.length());
1082         for (int round = 0; round < INVOC_COUNT; round++) {
1083             for (int offset = 0; offset < input.length; offset += SPECIES.length()) {
1084                 ByteVector.fromArray(SPECIES, input, offset)
1085                         .abs()
1086                         .intoArray(output, offset);
1087             }
1088         }
1089         bh.consume(output); // pass the filled result array to the JMH Blackhole
1090     }
1091 
1092     /** Benchmarks {@code abs(mask)}; the mask is built once from the {@code fm} values before the timed loops. */
1093     @Benchmark
1094     public void absMasked(Blackhole bh) {
1095         byte[] input = fa.apply(SPECIES.length());
1096         byte[] output = fr.apply(SPECIES.length());
1097         boolean[] maskBits = fm.apply(SPECIES.length());
1098         Vector.Mask<Byte> laneMask = ByteVector.maskFromValues(SPECIES, maskBits);
1099         for (int round = 0; round < INVOC_COUNT; round++) {
1100             for (int offset = 0; offset < input.length; offset += SPECIES.length()) {
1101                 ByteVector.fromArray(SPECIES, input, offset)
1102                         .abs(laneMask)
1103                         .intoArray(output, offset);
1104             }
1105         }
1106         bh.consume(output); // pass the filled result array to the JMH Blackhole
1107     }
1108 
1109 
1110     /** Benchmarks {@code not()} applied to each SPECIES-sized slice of the input array. */
1111     @Benchmark
1112     public void not(Blackhole bh) {
1113         byte[] input = fa.apply(SPECIES.length());
1114         byte[] output = fr.apply(SPECIES.length());
1115         for (int round = 0; round < INVOC_COUNT; round++) {
1116             for (int offset = 0; offset < input.length; offset += SPECIES.length()) {
1117                 ByteVector.fromArray(SPECIES, input, offset)
1118                         .not()
1119                         .intoArray(output, offset);
1120             }
1121         }
1122         bh.consume(output); // pass the filled result array to the JMH Blackhole
1123     }
1124 
1125 
1126 
1127     /** Benchmarks {@code not(mask)}; the mask is built once from the {@code fm} values before the timed loops. */
1128     @Benchmark
1129     public void notMasked(Blackhole bh) {
1130         byte[] input = fa.apply(SPECIES.length());
1131         byte[] output = fr.apply(SPECIES.length());
1132         boolean[] maskBits = fm.apply(SPECIES.length());
1133         Vector.Mask<Byte> laneMask = ByteVector.maskFromValues(SPECIES, maskBits);
1134         for (int round = 0; round < INVOC_COUNT; round++) {
1135             for (int offset = 0; offset < input.length; offset += SPECIES.length()) {
1136                 ByteVector.fromArray(SPECIES, input, offset)
1137                         .not(laneMask)
1138                         .intoArray(output, offset);
1139             }
1140         }
1141         bh.consume(output); // pass the filled result array to the JMH Blackhole
1142     }
1143 
1144 
1145 
1146 
1147 
1148 }
1149