1 /*
   2  * Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have
  21  * questions.
  22  */
  23 
  24 package benchmark.jdk.incubator.vector;
  25 
import jdk.incubator.vector.ByteVector;
import jdk.incubator.vector.Vector;
import jdk.incubator.vector.VectorMask;
import jdk.incubator.vector.VectorShape;
import jdk.incubator.vector.VectorShuffle;
import jdk.incubator.vector.VectorSpecies;

import java.util.concurrent.TimeUnit;
import java.util.function.BiFunction;
import java.util.function.IntFunction;

import org.openjdk.jmh.annotations.*;
import org.openjdk.jmh.infra.Blackhole;
  38 
  39 @BenchmarkMode(Mode.Throughput)
  40 @OutputTimeUnit(TimeUnit.MILLISECONDS)
  41 @State(Scope.Benchmark)
  42 @Warmup(iterations = 3, time = 1)
  43 @Measurement(iterations = 5, time = 1)
  44 @Fork(value = 1, jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"})
  45 public class Byte128Vector extends AbstractVectorBenchmark {
    /** Vector species shared by every benchmark in this class ({@code ByteVector.SPECIES_128}). */
    static final VectorSpecies<Byte> SPECIES = ByteVector.SPECIES_128;

    /** Trip count of the outer {@code ic} loop that wraps each benchmark kernel. */
    static final int INVOC_COUNT = 1; // get rid of outer loop

    /**
     * Requested number of elements per array (JMH parameter, default "1024").
     * {@code init()} may adjust this value before the arrays are allocated.
     */
    @Param("1024")
    int size;
  52 
  53     byte[] fill(IntFunction<Byte> f) {
  54         byte[] array = new byte[size];
  55         for (int i = 0; i < array.length; i++) {
  56             array[i] = f.apply(i);
  57         }
  58         return array;
  59     }
  60 
  61     byte[] a, b, c, r;
  62     boolean[] m, rm;
  63     int[] s;
  64 
  65     @Setup
  66     public void init() {
  67         size += size % SPECIES.length(); // FIXME: add post-loops
  68 
  69         a = fill(i -> (byte)(2*i));
  70         b = fill(i -> (byte)(i+1));
  71         c = fill(i -> (byte)(i+5));
  72         r = fill(i -> (byte)0);
  73 
  74         m = fillMask(size, i -> (i % 2) == 0);
  75         rm = fillMask(size, i -> false);
  76 
  77         s = fillInt(size, i -> RANDOM.nextInt(SPECIES.length()));
  78     }
  79 
  80     final IntFunction<byte[]> fa = vl -> a;
  81     final IntFunction<byte[]> fb = vl -> b;
  82     final IntFunction<byte[]> fc = vl -> c;
  83     final IntFunction<byte[]> fr = vl -> r;
  84     final IntFunction<boolean[]> fm = vl -> m;
  85     final IntFunction<boolean[]> fmr = vl -> rm;
  86     final BiFunction<Integer,Integer,int[]> fs = (i,j) -> s;
  87 
  88 
  89     @Benchmark
  90     public void add(Blackhole bh) {
  91         byte[] a = fa.apply(SPECIES.length());
  92         byte[] b = fb.apply(SPECIES.length());
  93         byte[] r = fr.apply(SPECIES.length());
  94 
  95         for (int ic = 0; ic < INVOC_COUNT; ic++) {
  96             for (int i = 0; i < a.length; i += SPECIES.length()) {
  97                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
  98                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
  99                 av.add(bv).intoArray(r, i);
 100             }
 101         }
 102 
 103         bh.consume(r);
 104     }
 105 
 106     @Benchmark
 107     public void addMasked(Blackhole bh) {
 108         byte[] a = fa.apply(SPECIES.length());
 109         byte[] b = fb.apply(SPECIES.length());
 110         byte[] r = fr.apply(SPECIES.length());
 111         boolean[] mask = fm.apply(SPECIES.length());
 112         VectorMask<Byte> vmask = VectorMask.fromValues(SPECIES, mask);
 113 
 114         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 115             for (int i = 0; i < a.length; i += SPECIES.length()) {
 116                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 117                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 118                 av.add(bv, vmask).intoArray(r, i);
 119             }
 120         }
 121 
 122         bh.consume(r);
 123     }
 124 
 125     @Benchmark
 126     public void sub(Blackhole bh) {
 127         byte[] a = fa.apply(SPECIES.length());
 128         byte[] b = fb.apply(SPECIES.length());
 129         byte[] r = fr.apply(SPECIES.length());
 130 
 131         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 132             for (int i = 0; i < a.length; i += SPECIES.length()) {
 133                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 134                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 135                 av.sub(bv).intoArray(r, i);
 136             }
 137         }
 138 
 139         bh.consume(r);
 140     }
 141 
 142     @Benchmark
 143     public void subMasked(Blackhole bh) {
 144         byte[] a = fa.apply(SPECIES.length());
 145         byte[] b = fb.apply(SPECIES.length());
 146         byte[] r = fr.apply(SPECIES.length());
 147         boolean[] mask = fm.apply(SPECIES.length());
 148         VectorMask<Byte> vmask = VectorMask.fromValues(SPECIES, mask);
 149 
 150         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 151             for (int i = 0; i < a.length; i += SPECIES.length()) {
 152                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 153                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 154                 av.sub(bv, vmask).intoArray(r, i);
 155             }
 156         }
 157 
 158         bh.consume(r);
 159     }
 160 
 161 
 162 
 163     @Benchmark
 164     public void mul(Blackhole bh) {
 165         byte[] a = fa.apply(SPECIES.length());
 166         byte[] b = fb.apply(SPECIES.length());
 167         byte[] r = fr.apply(SPECIES.length());
 168 
 169         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 170             for (int i = 0; i < a.length; i += SPECIES.length()) {
 171                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 172                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 173                 av.mul(bv).intoArray(r, i);
 174             }
 175         }
 176 
 177         bh.consume(r);
 178     }
 179 
 180     @Benchmark
 181     public void mulMasked(Blackhole bh) {
 182         byte[] a = fa.apply(SPECIES.length());
 183         byte[] b = fb.apply(SPECIES.length());
 184         byte[] r = fr.apply(SPECIES.length());
 185         boolean[] mask = fm.apply(SPECIES.length());
 186         VectorMask<Byte> vmask = VectorMask.fromValues(SPECIES, mask);
 187 
 188         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 189             for (int i = 0; i < a.length; i += SPECIES.length()) {
 190                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 191                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 192                 av.mul(bv, vmask).intoArray(r, i);
 193             }
 194         }
 195 
 196         bh.consume(r);
 197     }
 198 
 199 
 200     @Benchmark
 201     public void and(Blackhole bh) {
 202         byte[] a = fa.apply(SPECIES.length());
 203         byte[] b = fb.apply(SPECIES.length());
 204         byte[] r = fr.apply(SPECIES.length());
 205 
 206         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 207             for (int i = 0; i < a.length; i += SPECIES.length()) {
 208                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 209                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 210                 av.and(bv).intoArray(r, i);
 211             }
 212         }
 213 
 214         bh.consume(r);
 215     }
 216 
 217 
 218 
 219     @Benchmark
 220     public void andMasked(Blackhole bh) {
 221         byte[] a = fa.apply(SPECIES.length());
 222         byte[] b = fb.apply(SPECIES.length());
 223         byte[] r = fr.apply(SPECIES.length());
 224         boolean[] mask = fm.apply(SPECIES.length());
 225         VectorMask<Byte> vmask = VectorMask.fromValues(SPECIES, mask);
 226 
 227         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 228             for (int i = 0; i < a.length; i += SPECIES.length()) {
 229                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 230                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 231                 av.and(bv, vmask).intoArray(r, i);
 232             }
 233         }
 234 
 235         bh.consume(r);
 236     }
 237 
 238 
 239 
 240     @Benchmark
 241     public void or(Blackhole bh) {
 242         byte[] a = fa.apply(SPECIES.length());
 243         byte[] b = fb.apply(SPECIES.length());
 244         byte[] r = fr.apply(SPECIES.length());
 245 
 246         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 247             for (int i = 0; i < a.length; i += SPECIES.length()) {
 248                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 249                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 250                 av.or(bv).intoArray(r, i);
 251             }
 252         }
 253 
 254         bh.consume(r);
 255     }
 256 
 257 
 258 
 259     @Benchmark
 260     public void orMasked(Blackhole bh) {
 261         byte[] a = fa.apply(SPECIES.length());
 262         byte[] b = fb.apply(SPECIES.length());
 263         byte[] r = fr.apply(SPECIES.length());
 264         boolean[] mask = fm.apply(SPECIES.length());
 265         VectorMask<Byte> vmask = VectorMask.fromValues(SPECIES, mask);
 266 
 267         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 268             for (int i = 0; i < a.length; i += SPECIES.length()) {
 269                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 270                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 271                 av.or(bv, vmask).intoArray(r, i);
 272             }
 273         }
 274 
 275         bh.consume(r);
 276     }
 277 
 278 
 279 
 280     @Benchmark
 281     public void xor(Blackhole bh) {
 282         byte[] a = fa.apply(SPECIES.length());
 283         byte[] b = fb.apply(SPECIES.length());
 284         byte[] r = fr.apply(SPECIES.length());
 285 
 286         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 287             for (int i = 0; i < a.length; i += SPECIES.length()) {
 288                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 289                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 290                 av.xor(bv).intoArray(r, i);
 291             }
 292         }
 293 
 294         bh.consume(r);
 295     }
 296 
 297 
 298 
 299     @Benchmark
 300     public void xorMasked(Blackhole bh) {
 301         byte[] a = fa.apply(SPECIES.length());
 302         byte[] b = fb.apply(SPECIES.length());
 303         byte[] r = fr.apply(SPECIES.length());
 304         boolean[] mask = fm.apply(SPECIES.length());
 305         VectorMask<Byte> vmask = VectorMask.fromValues(SPECIES, mask);
 306 
 307         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 308             for (int i = 0; i < a.length; i += SPECIES.length()) {
 309                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 310                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 311                 av.xor(bv, vmask).intoArray(r, i);
 312             }
 313         }
 314 
 315         bh.consume(r);
 316     }
 317 
 318 
 319 
 320 
 321 
 322 
 323 
 324 
 325 
 326 
 327 
 328 
 329 
 330 
 331 
 332     @Benchmark
 333     public void aShiftRShift(Blackhole bh) {
 334         byte[] a = fa.apply(SPECIES.length());
 335         byte[] b = fb.apply(SPECIES.length());
 336         byte[] r = fr.apply(SPECIES.length());
 337 
 338         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 339             for (int i = 0; i < a.length; i += SPECIES.length()) {
 340                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 341                 av.aShiftR((int)b[i]).intoArray(r, i);
 342             }
 343         }
 344 
 345         bh.consume(r);
 346     }
 347 
 348 
 349 
 350     @Benchmark
 351     public void aShiftRMaskedShift(Blackhole bh) {
 352         byte[] a = fa.apply(SPECIES.length());
 353         byte[] b = fb.apply(SPECIES.length());
 354         byte[] r = fr.apply(SPECIES.length());
 355         boolean[] mask = fm.apply(SPECIES.length());
 356         VectorMask<Byte> vmask = VectorMask.fromValues(SPECIES, mask);
 357 
 358         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 359             for (int i = 0; i < a.length; i += SPECIES.length()) {
 360                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 361                 av.aShiftR((int)b[i], vmask).intoArray(r, i);
 362             }
 363         }
 364 
 365         bh.consume(r);
 366     }
 367 
 368 
 369 
 370     @Benchmark
 371     public void shiftLShift(Blackhole bh) {
 372         byte[] a = fa.apply(SPECIES.length());
 373         byte[] b = fb.apply(SPECIES.length());
 374         byte[] r = fr.apply(SPECIES.length());
 375 
 376         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 377             for (int i = 0; i < a.length; i += SPECIES.length()) {
 378                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 379                 av.shiftL((int)b[i]).intoArray(r, i);
 380             }
 381         }
 382 
 383         bh.consume(r);
 384     }
 385 
 386 
 387 
 388     @Benchmark
 389     public void shiftLMaskedShift(Blackhole bh) {
 390         byte[] a = fa.apply(SPECIES.length());
 391         byte[] b = fb.apply(SPECIES.length());
 392         byte[] r = fr.apply(SPECIES.length());
 393         boolean[] mask = fm.apply(SPECIES.length());
 394         VectorMask<Byte> vmask = VectorMask.fromValues(SPECIES, mask);
 395 
 396         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 397             for (int i = 0; i < a.length; i += SPECIES.length()) {
 398                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 399                 av.shiftL((int)b[i], vmask).intoArray(r, i);
 400             }
 401         }
 402 
 403         bh.consume(r);
 404     }
 405 
 406 
 407 
 408     @Benchmark
 409     public void shiftRShift(Blackhole bh) {
 410         byte[] a = fa.apply(SPECIES.length());
 411         byte[] b = fb.apply(SPECIES.length());
 412         byte[] r = fr.apply(SPECIES.length());
 413 
 414         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 415             for (int i = 0; i < a.length; i += SPECIES.length()) {
 416                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 417                 av.shiftR((int)b[i]).intoArray(r, i);
 418             }
 419         }
 420 
 421         bh.consume(r);
 422     }
 423 
 424 
 425 
 426     @Benchmark
 427     public void shiftRMaskedShift(Blackhole bh) {
 428         byte[] a = fa.apply(SPECIES.length());
 429         byte[] b = fb.apply(SPECIES.length());
 430         byte[] r = fr.apply(SPECIES.length());
 431         boolean[] mask = fm.apply(SPECIES.length());
 432         VectorMask<Byte> vmask = VectorMask.fromValues(SPECIES, mask);
 433 
 434         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 435             for (int i = 0; i < a.length; i += SPECIES.length()) {
 436                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 437                 av.shiftR((int)b[i], vmask).intoArray(r, i);
 438             }
 439         }
 440 
 441         bh.consume(r);
 442     }
 443 
 444 
 445 
 446 
 447 
 448 
 449 
 450 
 451     @Benchmark
 452     public void max(Blackhole bh) {
 453         byte[] a = fa.apply(SPECIES.length());
 454         byte[] b = fb.apply(SPECIES.length());
 455         byte[] r = fr.apply(SPECIES.length());
 456 
 457         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 458             for (int i = 0; i < a.length; i += SPECIES.length()) {
 459                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 460                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 461                 av.max(bv).intoArray(r, i);
 462             }
 463         }
 464 
 465         bh.consume(r);
 466     }
 467 
 468     @Benchmark
 469     public void min(Blackhole bh) {
 470         byte[] a = fa.apply(SPECIES.length());
 471         byte[] b = fb.apply(SPECIES.length());
 472         byte[] r = fr.apply(SPECIES.length());
 473 
 474         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 475             for (int i = 0; i < a.length; i += SPECIES.length()) {
 476                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 477                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 478                 av.min(bv).intoArray(r, i);
 479             }
 480         }
 481 
 482         bh.consume(r);
 483     }
 484 
 485 
 486     @Benchmark
 487     public void andAll(Blackhole bh) {
 488         byte[] a = fa.apply(SPECIES.length());
 489         byte ra = -1;
 490 
 491         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 492             ra = -1;
 493             for (int i = 0; i < a.length; i += SPECIES.length()) {
 494                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 495                 ra &= av.andAll();
 496             }
 497         }
 498         bh.consume(ra);
 499     }
 500 
 501 
 502 
 503     @Benchmark
 504     public void orAll(Blackhole bh) {
 505         byte[] a = fa.apply(SPECIES.length());
 506         byte ra = 0;
 507 
 508         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 509             ra = 0;
 510             for (int i = 0; i < a.length; i += SPECIES.length()) {
 511                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 512                 ra |= av.orAll();
 513             }
 514         }
 515         bh.consume(ra);
 516     }
 517 
 518 
 519 
 520     @Benchmark
 521     public void xorAll(Blackhole bh) {
 522         byte[] a = fa.apply(SPECIES.length());
 523         byte ra = 0;
 524 
 525         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 526             ra = 0;
 527             for (int i = 0; i < a.length; i += SPECIES.length()) {
 528                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 529                 ra ^= av.xorAll();
 530             }
 531         }
 532         bh.consume(ra);
 533     }
 534 
 535 
 536     @Benchmark
 537     public void addAll(Blackhole bh) {
 538         byte[] a = fa.apply(SPECIES.length());
 539         byte ra = 0;
 540 
 541         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 542             ra = 0;
 543             for (int i = 0; i < a.length; i += SPECIES.length()) {
 544                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 545                 ra += av.addAll();
 546             }
 547         }
 548         bh.consume(ra);
 549     }
 550 
 551     @Benchmark
 552     public void mulAll(Blackhole bh) {
 553         byte[] a = fa.apply(SPECIES.length());
 554         byte ra = 1;
 555 
 556         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 557             ra = 1;
 558             for (int i = 0; i < a.length; i += SPECIES.length()) {
 559                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 560                 ra *= av.mulAll();
 561             }
 562         }
 563         bh.consume(ra);
 564     }
 565 
 566     @Benchmark
 567     public void minAll(Blackhole bh) {
 568         byte[] a = fa.apply(SPECIES.length());
 569         byte ra = Byte.MAX_VALUE;
 570 
 571         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 572             ra = Byte.MAX_VALUE;
 573             for (int i = 0; i < a.length; i += SPECIES.length()) {
 574                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 575                 ra = (byte)Math.min(ra, av.minAll());
 576             }
 577         }
 578         bh.consume(ra);
 579     }
 580 
 581     @Benchmark
 582     public void maxAll(Blackhole bh) {
 583         byte[] a = fa.apply(SPECIES.length());
 584         byte ra = Byte.MIN_VALUE;
 585 
 586         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 587             ra = Byte.MIN_VALUE;
 588             for (int i = 0; i < a.length; i += SPECIES.length()) {
 589                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 590                 ra = (byte)Math.max(ra, av.maxAll());
 591             }
 592         }
 593         bh.consume(ra);
 594     }
 595 
 596 
 597     @Benchmark
 598     public void anyTrue(Blackhole bh) {
 599         boolean[] mask = fm.apply(SPECIES.length());
 600         boolean[] r = fmr.apply(SPECIES.length());
 601 
 602         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 603             for (int i = 0; i < mask.length; i += SPECIES.length()) {
 604                 VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, i);
 605                 r[i] = vmask.anyTrue();
 606             }
 607         }
 608 
 609         bh.consume(r);
 610     }
 611 
 612 
 613 
 614     @Benchmark
 615     public void allTrue(Blackhole bh) {
 616         boolean[] mask = fm.apply(SPECIES.length());
 617         boolean[] r = fmr.apply(SPECIES.length());
 618 
 619         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 620             for (int i = 0; i < mask.length; i += SPECIES.length()) {
 621                 VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, i);
 622                 r[i] = vmask.allTrue();
 623             }
 624         }
 625 
 626         bh.consume(r);
 627     }
 628 
 629 
 630     @Benchmark
 631     public void with(Blackhole bh) {
 632         byte[] a = fa.apply(SPECIES.length());
 633         byte[] r = fr.apply(SPECIES.length());
 634 
 635         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 636             for (int i = 0; i < a.length; i += SPECIES.length()) {
 637                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 638                 av.with(0, (byte)4).intoArray(r, i);
 639             }
 640         }
 641 
 642         bh.consume(r);
 643     }
 644 
 645     @Benchmark
 646     public Object lessThan() {
 647         byte[] a = fa.apply(size);
 648         byte[] b = fb.apply(size);
 649         boolean[] ms = fm.apply(size);
 650         VectorMask<Byte> m = VectorMask.maskFromArray(SPECIES, ms, 0);
 651 
 652         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 653             for (int i = 0; i < a.length; i += SPECIES.length()) {
 654                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 655                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 656                 VectorMask<Byte> mv = av.lessThan(bv);
 657 
 658                 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
 659             }
 660         }
 661         return m;
 662     }
 663 
 664 
 665     @Benchmark
 666     public Object greaterThan() {
 667         byte[] a = fa.apply(size);
 668         byte[] b = fb.apply(size);
 669         boolean[] ms = fm.apply(size);
 670         VectorMask<Byte> m = VectorMask.maskFromArray(SPECIES, ms, 0);
 671 
 672         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 673             for (int i = 0; i < a.length; i += SPECIES.length()) {
 674                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 675                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 676                 VectorMask<Byte> mv = av.greaterThan(bv);
 677 
 678                 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
 679             }
 680         }
 681         return m;
 682     }
 683 
 684 
 685     @Benchmark
 686     public Object equal() {
 687         byte[] a = fa.apply(size);
 688         byte[] b = fb.apply(size);
 689         boolean[] ms = fm.apply(size);
 690         VectorMask<Byte> m = VectorMask.maskFromArray(SPECIES, ms, 0);
 691 
 692         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 693             for (int i = 0; i < a.length; i += SPECIES.length()) {
 694                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 695                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 696                 VectorMask<Byte> mv = av.equal(bv);
 697 
 698                 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
 699             }
 700         }
 701         return m;
 702     }
 703 
 704 
 705     @Benchmark
 706     public Object notEqual() {
 707         byte[] a = fa.apply(size);
 708         byte[] b = fb.apply(size);
 709         boolean[] ms = fm.apply(size);
 710         VectorMask<Byte> m = VectorMask.maskFromArray(SPECIES, ms, 0);
 711 
 712         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 713             for (int i = 0; i < a.length; i += SPECIES.length()) {
 714                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 715                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 716                 VectorMask<Byte> mv = av.notEqual(bv);
 717 
 718                 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
 719             }
 720         }
 721         return m;
 722     }
 723 
 724 
 725     @Benchmark
 726     public Object lessThanEq() {
 727         byte[] a = fa.apply(size);
 728         byte[] b = fb.apply(size);
 729         boolean[] ms = fm.apply(size);
 730         VectorMask<Byte> m = VectorMask.maskFromArray(SPECIES, ms, 0);
 731 
 732         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 733             for (int i = 0; i < a.length; i += SPECIES.length()) {
 734                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 735                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 736                 VectorMask<Byte> mv = av.lessThanEq(bv);
 737 
 738                 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
 739             }
 740         }
 741         return m;
 742     }
 743 
 744 
 745     @Benchmark
 746     public Object greaterThanEq() {
 747         byte[] a = fa.apply(size);
 748         byte[] b = fb.apply(size);
 749         boolean[] ms = fm.apply(size);
 750         VectorMask<Byte> m = VectorMask.maskFromArray(SPECIES, ms, 0);
 751 
 752         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 753             for (int i = 0; i < a.length; i += SPECIES.length()) {
 754                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 755                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 756                 VectorMask<Byte> mv = av.greaterThanEq(bv);
 757 
 758                 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
 759             }
 760         }
 761         return m;
 762     }
 763 
 764 
 765     @Benchmark
 766     public void blend(Blackhole bh) {
 767         byte[] a = fa.apply(SPECIES.length());
 768         byte[] b = fb.apply(SPECIES.length());
 769         byte[] r = fr.apply(SPECIES.length());
 770         boolean[] mask = fm.apply(SPECIES.length());
 771         VectorMask<Byte> vmask = VectorMask.fromValues(SPECIES, mask);
 772 
 773         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 774             for (int i = 0; i < a.length; i += SPECIES.length()) {
 775                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 776                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 777                 av.blend(bv, vmask).intoArray(r, i);
 778             }
 779         }
 780 
 781         bh.consume(r);
 782     }
 783 
 784     @Benchmark
 785     public void rearrange(Blackhole bh) {
 786         byte[] a = fa.apply(SPECIES.length());
 787         int[] order = fs.apply(a.length, SPECIES.length());
 788         byte[] r = fr.apply(SPECIES.length());
 789 
 790         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 791             for (int i = 0; i < a.length; i += SPECIES.length()) {
 792                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 793                 av.rearrange(VectorShuffle.fromArray(SPECIES, order, i)).intoArray(r, i);
 794             }
 795         }
 796 
 797         bh.consume(r);
 798     }
 799 
 800     @Benchmark
 801     public void extract(Blackhole bh) {
 802         byte[] a = fa.apply(SPECIES.length());
 803         byte[] r = fr.apply(SPECIES.length());
 804 
 805         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 806             for (int i = 0; i < a.length; i += SPECIES.length()) {
 807                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 808                 int num_lanes = SPECIES.length();
 809                 // Manually unroll because full unroll happens after intrinsification.
 810                 // Unroll is needed because get intrinsic requires for index to be a known constant.
 811                 if (num_lanes == 1) {
 812                     r[i]=av.get(0);
 813                 } else if (num_lanes == 2) {
 814                     r[i]=av.get(0);
 815                     r[i+1]=av.get(1);
 816                 } else if (num_lanes == 4) {
 817                     r[i]=av.get(0);
 818                     r[i+1]=av.get(1);
 819                     r[i+2]=av.get(2);
 820                     r[i+3]=av.get(3);
 821                 } else if (num_lanes == 8) {
 822                     r[i]=av.get(0);
 823                     r[i+1]=av.get(1);
 824                     r[i+2]=av.get(2);
 825                     r[i+3]=av.get(3);
 826                     r[i+4]=av.get(4);
 827                     r[i+5]=av.get(5);
 828                     r[i+6]=av.get(6);
 829                     r[i+7]=av.get(7);
 830                 } else if (num_lanes == 16) {
 831                     r[i]=av.get(0);
 832                     r[i+1]=av.get(1);
 833                     r[i+2]=av.get(2);
 834                     r[i+3]=av.get(3);
 835                     r[i+4]=av.get(4);
 836                     r[i+5]=av.get(5);
 837                     r[i+6]=av.get(6);
 838                     r[i+7]=av.get(7);
 839                     r[i+8]=av.get(8);
 840                     r[i+9]=av.get(9);
 841                     r[i+10]=av.get(10);
 842                     r[i+11]=av.get(11);
 843                     r[i+12]=av.get(12);
 844                     r[i+13]=av.get(13);
 845                     r[i+14]=av.get(14);
 846                     r[i+15]=av.get(15);
 847                 } else if (num_lanes == 32) {
 848                     r[i]=av.get(0);
 849                     r[i+1]=av.get(1);
 850                     r[i+2]=av.get(2);
 851                     r[i+3]=av.get(3);
 852                     r[i+4]=av.get(4);
 853                     r[i+5]=av.get(5);
 854                     r[i+6]=av.get(6);
 855                     r[i+7]=av.get(7);
 856                     r[i+8]=av.get(8);
 857                     r[i+9]=av.get(9);
 858                     r[i+10]=av.get(10);
 859                     r[i+11]=av.get(11);
 860                     r[i+12]=av.get(12);
 861                     r[i+13]=av.get(13);
 862                     r[i+14]=av.get(14);
 863                     r[i+15]=av.get(15);
 864                     r[i+16]=av.get(16);
 865                     r[i+17]=av.get(17);
 866                     r[i+18]=av.get(18);
 867                     r[i+19]=av.get(19);
 868                     r[i+20]=av.get(20);
 869                     r[i+21]=av.get(21);
 870                     r[i+22]=av.get(22);
 871                     r[i+23]=av.get(23);
 872                     r[i+24]=av.get(24);
 873                     r[i+25]=av.get(25);
 874                     r[i+26]=av.get(26);
 875                     r[i+27]=av.get(27);
 876                     r[i+28]=av.get(28);
 877                     r[i+29]=av.get(29);
 878                     r[i+30]=av.get(30);
 879                     r[i+31]=av.get(31);
 880                 } else if (num_lanes == 64) {
 881                     r[i]=av.get(0);
 882                     r[i+1]=av.get(1);
 883                     r[i+2]=av.get(2);
 884                     r[i+3]=av.get(3);
 885                     r[i+4]=av.get(4);
 886                     r[i+5]=av.get(5);
 887                     r[i+6]=av.get(6);
 888                     r[i+7]=av.get(7);
 889                     r[i+8]=av.get(8);
 890                     r[i+9]=av.get(9);
 891                     r[i+10]=av.get(10);
 892                     r[i+11]=av.get(11);
 893                     r[i+12]=av.get(12);
 894                     r[i+13]=av.get(13);
 895                     r[i+14]=av.get(14);
 896                     r[i+15]=av.get(15);
 897                     r[i+16]=av.get(16);
 898                     r[i+17]=av.get(17);
 899                     r[i+18]=av.get(18);
 900                     r[i+19]=av.get(19);
 901                     r[i+20]=av.get(20);
 902                     r[i+21]=av.get(21);
 903                     r[i+22]=av.get(22);
 904                     r[i+23]=av.get(23);
 905                     r[i+24]=av.get(24);
 906                     r[i+25]=av.get(25);
 907                     r[i+26]=av.get(26);
 908                     r[i+27]=av.get(27);
 909                     r[i+28]=av.get(28);
 910                     r[i+29]=av.get(29);
 911                     r[i+30]=av.get(30);
 912                     r[i+31]=av.get(31);
 913                     r[i+32]=av.get(32);
 914                     r[i+33]=av.get(33);
 915                     r[i+34]=av.get(34);
 916                     r[i+35]=av.get(35);
 917                     r[i+36]=av.get(36);
 918                     r[i+37]=av.get(37);
 919                     r[i+38]=av.get(38);
 920                     r[i+39]=av.get(39);
 921                     r[i+40]=av.get(40);
 922                     r[i+41]=av.get(41);
 923                     r[i+42]=av.get(42);
 924                     r[i+43]=av.get(43);
 925                     r[i+44]=av.get(44);
 926                     r[i+45]=av.get(45);
 927                     r[i+46]=av.get(46);
 928                     r[i+47]=av.get(47);
 929                     r[i+48]=av.get(48);
 930                     r[i+49]=av.get(49);
 931                     r[i+50]=av.get(50);
 932                     r[i+51]=av.get(51);
 933                     r[i+52]=av.get(52);
 934                     r[i+53]=av.get(53);
 935                     r[i+54]=av.get(54);
 936                     r[i+55]=av.get(55);
 937                     r[i+56]=av.get(56);
 938                     r[i+57]=av.get(57);
 939                     r[i+58]=av.get(58);
 940                     r[i+59]=av.get(59);
 941                     r[i+60]=av.get(60);
 942                     r[i+61]=av.get(61);
 943                     r[i+62]=av.get(62);
 944                     r[i+63]=av.get(63);
 945                 } else {
 946                     for (int j = 0; j < SPECIES.length(); j++) {
 947                         r[i+j]=av.get(j);
 948                     }
 949                 }
 950             }
 951         }
 952 
 953         bh.consume(r);
 954     }
 955 
 956 
 957 
 958 
 959 
 960 
 961 
 962 
 963 
 964 
 965 
 966 
 967 
 968 
 969 
 970 
 971 
 972 
 973 
 974 
 975 
 976     @Benchmark
 977     public void neg(Blackhole bh) {
 978         byte[] a = fa.apply(SPECIES.length());
 979         byte[] r = fr.apply(SPECIES.length());
 980 
 981         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 982             for (int i = 0; i < a.length; i += SPECIES.length()) {
 983                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 984                 av.neg().intoArray(r, i);
 985             }
 986         }
 987 
 988         bh.consume(r);
 989     }
 990 
 991     @Benchmark
 992     public void negMasked(Blackhole bh) {
 993         byte[] a = fa.apply(SPECIES.length());
 994         byte[] r = fr.apply(SPECIES.length());
 995         boolean[] mask = fm.apply(SPECIES.length());
 996         VectorMask<Byte> vmask = VectorMask.fromValues(SPECIES, mask);
 997 
 998         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 999             for (int i = 0; i < a.length; i += SPECIES.length()) {
1000                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1001                 av.neg(vmask).intoArray(r, i);
1002             }
1003         }
1004 
1005         bh.consume(r);
1006     }
1007 
1008     @Benchmark
1009     public void abs(Blackhole bh) {
1010         byte[] a = fa.apply(SPECIES.length());
1011         byte[] r = fr.apply(SPECIES.length());
1012 
1013         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1014             for (int i = 0; i < a.length; i += SPECIES.length()) {
1015                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1016                 av.abs().intoArray(r, i);
1017             }
1018         }
1019 
1020         bh.consume(r);
1021     }
1022 
1023     @Benchmark
1024     public void absMasked(Blackhole bh) {
1025         byte[] a = fa.apply(SPECIES.length());
1026         byte[] r = fr.apply(SPECIES.length());
1027         boolean[] mask = fm.apply(SPECIES.length());
1028         VectorMask<Byte> vmask = VectorMask.fromValues(SPECIES, mask);
1029 
1030         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1031             for (int i = 0; i < a.length; i += SPECIES.length()) {
1032                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1033                 av.abs(vmask).intoArray(r, i);
1034             }
1035         }
1036 
1037         bh.consume(r);
1038     }
1039 
1040 
1041     @Benchmark
1042     public void not(Blackhole bh) {
1043         byte[] a = fa.apply(SPECIES.length());
1044         byte[] r = fr.apply(SPECIES.length());
1045 
1046         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1047             for (int i = 0; i < a.length; i += SPECIES.length()) {
1048                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1049                 av.not().intoArray(r, i);
1050             }
1051         }
1052 
1053         bh.consume(r);
1054     }
1055 
1056 
1057 
1058     @Benchmark
1059     public void notMasked(Blackhole bh) {
1060         byte[] a = fa.apply(SPECIES.length());
1061         byte[] r = fr.apply(SPECIES.length());
1062         boolean[] mask = fm.apply(SPECIES.length());
1063         VectorMask<Byte> vmask = VectorMask.fromValues(SPECIES, mask);
1064 
1065         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1066             for (int i = 0; i < a.length; i += SPECIES.length()) {
1067                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1068                 av.not(vmask).intoArray(r, i);
1069             }
1070         }
1071 
1072         bh.consume(r);
1073     }
1074 
1075 
1076 
1077 
1078 
1079 }
1080