1 /*
   2  * Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have
  21  * questions.
  22  */
  23 
  24 package benchmark.jdk.incubator.vector;
  25 
  26 import jdk.incubator.vector.Vector;
  27 import jdk.incubator.vector.VectorMask;
  28 import jdk.incubator.vector.VectorShape;
  29 import jdk.incubator.vector.VectorSpecies;
  30 import jdk.incubator.vector.VectorShuffle;
  31 import jdk.incubator.vector.ByteVector;
  32 
  33 import java.util.concurrent.TimeUnit;
  34 import java.util.function.BiFunction;
  35 import java.util.function.IntFunction;
  36 
  37 import org.openjdk.jmh.annotations.*;
  38 import org.openjdk.jmh.infra.Blackhole;
  39 
  40 @BenchmarkMode(Mode.Throughput)
  41 @OutputTimeUnit(TimeUnit.MILLISECONDS)
  42 @State(Scope.Benchmark)
  43 @Warmup(iterations = 3, time = 1)
  44 @Measurement(iterations = 5, time = 1)
  45 @Fork(value = 1, jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"})
  46 public class Byte64Vector extends AbstractVectorBenchmark {
  47     static final VectorSpecies<Byte> SPECIES = ByteVector.SPECIES_64;
  48 
  49     static final int INVOC_COUNT = 1; // get rid of outer loop
  50 
  51     @Param("1024")
  52     int size;
  53 
  54     byte[] fill(IntFunction<Byte> f) {
  55         byte[] array = new byte[size];
  56         for (int i = 0; i < array.length; i++) {
  57             array[i] = f.apply(i);
  58         }
  59         return array;
  60     }
  61 
  62     byte[] a, b, c, r;
  63     boolean[] m, rm;
  64     int[] s;
  65 
  66     @Setup
  67     public void init() {
  68         size += size % SPECIES.length(); // FIXME: add post-loops
  69 
  70         a = fill(i -> (byte)(2*i));
  71         b = fill(i -> (byte)(i+1));
  72         c = fill(i -> (byte)(i+5));
  73         r = fill(i -> (byte)0);
  74 
  75         m = fillMask(size, i -> (i % 2) == 0);
  76         rm = fillMask(size, i -> false);
  77 
  78         s = fillInt(size, i -> RANDOM.nextInt(SPECIES.length()));
  79     }
  80 
  81     final IntFunction<byte[]> fa = vl -> a;
  82     final IntFunction<byte[]> fb = vl -> b;
  83     final IntFunction<byte[]> fc = vl -> c;
  84     final IntFunction<byte[]> fr = vl -> r;
  85     final IntFunction<boolean[]> fm = vl -> m;
  86     final IntFunction<boolean[]> fmr = vl -> rm;
  87     final BiFunction<Integer,Integer,int[]> fs = (i,j) -> s;
  88 
  89 
  90     @Benchmark
  91     public void add(Blackhole bh) {
  92         byte[] a = fa.apply(SPECIES.length());
  93         byte[] b = fb.apply(SPECIES.length());
  94         byte[] r = fr.apply(SPECIES.length());
  95 
  96         for (int ic = 0; ic < INVOC_COUNT; ic++) {
  97             for (int i = 0; i < a.length; i += SPECIES.length()) {
  98                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
  99                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 100                 av.add(bv).intoArray(r, i);
 101             }
 102         }
 103 
 104         bh.consume(r);
 105     }
 106 
 107     @Benchmark
 108     public void addMasked(Blackhole bh) {
 109         byte[] a = fa.apply(SPECIES.length());
 110         byte[] b = fb.apply(SPECIES.length());
 111         byte[] r = fr.apply(SPECIES.length());
 112         boolean[] mask = fm.apply(SPECIES.length());
 113         VectorMask<Byte> vmask = VectorMask.fromValues(SPECIES, mask);
 114 
 115         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 116             for (int i = 0; i < a.length; i += SPECIES.length()) {
 117                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 118                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 119                 av.add(bv, vmask).intoArray(r, i);
 120             }
 121         }
 122 
 123         bh.consume(r);
 124     }
 125 
 126     @Benchmark
 127     public void sub(Blackhole bh) {
 128         byte[] a = fa.apply(SPECIES.length());
 129         byte[] b = fb.apply(SPECIES.length());
 130         byte[] r = fr.apply(SPECIES.length());
 131 
 132         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 133             for (int i = 0; i < a.length; i += SPECIES.length()) {
 134                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 135                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 136                 av.sub(bv).intoArray(r, i);
 137             }
 138         }
 139 
 140         bh.consume(r);
 141     }
 142 
 143     @Benchmark
 144     public void subMasked(Blackhole bh) {
 145         byte[] a = fa.apply(SPECIES.length());
 146         byte[] b = fb.apply(SPECIES.length());
 147         byte[] r = fr.apply(SPECIES.length());
 148         boolean[] mask = fm.apply(SPECIES.length());
 149         VectorMask<Byte> vmask = VectorMask.fromValues(SPECIES, mask);
 150 
 151         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 152             for (int i = 0; i < a.length; i += SPECIES.length()) {
 153                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 154                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 155                 av.sub(bv, vmask).intoArray(r, i);
 156             }
 157         }
 158 
 159         bh.consume(r);
 160     }
 161 
 162 
 163 
 164     @Benchmark
 165     public void mul(Blackhole bh) {
 166         byte[] a = fa.apply(SPECIES.length());
 167         byte[] b = fb.apply(SPECIES.length());
 168         byte[] r = fr.apply(SPECIES.length());
 169 
 170         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 171             for (int i = 0; i < a.length; i += SPECIES.length()) {
 172                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 173                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 174                 av.mul(bv).intoArray(r, i);
 175             }
 176         }
 177 
 178         bh.consume(r);
 179     }
 180 
 181     @Benchmark
 182     public void mulMasked(Blackhole bh) {
 183         byte[] a = fa.apply(SPECIES.length());
 184         byte[] b = fb.apply(SPECIES.length());
 185         byte[] r = fr.apply(SPECIES.length());
 186         boolean[] mask = fm.apply(SPECIES.length());
 187         VectorMask<Byte> vmask = VectorMask.fromValues(SPECIES, mask);
 188 
 189         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 190             for (int i = 0; i < a.length; i += SPECIES.length()) {
 191                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 192                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 193                 av.mul(bv, vmask).intoArray(r, i);
 194             }
 195         }
 196 
 197         bh.consume(r);
 198     }
 199 
 200 
 201     @Benchmark
 202     public void and(Blackhole bh) {
 203         byte[] a = fa.apply(SPECIES.length());
 204         byte[] b = fb.apply(SPECIES.length());
 205         byte[] r = fr.apply(SPECIES.length());
 206 
 207         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 208             for (int i = 0; i < a.length; i += SPECIES.length()) {
 209                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 210                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 211                 av.and(bv).intoArray(r, i);
 212             }
 213         }
 214 
 215         bh.consume(r);
 216     }
 217 
 218 
 219 
 220     @Benchmark
 221     public void andMasked(Blackhole bh) {
 222         byte[] a = fa.apply(SPECIES.length());
 223         byte[] b = fb.apply(SPECIES.length());
 224         byte[] r = fr.apply(SPECIES.length());
 225         boolean[] mask = fm.apply(SPECIES.length());
 226         VectorMask<Byte> vmask = VectorMask.fromValues(SPECIES, mask);
 227 
 228         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 229             for (int i = 0; i < a.length; i += SPECIES.length()) {
 230                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 231                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 232                 av.and(bv, vmask).intoArray(r, i);
 233             }
 234         }
 235 
 236         bh.consume(r);
 237     }
 238 
 239 
 240 
 241     @Benchmark
 242     public void or(Blackhole bh) {
 243         byte[] a = fa.apply(SPECIES.length());
 244         byte[] b = fb.apply(SPECIES.length());
 245         byte[] r = fr.apply(SPECIES.length());
 246 
 247         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 248             for (int i = 0; i < a.length; i += SPECIES.length()) {
 249                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 250                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 251                 av.or(bv).intoArray(r, i);
 252             }
 253         }
 254 
 255         bh.consume(r);
 256     }
 257 
 258 
 259 
 260     @Benchmark
 261     public void orMasked(Blackhole bh) {
 262         byte[] a = fa.apply(SPECIES.length());
 263         byte[] b = fb.apply(SPECIES.length());
 264         byte[] r = fr.apply(SPECIES.length());
 265         boolean[] mask = fm.apply(SPECIES.length());
 266         VectorMask<Byte> vmask = VectorMask.fromValues(SPECIES, mask);
 267 
 268         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 269             for (int i = 0; i < a.length; i += SPECIES.length()) {
 270                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 271                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 272                 av.or(bv, vmask).intoArray(r, i);
 273             }
 274         }
 275 
 276         bh.consume(r);
 277     }
 278 
 279 
 280 
 281     @Benchmark
 282     public void xor(Blackhole bh) {
 283         byte[] a = fa.apply(SPECIES.length());
 284         byte[] b = fb.apply(SPECIES.length());
 285         byte[] r = fr.apply(SPECIES.length());
 286 
 287         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 288             for (int i = 0; i < a.length; i += SPECIES.length()) {
 289                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 290                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 291                 av.xor(bv).intoArray(r, i);
 292             }
 293         }
 294 
 295         bh.consume(r);
 296     }
 297 
 298 
 299 
 300     @Benchmark
 301     public void xorMasked(Blackhole bh) {
 302         byte[] a = fa.apply(SPECIES.length());
 303         byte[] b = fb.apply(SPECIES.length());
 304         byte[] r = fr.apply(SPECIES.length());
 305         boolean[] mask = fm.apply(SPECIES.length());
 306         VectorMask<Byte> vmask = VectorMask.fromValues(SPECIES, mask);
 307 
 308         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 309             for (int i = 0; i < a.length; i += SPECIES.length()) {
 310                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 311                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 312                 av.xor(bv, vmask).intoArray(r, i);
 313             }
 314         }
 315 
 316         bh.consume(r);
 317     }
 318 
 319 
 320 
 321 
 322 
 323     @Benchmark
 324     public void shiftLeft(Blackhole bh) {
 325         byte[] a = fa.apply(SPECIES.length());
 326         byte[] b = fb.apply(SPECIES.length());
 327         byte[] r = fr.apply(SPECIES.length());
 328 
 329         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 330             for (int i = 0; i < a.length; i += SPECIES.length()) {
 331                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 332                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 333                 av.shiftLeft(bv).intoArray(r, i);
 334             }
 335         }
 336 
 337         bh.consume(r);
 338     }
 339 
 340 
 341 
 342     @Benchmark
 343     public void shiftLeftMasked(Blackhole bh) {
 344         byte[] a = fa.apply(SPECIES.length());
 345         byte[] b = fb.apply(SPECIES.length());
 346         byte[] r = fr.apply(SPECIES.length());
 347         boolean[] mask = fm.apply(SPECIES.length());
 348         VectorMask<Byte> vmask = VectorMask.fromValues(SPECIES, mask);
 349 
 350         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 351             for (int i = 0; i < a.length; i += SPECIES.length()) {
 352                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 353                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 354                 av.shiftLeft(bv, vmask).intoArray(r, i);
 355             }
 356         }
 357 
 358         bh.consume(r);
 359     }
 360 
 361 
 362 
 363 
 364 
 365 
 366 
 367     @Benchmark
 368     public void shiftRight(Blackhole bh) {
 369         byte[] a = fa.apply(SPECIES.length());
 370         byte[] b = fb.apply(SPECIES.length());
 371         byte[] r = fr.apply(SPECIES.length());
 372 
 373         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 374             for (int i = 0; i < a.length; i += SPECIES.length()) {
 375                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 376                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 377                 av.shiftRight(bv).intoArray(r, i);
 378             }
 379         }
 380 
 381         bh.consume(r);
 382     }
 383 
 384 
 385 
 386     @Benchmark
 387     public void shiftRightMasked(Blackhole bh) {
 388         byte[] a = fa.apply(SPECIES.length());
 389         byte[] b = fb.apply(SPECIES.length());
 390         byte[] r = fr.apply(SPECIES.length());
 391         boolean[] mask = fm.apply(SPECIES.length());
 392         VectorMask<Byte> vmask = VectorMask.fromValues(SPECIES, mask);
 393 
 394         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 395             for (int i = 0; i < a.length; i += SPECIES.length()) {
 396                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 397                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 398                 av.shiftRight(bv, vmask).intoArray(r, i);
 399             }
 400         }
 401 
 402         bh.consume(r);
 403     }
 404 
 405 
 406 
 407 
 408 
 409 
 410 
 411     @Benchmark
 412     public void shiftArithmeticRight(Blackhole bh) {
 413         byte[] a = fa.apply(SPECIES.length());
 414         byte[] b = fb.apply(SPECIES.length());
 415         byte[] r = fr.apply(SPECIES.length());
 416 
 417         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 418             for (int i = 0; i < a.length; i += SPECIES.length()) {
 419                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 420                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 421                 av.shiftArithmeticRight(bv).intoArray(r, i);
 422             }
 423         }
 424 
 425         bh.consume(r);
 426     }
 427 
 428 
 429 
 430     @Benchmark
 431     public void shiftArithmeticRightMasked(Blackhole bh) {
 432         byte[] a = fa.apply(SPECIES.length());
 433         byte[] b = fb.apply(SPECIES.length());
 434         byte[] r = fr.apply(SPECIES.length());
 435         boolean[] mask = fm.apply(SPECIES.length());
 436         VectorMask<Byte> vmask = VectorMask.fromValues(SPECIES, mask);
 437 
 438         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 439             for (int i = 0; i < a.length; i += SPECIES.length()) {
 440                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 441                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 442                 av.shiftArithmeticRight(bv, vmask).intoArray(r, i);
 443             }
 444         }
 445 
 446         bh.consume(r);
 447     }
 448 
 449 
 450 
 451 
 452 
 453 
 454 
 455     @Benchmark
 456     public void shiftLeftShift(Blackhole bh) {
 457         byte[] a = fa.apply(SPECIES.length());
 458         byte[] b = fb.apply(SPECIES.length());
 459         byte[] r = fr.apply(SPECIES.length());
 460 
 461         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 462             for (int i = 0; i < a.length; i += SPECIES.length()) {
 463                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 464                 av.shiftLeft((int)b[i]).intoArray(r, i);
 465             }
 466         }
 467 
 468         bh.consume(r);
 469     }
 470 
 471 
 472 
 473     @Benchmark
 474     public void shiftLeftMaskedShift(Blackhole bh) {
 475         byte[] a = fa.apply(SPECIES.length());
 476         byte[] b = fb.apply(SPECIES.length());
 477         byte[] r = fr.apply(SPECIES.length());
 478         boolean[] mask = fm.apply(SPECIES.length());
 479         VectorMask<Byte> vmask = VectorMask.fromValues(SPECIES, mask);
 480 
 481         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 482             for (int i = 0; i < a.length; i += SPECIES.length()) {
 483                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 484                 av.shiftLeft((int)b[i], vmask).intoArray(r, i);
 485             }
 486         }
 487 
 488         bh.consume(r);
 489     }
 490 
 491 
 492 
 493 
 494 
 495 
 496 
 497     @Benchmark
 498     public void shiftRightShift(Blackhole bh) {
 499         byte[] a = fa.apply(SPECIES.length());
 500         byte[] b = fb.apply(SPECIES.length());
 501         byte[] r = fr.apply(SPECIES.length());
 502 
 503         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 504             for (int i = 0; i < a.length; i += SPECIES.length()) {
 505                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 506                 av.shiftRight((int)b[i]).intoArray(r, i);
 507             }
 508         }
 509 
 510         bh.consume(r);
 511     }
 512 
 513 
 514 
 515     @Benchmark
 516     public void shiftRightMaskedShift(Blackhole bh) {
 517         byte[] a = fa.apply(SPECIES.length());
 518         byte[] b = fb.apply(SPECIES.length());
 519         byte[] r = fr.apply(SPECIES.length());
 520         boolean[] mask = fm.apply(SPECIES.length());
 521         VectorMask<Byte> vmask = VectorMask.fromValues(SPECIES, mask);
 522 
 523         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 524             for (int i = 0; i < a.length; i += SPECIES.length()) {
 525                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 526                 av.shiftRight((int)b[i], vmask).intoArray(r, i);
 527             }
 528         }
 529 
 530         bh.consume(r);
 531     }
 532 
 533 
 534 
 535 
 536 
 537 
 538 
 539     @Benchmark
 540     public void shiftArithmeticRightShift(Blackhole bh) {
 541         byte[] a = fa.apply(SPECIES.length());
 542         byte[] b = fb.apply(SPECIES.length());
 543         byte[] r = fr.apply(SPECIES.length());
 544 
 545         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 546             for (int i = 0; i < a.length; i += SPECIES.length()) {
 547                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 548                 av.shiftArithmeticRight((int)b[i]).intoArray(r, i);
 549             }
 550         }
 551 
 552         bh.consume(r);
 553     }
 554 
 555 
 556 
 557     @Benchmark
 558     public void shiftArithmeticRightMaskedShift(Blackhole bh) {
 559         byte[] a = fa.apply(SPECIES.length());
 560         byte[] b = fb.apply(SPECIES.length());
 561         byte[] r = fr.apply(SPECIES.length());
 562         boolean[] mask = fm.apply(SPECIES.length());
 563         VectorMask<Byte> vmask = VectorMask.fromValues(SPECIES, mask);
 564 
 565         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 566             for (int i = 0; i < a.length; i += SPECIES.length()) {
 567                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 568                 av.shiftArithmeticRight((int)b[i], vmask).intoArray(r, i);
 569             }
 570         }
 571 
 572         bh.consume(r);
 573     }
 574 
 575 
 576 
 577 
 578     @Benchmark
 579     public void max(Blackhole bh) {
 580         byte[] a = fa.apply(SPECIES.length());
 581         byte[] b = fb.apply(SPECIES.length());
 582         byte[] r = fr.apply(SPECIES.length());
 583 
 584         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 585             for (int i = 0; i < a.length; i += SPECIES.length()) {
 586                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 587                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 588                 av.max(bv).intoArray(r, i);
 589             }
 590         }
 591 
 592         bh.consume(r);
 593     }
 594 
 595     @Benchmark
 596     public void min(Blackhole bh) {
 597         byte[] a = fa.apply(SPECIES.length());
 598         byte[] b = fb.apply(SPECIES.length());
 599         byte[] r = fr.apply(SPECIES.length());
 600 
 601         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 602             for (int i = 0; i < a.length; i += SPECIES.length()) {
 603                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 604                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 605                 av.min(bv).intoArray(r, i);
 606             }
 607         }
 608 
 609         bh.consume(r);
 610     }
 611 
 612 
 613     @Benchmark
 614     public void andLanes(Blackhole bh) {
 615         byte[] a = fa.apply(SPECIES.length());
 616         byte ra = -1;
 617 
 618         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 619             ra = -1;
 620             for (int i = 0; i < a.length; i += SPECIES.length()) {
 621                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 622                 ra &= av.andLanes();
 623             }
 624         }
 625         bh.consume(ra);
 626     }
 627 
 628 
 629 
 630     @Benchmark
 631     public void orLanes(Blackhole bh) {
 632         byte[] a = fa.apply(SPECIES.length());
 633         byte ra = 0;
 634 
 635         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 636             ra = 0;
 637             for (int i = 0; i < a.length; i += SPECIES.length()) {
 638                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 639                 ra |= av.orLanes();
 640             }
 641         }
 642         bh.consume(ra);
 643     }
 644 
 645 
 646 
 647     @Benchmark
 648     public void xorLanes(Blackhole bh) {
 649         byte[] a = fa.apply(SPECIES.length());
 650         byte ra = 0;
 651 
 652         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 653             ra = 0;
 654             for (int i = 0; i < a.length; i += SPECIES.length()) {
 655                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 656                 ra ^= av.xorLanes();
 657             }
 658         }
 659         bh.consume(ra);
 660     }
 661 
 662 
 663     @Benchmark
 664     public void addLanes(Blackhole bh) {
 665         byte[] a = fa.apply(SPECIES.length());
 666         byte ra = 0;
 667 
 668         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 669             ra = 0;
 670             for (int i = 0; i < a.length; i += SPECIES.length()) {
 671                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 672                 ra += av.addLanes();
 673             }
 674         }
 675         bh.consume(ra);
 676     }
 677 
 678     @Benchmark
 679     public void mulLanes(Blackhole bh) {
 680         byte[] a = fa.apply(SPECIES.length());
 681         byte ra = 1;
 682 
 683         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 684             ra = 1;
 685             for (int i = 0; i < a.length; i += SPECIES.length()) {
 686                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 687                 ra *= av.mulLanes();
 688             }
 689         }
 690         bh.consume(ra);
 691     }
 692 
 693     @Benchmark
 694     public void minLanes(Blackhole bh) {
 695         byte[] a = fa.apply(SPECIES.length());
 696         byte ra = Byte.MAX_VALUE;
 697 
 698         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 699             ra = Byte.MAX_VALUE;
 700             for (int i = 0; i < a.length; i += SPECIES.length()) {
 701                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 702                 ra = (byte)Math.min(ra, av.minLanes());
 703             }
 704         }
 705         bh.consume(ra);
 706     }
 707 
 708     @Benchmark
 709     public void maxLanes(Blackhole bh) {
 710         byte[] a = fa.apply(SPECIES.length());
 711         byte ra = Byte.MIN_VALUE;
 712 
 713         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 714             ra = Byte.MIN_VALUE;
 715             for (int i = 0; i < a.length; i += SPECIES.length()) {
 716                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 717                 ra = (byte)Math.max(ra, av.maxLanes());
 718             }
 719         }
 720         bh.consume(ra);
 721     }
 722 
 723 
 724     @Benchmark
 725     public void anyTrue(Blackhole bh) {
 726         boolean[] mask = fm.apply(SPECIES.length());
 727         boolean[] r = fmr.apply(SPECIES.length());
 728 
 729         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 730             for (int i = 0; i < mask.length; i += SPECIES.length()) {
 731                 VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, i);
 732                 r[i] = vmask.anyTrue();
 733             }
 734         }
 735 
 736         bh.consume(r);
 737     }
 738 
 739 
 740 
 741     @Benchmark
 742     public void allTrue(Blackhole bh) {
 743         boolean[] mask = fm.apply(SPECIES.length());
 744         boolean[] r = fmr.apply(SPECIES.length());
 745 
 746         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 747             for (int i = 0; i < mask.length; i += SPECIES.length()) {
 748                 VectorMask<Byte> vmask = VectorMask.fromArray(SPECIES, mask, i);
 749                 r[i] = vmask.allTrue();
 750             }
 751         }
 752 
 753         bh.consume(r);
 754     }
 755 
 756 
 757     @Benchmark
 758     public void with(Blackhole bh) {
 759         byte[] a = fa.apply(SPECIES.length());
 760         byte[] r = fr.apply(SPECIES.length());
 761 
 762         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 763             for (int i = 0; i < a.length; i += SPECIES.length()) {
 764                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 765                 av.with(0, (byte)4).intoArray(r, i);
 766             }
 767         }
 768 
 769         bh.consume(r);
 770     }
 771 
 772     @Benchmark
 773     public Object lessThan() {
 774         byte[] a = fa.apply(size);
 775         byte[] b = fb.apply(size);
 776         boolean[] ms = fm.apply(size);
 777         VectorMask<Byte> m = VectorMask.fromArray(SPECIES, ms, 0);
 778 
 779         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 780             for (int i = 0; i < a.length; i += SPECIES.length()) {
 781                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 782                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 783                 VectorMask<Byte> mv = av.lessThan(bv);
 784 
 785                 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
 786             }
 787         }
 788         return m;
 789     }
 790 
 791 
 792     @Benchmark
 793     public Object greaterThan() {
 794         byte[] a = fa.apply(size);
 795         byte[] b = fb.apply(size);
 796         boolean[] ms = fm.apply(size);
 797         VectorMask<Byte> m = VectorMask.fromArray(SPECIES, ms, 0);
 798 
 799         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 800             for (int i = 0; i < a.length; i += SPECIES.length()) {
 801                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 802                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 803                 VectorMask<Byte> mv = av.greaterThan(bv);
 804 
 805                 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
 806             }
 807         }
 808         return m;
 809     }
 810 
 811 
 812     @Benchmark
 813     public Object equal() {
 814         byte[] a = fa.apply(size);
 815         byte[] b = fb.apply(size);
 816         boolean[] ms = fm.apply(size);
 817         VectorMask<Byte> m = VectorMask.fromArray(SPECIES, ms, 0);
 818 
 819         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 820             for (int i = 0; i < a.length; i += SPECIES.length()) {
 821                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 822                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 823                 VectorMask<Byte> mv = av.equal(bv);
 824 
 825                 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
 826             }
 827         }
 828         return m;
 829     }
 830 
 831 
 832     @Benchmark
 833     public Object notEqual() {
 834         byte[] a = fa.apply(size);
 835         byte[] b = fb.apply(size);
 836         boolean[] ms = fm.apply(size);
 837         VectorMask<Byte> m = VectorMask.fromArray(SPECIES, ms, 0);
 838 
 839         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 840             for (int i = 0; i < a.length; i += SPECIES.length()) {
 841                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 842                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 843                 VectorMask<Byte> mv = av.notEqual(bv);
 844 
 845                 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
 846             }
 847         }
 848         return m;
 849     }
 850 
 851 
 852     @Benchmark
 853     public Object lessThanEq() {
 854         byte[] a = fa.apply(size);
 855         byte[] b = fb.apply(size);
 856         boolean[] ms = fm.apply(size);
 857         VectorMask<Byte> m = VectorMask.fromArray(SPECIES, ms, 0);
 858 
 859         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 860             for (int i = 0; i < a.length; i += SPECIES.length()) {
 861                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 862                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 863                 VectorMask<Byte> mv = av.lessThanEq(bv);
 864 
 865                 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
 866             }
 867         }
 868         return m;
 869     }
 870 
 871 
 872     @Benchmark
 873     public Object greaterThanEq() {
 874         byte[] a = fa.apply(size);
 875         byte[] b = fb.apply(size);
 876         boolean[] ms = fm.apply(size);
 877         VectorMask<Byte> m = VectorMask.fromArray(SPECIES, ms, 0);
 878 
 879         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 880             for (int i = 0; i < a.length; i += SPECIES.length()) {
 881                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 882                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 883                 VectorMask<Byte> mv = av.greaterThanEq(bv);
 884 
 885                 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
 886             }
 887         }
 888         return m;
 889     }
 890 
 891 
 892     @Benchmark
 893     public void blend(Blackhole bh) {
 894         byte[] a = fa.apply(SPECIES.length());
 895         byte[] b = fb.apply(SPECIES.length());
 896         byte[] r = fr.apply(SPECIES.length());
 897         boolean[] mask = fm.apply(SPECIES.length());
 898         VectorMask<Byte> vmask = VectorMask.fromValues(SPECIES, mask);
 899 
 900         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 901             for (int i = 0; i < a.length; i += SPECIES.length()) {
 902                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 903                 ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
 904                 av.blend(bv, vmask).intoArray(r, i);
 905             }
 906         }
 907 
 908         bh.consume(r);
 909     }
 910 
 911     @Benchmark
 912     public void rearrange(Blackhole bh) {
 913         byte[] a = fa.apply(SPECIES.length());
 914         int[] order = fs.apply(a.length, SPECIES.length());
 915         byte[] r = fr.apply(SPECIES.length());
 916 
 917         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 918             for (int i = 0; i < a.length; i += SPECIES.length()) {
 919                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 920                 av.rearrange(VectorShuffle.fromArray(SPECIES, order, i)).intoArray(r, i);
 921             }
 922         }
 923 
 924         bh.consume(r);
 925     }
 926 
 927     @Benchmark
 928     public void extract(Blackhole bh) {
 929         byte[] a = fa.apply(SPECIES.length());
 930         byte[] r = fr.apply(SPECIES.length());
 931 
 932         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 933             for (int i = 0; i < a.length; i += SPECIES.length()) {
 934                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
 935                 int num_lanes = SPECIES.length();
 936                 // Manually unroll because full unroll happens after intrinsification.
 937                 // Unroll is needed because get intrinsic requires for index to be a known constant.
 938                 if (num_lanes == 1) {
 939                     r[i]=av.lane(0);
 940                 } else if (num_lanes == 2) {
 941                     r[i]=av.lane(0);
 942                     r[i+1]=av.lane(1);
 943                 } else if (num_lanes == 4) {
 944                     r[i]=av.lane(0);
 945                     r[i+1]=av.lane(1);
 946                     r[i+2]=av.lane(2);
 947                     r[i+3]=av.lane(3);
 948                 } else if (num_lanes == 8) {
 949                     r[i]=av.lane(0);
 950                     r[i+1]=av.lane(1);
 951                     r[i+2]=av.lane(2);
 952                     r[i+3]=av.lane(3);
 953                     r[i+4]=av.lane(4);
 954                     r[i+5]=av.lane(5);
 955                     r[i+6]=av.lane(6);
 956                     r[i+7]=av.lane(7);
 957                 } else if (num_lanes == 16) {
 958                     r[i]=av.lane(0);
 959                     r[i+1]=av.lane(1);
 960                     r[i+2]=av.lane(2);
 961                     r[i+3]=av.lane(3);
 962                     r[i+4]=av.lane(4);
 963                     r[i+5]=av.lane(5);
 964                     r[i+6]=av.lane(6);
 965                     r[i+7]=av.lane(7);
 966                     r[i+8]=av.lane(8);
 967                     r[i+9]=av.lane(9);
 968                     r[i+10]=av.lane(10);
 969                     r[i+11]=av.lane(11);
 970                     r[i+12]=av.lane(12);
 971                     r[i+13]=av.lane(13);
 972                     r[i+14]=av.lane(14);
 973                     r[i+15]=av.lane(15);
 974                 } else if (num_lanes == 32) {
 975                     r[i]=av.lane(0);
 976                     r[i+1]=av.lane(1);
 977                     r[i+2]=av.lane(2);
 978                     r[i+3]=av.lane(3);
 979                     r[i+4]=av.lane(4);
 980                     r[i+5]=av.lane(5);
 981                     r[i+6]=av.lane(6);
 982                     r[i+7]=av.lane(7);
 983                     r[i+8]=av.lane(8);
 984                     r[i+9]=av.lane(9);
 985                     r[i+10]=av.lane(10);
 986                     r[i+11]=av.lane(11);
 987                     r[i+12]=av.lane(12);
 988                     r[i+13]=av.lane(13);
 989                     r[i+14]=av.lane(14);
 990                     r[i+15]=av.lane(15);
 991                     r[i+16]=av.lane(16);
 992                     r[i+17]=av.lane(17);
 993                     r[i+18]=av.lane(18);
 994                     r[i+19]=av.lane(19);
 995                     r[i+20]=av.lane(20);
 996                     r[i+21]=av.lane(21);
 997                     r[i+22]=av.lane(22);
 998                     r[i+23]=av.lane(23);
 999                     r[i+24]=av.lane(24);
1000                     r[i+25]=av.lane(25);
1001                     r[i+26]=av.lane(26);
1002                     r[i+27]=av.lane(27);
1003                     r[i+28]=av.lane(28);
1004                     r[i+29]=av.lane(29);
1005                     r[i+30]=av.lane(30);
1006                     r[i+31]=av.lane(31);
1007                 } else if (num_lanes == 64) {
1008                     r[i]=av.lane(0);
1009                     r[i+1]=av.lane(1);
1010                     r[i+2]=av.lane(2);
1011                     r[i+3]=av.lane(3);
1012                     r[i+4]=av.lane(4);
1013                     r[i+5]=av.lane(5);
1014                     r[i+6]=av.lane(6);
1015                     r[i+7]=av.lane(7);
1016                     r[i+8]=av.lane(8);
1017                     r[i+9]=av.lane(9);
1018                     r[i+10]=av.lane(10);
1019                     r[i+11]=av.lane(11);
1020                     r[i+12]=av.lane(12);
1021                     r[i+13]=av.lane(13);
1022                     r[i+14]=av.lane(14);
1023                     r[i+15]=av.lane(15);
1024                     r[i+16]=av.lane(16);
1025                     r[i+17]=av.lane(17);
1026                     r[i+18]=av.lane(18);
1027                     r[i+19]=av.lane(19);
1028                     r[i+20]=av.lane(20);
1029                     r[i+21]=av.lane(21);
1030                     r[i+22]=av.lane(22);
1031                     r[i+23]=av.lane(23);
1032                     r[i+24]=av.lane(24);
1033                     r[i+25]=av.lane(25);
1034                     r[i+26]=av.lane(26);
1035                     r[i+27]=av.lane(27);
1036                     r[i+28]=av.lane(28);
1037                     r[i+29]=av.lane(29);
1038                     r[i+30]=av.lane(30);
1039                     r[i+31]=av.lane(31);
1040                     r[i+32]=av.lane(32);
1041                     r[i+33]=av.lane(33);
1042                     r[i+34]=av.lane(34);
1043                     r[i+35]=av.lane(35);
1044                     r[i+36]=av.lane(36);
1045                     r[i+37]=av.lane(37);
1046                     r[i+38]=av.lane(38);
1047                     r[i+39]=av.lane(39);
1048                     r[i+40]=av.lane(40);
1049                     r[i+41]=av.lane(41);
1050                     r[i+42]=av.lane(42);
1051                     r[i+43]=av.lane(43);
1052                     r[i+44]=av.lane(44);
1053                     r[i+45]=av.lane(45);
1054                     r[i+46]=av.lane(46);
1055                     r[i+47]=av.lane(47);
1056                     r[i+48]=av.lane(48);
1057                     r[i+49]=av.lane(49);
1058                     r[i+50]=av.lane(50);
1059                     r[i+51]=av.lane(51);
1060                     r[i+52]=av.lane(52);
1061                     r[i+53]=av.lane(53);
1062                     r[i+54]=av.lane(54);
1063                     r[i+55]=av.lane(55);
1064                     r[i+56]=av.lane(56);
1065                     r[i+57]=av.lane(57);
1066                     r[i+58]=av.lane(58);
1067                     r[i+59]=av.lane(59);
1068                     r[i+60]=av.lane(60);
1069                     r[i+61]=av.lane(61);
1070                     r[i+62]=av.lane(62);
1071                     r[i+63]=av.lane(63);
1072                 } else {
1073                     for (int j = 0; j < SPECIES.length(); j++) {
1074                         r[i+j]=av.lane(j);
1075                     }
1076                 }
1077             }
1078         }
1079 
1080         bh.consume(r);
1081     }
1082 
1083 
1084 
1085 
1086 
1087 
1088 
1089 
1090 
1091 
1092 
1093 
1094 
1095 
1096 
1097 
1098 
1099 
1100 
1101 
1102 
1103     @Benchmark
1104     public void neg(Blackhole bh) {
1105         byte[] a = fa.apply(SPECIES.length());
1106         byte[] r = fr.apply(SPECIES.length());
1107 
1108         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1109             for (int i = 0; i < a.length; i += SPECIES.length()) {
1110                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1111                 av.neg().intoArray(r, i);
1112             }
1113         }
1114 
1115         bh.consume(r);
1116     }
1117 
1118     @Benchmark
1119     public void negMasked(Blackhole bh) {
1120         byte[] a = fa.apply(SPECIES.length());
1121         byte[] r = fr.apply(SPECIES.length());
1122         boolean[] mask = fm.apply(SPECIES.length());
1123         VectorMask<Byte> vmask = VectorMask.fromValues(SPECIES, mask);
1124 
1125         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1126             for (int i = 0; i < a.length; i += SPECIES.length()) {
1127                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1128                 av.neg(vmask).intoArray(r, i);
1129             }
1130         }
1131 
1132         bh.consume(r);
1133     }
1134 
1135     @Benchmark
1136     public void abs(Blackhole bh) {
1137         byte[] a = fa.apply(SPECIES.length());
1138         byte[] r = fr.apply(SPECIES.length());
1139 
1140         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1141             for (int i = 0; i < a.length; i += SPECIES.length()) {
1142                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1143                 av.abs().intoArray(r, i);
1144             }
1145         }
1146 
1147         bh.consume(r);
1148     }
1149 
1150     @Benchmark
1151     public void absMasked(Blackhole bh) {
1152         byte[] a = fa.apply(SPECIES.length());
1153         byte[] r = fr.apply(SPECIES.length());
1154         boolean[] mask = fm.apply(SPECIES.length());
1155         VectorMask<Byte> vmask = VectorMask.fromValues(SPECIES, mask);
1156 
1157         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1158             for (int i = 0; i < a.length; i += SPECIES.length()) {
1159                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1160                 av.abs(vmask).intoArray(r, i);
1161             }
1162         }
1163 
1164         bh.consume(r);
1165     }
1166 
1167 
1168     @Benchmark
1169     public void not(Blackhole bh) {
1170         byte[] a = fa.apply(SPECIES.length());
1171         byte[] r = fr.apply(SPECIES.length());
1172 
1173         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1174             for (int i = 0; i < a.length; i += SPECIES.length()) {
1175                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1176                 av.not().intoArray(r, i);
1177             }
1178         }
1179 
1180         bh.consume(r);
1181     }
1182 
1183 
1184 
1185     @Benchmark
1186     public void notMasked(Blackhole bh) {
1187         byte[] a = fa.apply(SPECIES.length());
1188         byte[] r = fr.apply(SPECIES.length());
1189         boolean[] mask = fm.apply(SPECIES.length());
1190         VectorMask<Byte> vmask = VectorMask.fromValues(SPECIES, mask);
1191 
1192         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1193             for (int i = 0; i < a.length; i += SPECIES.length()) {
1194                 ByteVector av = ByteVector.fromArray(SPECIES, a, i);
1195                 av.not(vmask).intoArray(r, i);
1196             }
1197         }
1198 
1199         bh.consume(r);
1200     }
1201 
1202 
1203 
1204 
1205 
1206 }
1207