1 /*
   2  * Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have
  21  * questions.
  22  */
  23 
  24 package benchmark.jdk.incubator.vector;
  25 
import jdk.incubator.vector.Vector;
import jdk.incubator.vector.VectorMask;
import jdk.incubator.vector.VectorShape;
import jdk.incubator.vector.VectorSpecies;
import jdk.incubator.vector.VectorShuffle;
import jdk.incubator.vector.ShortVector;
  31 
  32 import java.util.concurrent.TimeUnit;
  33 import java.util.function.BiFunction;
  34 import java.util.function.IntFunction;
  35 
  36 import org.openjdk.jmh.annotations.*;
  37 import org.openjdk.jmh.infra.Blackhole;
  38 
  39 @BenchmarkMode(Mode.Throughput)
  40 @OutputTimeUnit(TimeUnit.MILLISECONDS)
  41 @State(Scope.Benchmark)
  42 @Warmup(iterations = 3, time = 1)
  43 @Measurement(iterations = 5, time = 1)
  44 @Fork(value = 1, jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"})
  45 public class ShortMaxVector extends AbstractVectorBenchmark {
    /** Vector species used by every benchmark in this class ({@code ShortVector.SPECIES_MAX}). */
    static final VectorSpecies<Short> SPECIES = ShortVector.SPECIES_MAX;

    /** Bound of the outer repetition loop each benchmark wraps around its main loop. */
    static final int INVOC_COUNT = 1; // get rid of outer loop

    /** Element count of the benchmark arrays; a JMH parameter (default 1024) that init() may adjust. */
    @Param("1024")
    int size;
  52 
  53     short[] fill(IntFunction<Short> f) {
  54         short[] array = new short[size];
  55         for (int i = 0; i < array.length; i++) {
  56             array[i] = f.apply(i);
  57         }
  58         return array;
  59     }
  60 
    // Benchmark data populated by init(): a, b and c are filled with index-derived values,
    // r starts out zeroed and is the array the benchmarks store their results into.
    short[] a, b, c, r;
    // m holds an alternating true/false pattern; rm starts all-false and receives boolean results.
    boolean[] m, rm;
    // Random lane indices in [0, SPECIES.length()), consumed by rearrange() as shuffle source.
    int[] s;
  64 
  65     @Setup
  66     public void init() {
  67         size += size % SPECIES.length(); // FIXME: add post-loops
  68 
  69         a = fill(i -> (short)(2*i));
  70         b = fill(i -> (short)(i+1));
  71         c = fill(i -> (short)(i+5));
  72         r = fill(i -> (short)0);
  73 
  74         m = fillMask(size, i -> (i % 2) == 0);
  75         rm = fillMask(size, i -> false);
  76 
  77         s = fillInt(size, i -> RANDOM.nextInt(SPECIES.length()));
  78     }
  79 
    // Accessors through which the benchmarks obtain the arrays prepared by init().
    // Each one ignores its argument(s) and simply returns the corresponding field.
    final IntFunction<short[]> fa = vl -> a;
    final IntFunction<short[]> fb = vl -> b;
    final IntFunction<short[]> fc = vl -> c;
    final IntFunction<short[]> fr = vl -> r;
    final IntFunction<boolean[]> fm = vl -> m;
    final IntFunction<boolean[]> fmr = vl -> rm;
    final BiFunction<Integer,Integer,int[]> fs = (i,j) -> s;
  87 
  88 
  89     @Benchmark
  90     public void add(Blackhole bh) {
  91         short[] a = fa.apply(SPECIES.length());
  92         short[] b = fb.apply(SPECIES.length());
  93         short[] r = fr.apply(SPECIES.length());
  94 
  95         for (int ic = 0; ic < INVOC_COUNT; ic++) {
  96             for (int i = 0; i < a.length; i += SPECIES.length()) {
  97                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
  98                 ShortVector bv = ShortVector.fromArray(SPECIES, b, i);
  99                 av.add(bv).intoArray(r, i);
 100             }
 101         }
 102 
 103         bh.consume(r);
 104     }
 105 
 106     @Benchmark
 107     public void addMasked(Blackhole bh) {
 108         short[] a = fa.apply(SPECIES.length());
 109         short[] b = fb.apply(SPECIES.length());
 110         short[] r = fr.apply(SPECIES.length());
 111         boolean[] mask = fm.apply(SPECIES.length());
 112         VectorMask<Short> vmask = VectorMask.fromValues(SPECIES, mask);
 113 
 114         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 115             for (int i = 0; i < a.length; i += SPECIES.length()) {
 116                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 117                 ShortVector bv = ShortVector.fromArray(SPECIES, b, i);
 118                 av.add(bv, vmask).intoArray(r, i);
 119             }
 120         }
 121 
 122         bh.consume(r);
 123     }
 124 
 125     @Benchmark
 126     public void sub(Blackhole bh) {
 127         short[] a = fa.apply(SPECIES.length());
 128         short[] b = fb.apply(SPECIES.length());
 129         short[] r = fr.apply(SPECIES.length());
 130 
 131         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 132             for (int i = 0; i < a.length; i += SPECIES.length()) {
 133                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 134                 ShortVector bv = ShortVector.fromArray(SPECIES, b, i);
 135                 av.sub(bv).intoArray(r, i);
 136             }
 137         }
 138 
 139         bh.consume(r);
 140     }
 141 
 142     @Benchmark
 143     public void subMasked(Blackhole bh) {
 144         short[] a = fa.apply(SPECIES.length());
 145         short[] b = fb.apply(SPECIES.length());
 146         short[] r = fr.apply(SPECIES.length());
 147         boolean[] mask = fm.apply(SPECIES.length());
 148         VectorMask<Short> vmask = VectorMask.fromValues(SPECIES, mask);
 149 
 150         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 151             for (int i = 0; i < a.length; i += SPECIES.length()) {
 152                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 153                 ShortVector bv = ShortVector.fromArray(SPECIES, b, i);
 154                 av.sub(bv, vmask).intoArray(r, i);
 155             }
 156         }
 157 
 158         bh.consume(r);
 159     }
 160 
 161 
 162 
 163     @Benchmark
 164     public void mul(Blackhole bh) {
 165         short[] a = fa.apply(SPECIES.length());
 166         short[] b = fb.apply(SPECIES.length());
 167         short[] r = fr.apply(SPECIES.length());
 168 
 169         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 170             for (int i = 0; i < a.length; i += SPECIES.length()) {
 171                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 172                 ShortVector bv = ShortVector.fromArray(SPECIES, b, i);
 173                 av.mul(bv).intoArray(r, i);
 174             }
 175         }
 176 
 177         bh.consume(r);
 178     }
 179 
 180     @Benchmark
 181     public void mulMasked(Blackhole bh) {
 182         short[] a = fa.apply(SPECIES.length());
 183         short[] b = fb.apply(SPECIES.length());
 184         short[] r = fr.apply(SPECIES.length());
 185         boolean[] mask = fm.apply(SPECIES.length());
 186         VectorMask<Short> vmask = VectorMask.fromValues(SPECIES, mask);
 187 
 188         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 189             for (int i = 0; i < a.length; i += SPECIES.length()) {
 190                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 191                 ShortVector bv = ShortVector.fromArray(SPECIES, b, i);
 192                 av.mul(bv, vmask).intoArray(r, i);
 193             }
 194         }
 195 
 196         bh.consume(r);
 197     }
 198 
 199 
 200     @Benchmark
 201     public void and(Blackhole bh) {
 202         short[] a = fa.apply(SPECIES.length());
 203         short[] b = fb.apply(SPECIES.length());
 204         short[] r = fr.apply(SPECIES.length());
 205 
 206         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 207             for (int i = 0; i < a.length; i += SPECIES.length()) {
 208                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 209                 ShortVector bv = ShortVector.fromArray(SPECIES, b, i);
 210                 av.and(bv).intoArray(r, i);
 211             }
 212         }
 213 
 214         bh.consume(r);
 215     }
 216 
 217 
 218 
 219     @Benchmark
 220     public void andMasked(Blackhole bh) {
 221         short[] a = fa.apply(SPECIES.length());
 222         short[] b = fb.apply(SPECIES.length());
 223         short[] r = fr.apply(SPECIES.length());
 224         boolean[] mask = fm.apply(SPECIES.length());
 225         VectorMask<Short> vmask = VectorMask.fromValues(SPECIES, mask);
 226 
 227         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 228             for (int i = 0; i < a.length; i += SPECIES.length()) {
 229                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 230                 ShortVector bv = ShortVector.fromArray(SPECIES, b, i);
 231                 av.and(bv, vmask).intoArray(r, i);
 232             }
 233         }
 234 
 235         bh.consume(r);
 236     }
 237 
 238 
 239 
 240     @Benchmark
 241     public void or(Blackhole bh) {
 242         short[] a = fa.apply(SPECIES.length());
 243         short[] b = fb.apply(SPECIES.length());
 244         short[] r = fr.apply(SPECIES.length());
 245 
 246         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 247             for (int i = 0; i < a.length; i += SPECIES.length()) {
 248                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 249                 ShortVector bv = ShortVector.fromArray(SPECIES, b, i);
 250                 av.or(bv).intoArray(r, i);
 251             }
 252         }
 253 
 254         bh.consume(r);
 255     }
 256 
 257 
 258 
 259     @Benchmark
 260     public void orMasked(Blackhole bh) {
 261         short[] a = fa.apply(SPECIES.length());
 262         short[] b = fb.apply(SPECIES.length());
 263         short[] r = fr.apply(SPECIES.length());
 264         boolean[] mask = fm.apply(SPECIES.length());
 265         VectorMask<Short> vmask = VectorMask.fromValues(SPECIES, mask);
 266 
 267         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 268             for (int i = 0; i < a.length; i += SPECIES.length()) {
 269                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 270                 ShortVector bv = ShortVector.fromArray(SPECIES, b, i);
 271                 av.or(bv, vmask).intoArray(r, i);
 272             }
 273         }
 274 
 275         bh.consume(r);
 276     }
 277 
 278 
 279 
 280     @Benchmark
 281     public void xor(Blackhole bh) {
 282         short[] a = fa.apply(SPECIES.length());
 283         short[] b = fb.apply(SPECIES.length());
 284         short[] r = fr.apply(SPECIES.length());
 285 
 286         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 287             for (int i = 0; i < a.length; i += SPECIES.length()) {
 288                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 289                 ShortVector bv = ShortVector.fromArray(SPECIES, b, i);
 290                 av.xor(bv).intoArray(r, i);
 291             }
 292         }
 293 
 294         bh.consume(r);
 295     }
 296 
 297 
 298 
 299     @Benchmark
 300     public void xorMasked(Blackhole bh) {
 301         short[] a = fa.apply(SPECIES.length());
 302         short[] b = fb.apply(SPECIES.length());
 303         short[] r = fr.apply(SPECIES.length());
 304         boolean[] mask = fm.apply(SPECIES.length());
 305         VectorMask<Short> vmask = VectorMask.fromValues(SPECIES, mask);
 306 
 307         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 308             for (int i = 0; i < a.length; i += SPECIES.length()) {
 309                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 310                 ShortVector bv = ShortVector.fromArray(SPECIES, b, i);
 311                 av.xor(bv, vmask).intoArray(r, i);
 312             }
 313         }
 314 
 315         bh.consume(r);
 316     }
 317 
 318 
 319 
 320 
 321 
 322 
 323 
 324 
 325 
 326 
 327 
 328 
 329 
 330 
 331 
 332 
 333 
 334 
 335 
 336 
 337 
 338     @Benchmark
 339     public void aShiftRShift(Blackhole bh) {
 340         short[] a = fa.apply(SPECIES.length());
 341         short[] b = fb.apply(SPECIES.length());
 342         short[] r = fr.apply(SPECIES.length());
 343 
 344         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 345             for (int i = 0; i < a.length; i += SPECIES.length()) {
 346                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 347                 av.aShiftR((int)b[i]).intoArray(r, i);
 348             }
 349         }
 350 
 351         bh.consume(r);
 352     }
 353 
 354 
 355 
 356     @Benchmark
 357     public void aShiftRMaskedShift(Blackhole bh) {
 358         short[] a = fa.apply(SPECIES.length());
 359         short[] b = fb.apply(SPECIES.length());
 360         short[] r = fr.apply(SPECIES.length());
 361         boolean[] mask = fm.apply(SPECIES.length());
 362         VectorMask<Short> vmask = VectorMask.fromValues(SPECIES, mask);
 363 
 364         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 365             for (int i = 0; i < a.length; i += SPECIES.length()) {
 366                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 367                 av.aShiftR((int)b[i], vmask).intoArray(r, i);
 368             }
 369         }
 370 
 371         bh.consume(r);
 372     }
 373 
 374 
 375 
 376     @Benchmark
 377     public void shiftLShift(Blackhole bh) {
 378         short[] a = fa.apply(SPECIES.length());
 379         short[] b = fb.apply(SPECIES.length());
 380         short[] r = fr.apply(SPECIES.length());
 381 
 382         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 383             for (int i = 0; i < a.length; i += SPECIES.length()) {
 384                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 385                 av.shiftL((int)b[i]).intoArray(r, i);
 386             }
 387         }
 388 
 389         bh.consume(r);
 390     }
 391 
 392 
 393 
 394     @Benchmark
 395     public void shiftLMaskedShift(Blackhole bh) {
 396         short[] a = fa.apply(SPECIES.length());
 397         short[] b = fb.apply(SPECIES.length());
 398         short[] r = fr.apply(SPECIES.length());
 399         boolean[] mask = fm.apply(SPECIES.length());
 400         VectorMask<Short> vmask = VectorMask.fromValues(SPECIES, mask);
 401 
 402         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 403             for (int i = 0; i < a.length; i += SPECIES.length()) {
 404                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 405                 av.shiftL((int)b[i], vmask).intoArray(r, i);
 406             }
 407         }
 408 
 409         bh.consume(r);
 410     }
 411 
 412 
 413 
 414     @Benchmark
 415     public void shiftRShift(Blackhole bh) {
 416         short[] a = fa.apply(SPECIES.length());
 417         short[] b = fb.apply(SPECIES.length());
 418         short[] r = fr.apply(SPECIES.length());
 419 
 420         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 421             for (int i = 0; i < a.length; i += SPECIES.length()) {
 422                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 423                 av.shiftR((int)b[i]).intoArray(r, i);
 424             }
 425         }
 426 
 427         bh.consume(r);
 428     }
 429 
 430 
 431 
 432     @Benchmark
 433     public void shiftRMaskedShift(Blackhole bh) {
 434         short[] a = fa.apply(SPECIES.length());
 435         short[] b = fb.apply(SPECIES.length());
 436         short[] r = fr.apply(SPECIES.length());
 437         boolean[] mask = fm.apply(SPECIES.length());
 438         VectorMask<Short> vmask = VectorMask.fromValues(SPECIES, mask);
 439 
 440         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 441             for (int i = 0; i < a.length; i += SPECIES.length()) {
 442                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 443                 av.shiftR((int)b[i], vmask).intoArray(r, i);
 444             }
 445         }
 446 
 447         bh.consume(r);
 448     }
 449 
 450 
 451     @Benchmark
 452     public void max(Blackhole bh) {
 453         short[] a = fa.apply(SPECIES.length());
 454         short[] b = fb.apply(SPECIES.length());
 455         short[] r = fr.apply(SPECIES.length());
 456 
 457         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 458             for (int i = 0; i < a.length; i += SPECIES.length()) {
 459                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 460                 ShortVector bv = ShortVector.fromArray(SPECIES, b, i);
 461                 av.max(bv).intoArray(r, i);
 462             }
 463         }
 464 
 465         bh.consume(r);
 466     }
 467 
 468     @Benchmark
 469     public void min(Blackhole bh) {
 470         short[] a = fa.apply(SPECIES.length());
 471         short[] b = fb.apply(SPECIES.length());
 472         short[] r = fr.apply(SPECIES.length());
 473 
 474         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 475             for (int i = 0; i < a.length; i += SPECIES.length()) {
 476                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 477                 ShortVector bv = ShortVector.fromArray(SPECIES, b, i);
 478                 av.min(bv).intoArray(r, i);
 479             }
 480         }
 481 
 482         bh.consume(r);
 483     }
 484 
 485 
 486     @Benchmark
 487     public void andAll(Blackhole bh) {
 488         short[] a = fa.apply(SPECIES.length());
 489         short ra = -1;
 490 
 491         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 492             ra = -1;
 493             for (int i = 0; i < a.length; i += SPECIES.length()) {
 494                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 495                 ra &= av.andAll();
 496             }
 497         }
 498         bh.consume(ra);
 499     }
 500 
 501 
 502 
 503     @Benchmark
 504     public void orAll(Blackhole bh) {
 505         short[] a = fa.apply(SPECIES.length());
 506         short ra = 0;
 507 
 508         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 509             ra = 0;
 510             for (int i = 0; i < a.length; i += SPECIES.length()) {
 511                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 512                 ra |= av.orAll();
 513             }
 514         }
 515         bh.consume(ra);
 516     }
 517 
 518 
 519 
 520     @Benchmark
 521     public void xorAll(Blackhole bh) {
 522         short[] a = fa.apply(SPECIES.length());
 523         short ra = 0;
 524 
 525         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 526             ra = 0;
 527             for (int i = 0; i < a.length; i += SPECIES.length()) {
 528                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 529                 ra ^= av.xorAll();
 530             }
 531         }
 532         bh.consume(ra);
 533     }
 534 
 535 
 536     @Benchmark
 537     public void addAll(Blackhole bh) {
 538         short[] a = fa.apply(SPECIES.length());
 539         short ra = 0;
 540 
 541         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 542             ra = 0;
 543             for (int i = 0; i < a.length; i += SPECIES.length()) {
 544                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 545                 ra += av.addAll();
 546             }
 547         }
 548         bh.consume(ra);
 549     }
 550 
 551     @Benchmark
 552     public void mulAll(Blackhole bh) {
 553         short[] a = fa.apply(SPECIES.length());
 554         short ra = 1;
 555 
 556         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 557             ra = 1;
 558             for (int i = 0; i < a.length; i += SPECIES.length()) {
 559                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 560                 ra *= av.mulAll();
 561             }
 562         }
 563         bh.consume(ra);
 564     }
 565 
 566     @Benchmark
 567     public void minAll(Blackhole bh) {
 568         short[] a = fa.apply(SPECIES.length());
 569         short ra = Short.MAX_VALUE;
 570 
 571         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 572             ra = Short.MAX_VALUE;
 573             for (int i = 0; i < a.length; i += SPECIES.length()) {
 574                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 575                 ra = (short)Math.min(ra, av.minAll());
 576             }
 577         }
 578         bh.consume(ra);
 579     }
 580 
 581     @Benchmark
 582     public void maxAll(Blackhole bh) {
 583         short[] a = fa.apply(SPECIES.length());
 584         short ra = Short.MIN_VALUE;
 585 
 586         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 587             ra = Short.MIN_VALUE;
 588             for (int i = 0; i < a.length; i += SPECIES.length()) {
 589                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 590                 ra = (short)Math.max(ra, av.maxAll());
 591             }
 592         }
 593         bh.consume(ra);
 594     }
 595 
 596 
 597     @Benchmark
 598     public void anyTrue(Blackhole bh) {
 599         boolean[] mask = fm.apply(SPECIES.length());
 600         boolean[] r = fmr.apply(SPECIES.length());
 601 
 602         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 603             for (int i = 0; i < mask.length; i += SPECIES.length()) {
 604                 VectorMask<Short> vmask = VectorMask.fromArray(SPECIES, mask, i);
 605                 r[i] = vmask.anyTrue();
 606             }
 607         }
 608 
 609         bh.consume(r);
 610     }
 611 
 612 
 613 
 614     @Benchmark
 615     public void allTrue(Blackhole bh) {
 616         boolean[] mask = fm.apply(SPECIES.length());
 617         boolean[] r = fmr.apply(SPECIES.length());
 618 
 619         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 620             for (int i = 0; i < mask.length; i += SPECIES.length()) {
 621                 VectorMask<Short> vmask = VectorMask.fromArray(SPECIES, mask, i);
 622                 r[i] = vmask.allTrue();
 623             }
 624         }
 625 
 626         bh.consume(r);
 627     }
 628 
 629 
 630     @Benchmark
 631     public void with(Blackhole bh) {
 632         short[] a = fa.apply(SPECIES.length());
 633         short[] r = fr.apply(SPECIES.length());
 634 
 635         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 636             for (int i = 0; i < a.length; i += SPECIES.length()) {
 637                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 638                 av.with(0, (short)4).intoArray(r, i);
 639             }
 640         }
 641 
 642         bh.consume(r);
 643     }
 644 
 645     @Benchmark
 646     public Object lessThan() {
 647         short[] a = fa.apply(size);
 648         short[] b = fb.apply(size);
 649         boolean[] ms = fm.apply(size);
 650         VectorMask<Short> m = VectorMask.maskFromArray(SPECIES, ms, 0);
 651 
 652         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 653             for (int i = 0; i < a.length; i += SPECIES.length()) {
 654                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 655                 ShortVector bv = ShortVector.fromArray(SPECIES, b, i);
 656                 VectorMask<Short> mv = av.lessThan(bv);
 657 
 658                 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
 659             }
 660         }
 661         return m;
 662     }
 663 
 664 
 665     @Benchmark
 666     public Object greaterThan() {
 667         short[] a = fa.apply(size);
 668         short[] b = fb.apply(size);
 669         boolean[] ms = fm.apply(size);
 670         VectorMask<Short> m = VectorMask.maskFromArray(SPECIES, ms, 0);
 671 
 672         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 673             for (int i = 0; i < a.length; i += SPECIES.length()) {
 674                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 675                 ShortVector bv = ShortVector.fromArray(SPECIES, b, i);
 676                 VectorMask<Short> mv = av.greaterThan(bv);
 677 
 678                 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
 679             }
 680         }
 681         return m;
 682     }
 683 
 684 
 685     @Benchmark
 686     public Object equal() {
 687         short[] a = fa.apply(size);
 688         short[] b = fb.apply(size);
 689         boolean[] ms = fm.apply(size);
 690         VectorMask<Short> m = VectorMask.maskFromArray(SPECIES, ms, 0);
 691 
 692         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 693             for (int i = 0; i < a.length; i += SPECIES.length()) {
 694                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 695                 ShortVector bv = ShortVector.fromArray(SPECIES, b, i);
 696                 VectorMask<Short> mv = av.equal(bv);
 697 
 698                 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
 699             }
 700         }
 701         return m;
 702     }
 703 
 704 
 705     @Benchmark
 706     public Object notEqual() {
 707         short[] a = fa.apply(size);
 708         short[] b = fb.apply(size);
 709         boolean[] ms = fm.apply(size);
 710         VectorMask<Short> m = VectorMask.maskFromArray(SPECIES, ms, 0);
 711 
 712         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 713             for (int i = 0; i < a.length; i += SPECIES.length()) {
 714                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 715                 ShortVector bv = ShortVector.fromArray(SPECIES, b, i);
 716                 VectorMask<Short> mv = av.notEqual(bv);
 717 
 718                 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
 719             }
 720         }
 721         return m;
 722     }
 723 
 724 
 725     @Benchmark
 726     public Object lessThanEq() {
 727         short[] a = fa.apply(size);
 728         short[] b = fb.apply(size);
 729         boolean[] ms = fm.apply(size);
 730         VectorMask<Short> m = VectorMask.maskFromArray(SPECIES, ms, 0);
 731 
 732         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 733             for (int i = 0; i < a.length; i += SPECIES.length()) {
 734                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 735                 ShortVector bv = ShortVector.fromArray(SPECIES, b, i);
 736                 VectorMask<Short> mv = av.lessThanEq(bv);
 737 
 738                 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
 739             }
 740         }
 741         return m;
 742     }
 743 
 744 
 745     @Benchmark
 746     public Object greaterThanEq() {
 747         short[] a = fa.apply(size);
 748         short[] b = fb.apply(size);
 749         boolean[] ms = fm.apply(size);
 750         VectorMask<Short> m = VectorMask.maskFromArray(SPECIES, ms, 0);
 751 
 752         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 753             for (int i = 0; i < a.length; i += SPECIES.length()) {
 754                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 755                 ShortVector bv = ShortVector.fromArray(SPECIES, b, i);
 756                 VectorMask<Short> mv = av.greaterThanEq(bv);
 757 
 758                 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
 759             }
 760         }
 761         return m;
 762     }
 763 
 764 
 765     @Benchmark
 766     public void blend(Blackhole bh) {
 767         short[] a = fa.apply(SPECIES.length());
 768         short[] b = fb.apply(SPECIES.length());
 769         short[] r = fr.apply(SPECIES.length());
 770         boolean[] mask = fm.apply(SPECIES.length());
 771         VectorMask<Short> vmask = VectorMask.fromValues(SPECIES, mask);
 772 
 773         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 774             for (int i = 0; i < a.length; i += SPECIES.length()) {
 775                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 776                 ShortVector bv = ShortVector.fromArray(SPECIES, b, i);
 777                 av.blend(bv, vmask).intoArray(r, i);
 778             }
 779         }
 780 
 781         bh.consume(r);
 782     }
 783 
 784     @Benchmark
 785     public void rearrange(Blackhole bh) {
 786         short[] a = fa.apply(SPECIES.length());
 787         int[] order = fs.apply(a.length, SPECIES.length());
 788         short[] r = fr.apply(SPECIES.length());
 789 
 790         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 791             for (int i = 0; i < a.length; i += SPECIES.length()) {
 792                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 793                 av.rearrange(VectorShuffle.fromArray(SPECIES, order, i)).intoArray(r, i);
 794             }
 795         }
 796 
 797         bh.consume(r);
 798     }
 799 
 800     @Benchmark
 801     public void extract(Blackhole bh) {
 802         short[] a = fa.apply(SPECIES.length());
 803         short[] r = fr.apply(SPECIES.length());
 804 
 805         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 806             for (int i = 0; i < a.length; i += SPECIES.length()) {
 807                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 808                 int num_lanes = SPECIES.length();
 809                 // Manually unroll because full unroll happens after intrinsification.
 810                 // Unroll is needed because get intrinsic requires for index to be a known constant.
 811                 if (num_lanes == 1) {
 812                     r[i]=av.lane(0);
 813                 } else if (num_lanes == 2) {
 814                     r[i]=av.lane(0);
 815                     r[i+1]=av.lane(1);
 816                 } else if (num_lanes == 4) {
 817                     r[i]=av.lane(0);
 818                     r[i+1]=av.lane(1);
 819                     r[i+2]=av.lane(2);
 820                     r[i+3]=av.lane(3);
 821                 } else if (num_lanes == 8) {
 822                     r[i]=av.lane(0);
 823                     r[i+1]=av.lane(1);
 824                     r[i+2]=av.lane(2);
 825                     r[i+3]=av.lane(3);
 826                     r[i+4]=av.lane(4);
 827                     r[i+5]=av.lane(5);
 828                     r[i+6]=av.lane(6);
 829                     r[i+7]=av.lane(7);
 830                 } else if (num_lanes == 16) {
 831                     r[i]=av.lane(0);
 832                     r[i+1]=av.lane(1);
 833                     r[i+2]=av.lane(2);
 834                     r[i+3]=av.lane(3);
 835                     r[i+4]=av.lane(4);
 836                     r[i+5]=av.lane(5);
 837                     r[i+6]=av.lane(6);
 838                     r[i+7]=av.lane(7);
 839                     r[i+8]=av.lane(8);
 840                     r[i+9]=av.lane(9);
 841                     r[i+10]=av.lane(10);
 842                     r[i+11]=av.lane(11);
 843                     r[i+12]=av.lane(12);
 844                     r[i+13]=av.lane(13);
 845                     r[i+14]=av.lane(14);
 846                     r[i+15]=av.lane(15);
 847                 } else if (num_lanes == 32) {
 848                     r[i]=av.lane(0);
 849                     r[i+1]=av.lane(1);
 850                     r[i+2]=av.lane(2);
 851                     r[i+3]=av.lane(3);
 852                     r[i+4]=av.lane(4);
 853                     r[i+5]=av.lane(5);
 854                     r[i+6]=av.lane(6);
 855                     r[i+7]=av.lane(7);
 856                     r[i+8]=av.lane(8);
 857                     r[i+9]=av.lane(9);
 858                     r[i+10]=av.lane(10);
 859                     r[i+11]=av.lane(11);
 860                     r[i+12]=av.lane(12);
 861                     r[i+13]=av.lane(13);
 862                     r[i+14]=av.lane(14);
 863                     r[i+15]=av.lane(15);
 864                     r[i+16]=av.lane(16);
 865                     r[i+17]=av.lane(17);
 866                     r[i+18]=av.lane(18);
 867                     r[i+19]=av.lane(19);
 868                     r[i+20]=av.lane(20);
 869                     r[i+21]=av.lane(21);
 870                     r[i+22]=av.lane(22);
 871                     r[i+23]=av.lane(23);
 872                     r[i+24]=av.lane(24);
 873                     r[i+25]=av.lane(25);
 874                     r[i+26]=av.lane(26);
 875                     r[i+27]=av.lane(27);
 876                     r[i+28]=av.lane(28);
 877                     r[i+29]=av.lane(29);
 878                     r[i+30]=av.lane(30);
 879                     r[i+31]=av.lane(31);
 880                 } else if (num_lanes == 64) {
 881                     r[i]=av.lane(0);
 882                     r[i+1]=av.lane(1);
 883                     r[i+2]=av.lane(2);
 884                     r[i+3]=av.lane(3);
 885                     r[i+4]=av.lane(4);
 886                     r[i+5]=av.lane(5);
 887                     r[i+6]=av.lane(6);
 888                     r[i+7]=av.lane(7);
 889                     r[i+8]=av.lane(8);
 890                     r[i+9]=av.lane(9);
 891                     r[i+10]=av.lane(10);
 892                     r[i+11]=av.lane(11);
 893                     r[i+12]=av.lane(12);
 894                     r[i+13]=av.lane(13);
 895                     r[i+14]=av.lane(14);
 896                     r[i+15]=av.lane(15);
 897                     r[i+16]=av.lane(16);
 898                     r[i+17]=av.lane(17);
 899                     r[i+18]=av.lane(18);
 900                     r[i+19]=av.lane(19);
 901                     r[i+20]=av.lane(20);
 902                     r[i+21]=av.lane(21);
 903                     r[i+22]=av.lane(22);
 904                     r[i+23]=av.lane(23);
 905                     r[i+24]=av.lane(24);
 906                     r[i+25]=av.lane(25);
 907                     r[i+26]=av.lane(26);
 908                     r[i+27]=av.lane(27);
 909                     r[i+28]=av.lane(28);
 910                     r[i+29]=av.lane(29);
 911                     r[i+30]=av.lane(30);
 912                     r[i+31]=av.lane(31);
 913                     r[i+32]=av.lane(32);
 914                     r[i+33]=av.lane(33);
 915                     r[i+34]=av.lane(34);
 916                     r[i+35]=av.lane(35);
 917                     r[i+36]=av.lane(36);
 918                     r[i+37]=av.lane(37);
 919                     r[i+38]=av.lane(38);
 920                     r[i+39]=av.lane(39);
 921                     r[i+40]=av.lane(40);
 922                     r[i+41]=av.lane(41);
 923                     r[i+42]=av.lane(42);
 924                     r[i+43]=av.lane(43);
 925                     r[i+44]=av.lane(44);
 926                     r[i+45]=av.lane(45);
 927                     r[i+46]=av.lane(46);
 928                     r[i+47]=av.lane(47);
 929                     r[i+48]=av.lane(48);
 930                     r[i+49]=av.lane(49);
 931                     r[i+50]=av.lane(50);
 932                     r[i+51]=av.lane(51);
 933                     r[i+52]=av.lane(52);
 934                     r[i+53]=av.lane(53);
 935                     r[i+54]=av.lane(54);
 936                     r[i+55]=av.lane(55);
 937                     r[i+56]=av.lane(56);
 938                     r[i+57]=av.lane(57);
 939                     r[i+58]=av.lane(58);
 940                     r[i+59]=av.lane(59);
 941                     r[i+60]=av.lane(60);
 942                     r[i+61]=av.lane(61);
 943                     r[i+62]=av.lane(62);
 944                     r[i+63]=av.lane(63);
 945                 } else {
 946                     for (int j = 0; j < SPECIES.length(); j++) {
 947                         r[i+j]=av.lane(j);
 948                     }
 949                 }
 950             }
 951         }
 952 
 953         bh.consume(r);
 954     }
 955 
 956 
 957 
 958 
 959 
 960 
 961 
 962 
 963 
 964 
 965 
 966 
 967 
 968 
 969 
 970 
 971 
 972 
 973 
 974 
 975 
 976     @Benchmark
 977     public void neg(Blackhole bh) {
 978         short[] a = fa.apply(SPECIES.length());
 979         short[] r = fr.apply(SPECIES.length());
 980 
 981         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 982             for (int i = 0; i < a.length; i += SPECIES.length()) {
 983                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 984                 av.neg().intoArray(r, i);
 985             }
 986         }
 987 
 988         bh.consume(r);
 989     }
 990 
 991     @Benchmark
 992     public void negMasked(Blackhole bh) {
 993         short[] a = fa.apply(SPECIES.length());
 994         short[] r = fr.apply(SPECIES.length());
 995         boolean[] mask = fm.apply(SPECIES.length());
 996         VectorMask<Short> vmask = VectorMask.fromValues(SPECIES, mask);
 997 
 998         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 999             for (int i = 0; i < a.length; i += SPECIES.length()) {
1000                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
1001                 av.neg(vmask).intoArray(r, i);
1002             }
1003         }
1004 
1005         bh.consume(r);
1006     }
1007 
1008     @Benchmark
1009     public void abs(Blackhole bh) {
1010         short[] a = fa.apply(SPECIES.length());
1011         short[] r = fr.apply(SPECIES.length());
1012 
1013         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1014             for (int i = 0; i < a.length; i += SPECIES.length()) {
1015                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
1016                 av.abs().intoArray(r, i);
1017             }
1018         }
1019 
1020         bh.consume(r);
1021     }
1022 
1023     @Benchmark
1024     public void absMasked(Blackhole bh) {
1025         short[] a = fa.apply(SPECIES.length());
1026         short[] r = fr.apply(SPECIES.length());
1027         boolean[] mask = fm.apply(SPECIES.length());
1028         VectorMask<Short> vmask = VectorMask.fromValues(SPECIES, mask);
1029 
1030         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1031             for (int i = 0; i < a.length; i += SPECIES.length()) {
1032                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
1033                 av.abs(vmask).intoArray(r, i);
1034             }
1035         }
1036 
1037         bh.consume(r);
1038     }
1039 
1040 
1041     @Benchmark
1042     public void not(Blackhole bh) {
1043         short[] a = fa.apply(SPECIES.length());
1044         short[] r = fr.apply(SPECIES.length());
1045 
1046         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1047             for (int i = 0; i < a.length; i += SPECIES.length()) {
1048                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
1049                 av.not().intoArray(r, i);
1050             }
1051         }
1052 
1053         bh.consume(r);
1054     }
1055 
1056 
1057 
1058     @Benchmark
1059     public void notMasked(Blackhole bh) {
1060         short[] a = fa.apply(SPECIES.length());
1061         short[] r = fr.apply(SPECIES.length());
1062         boolean[] mask = fm.apply(SPECIES.length());
1063         VectorMask<Short> vmask = VectorMask.fromValues(SPECIES, mask);
1064 
1065         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1066             for (int i = 0; i < a.length; i += SPECIES.length()) {
1067                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
1068                 av.not(vmask).intoArray(r, i);
1069             }
1070         }
1071 
1072         bh.consume(r);
1073     }
1074 
1075 
1076 
1077 
1078 
1079 }
1080