1 /*
   2  * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have
  21  * questions.
  22  */
  23 
  24 package benchmark.jdk.incubator.vector;
  25 
  26 import jdk.incubator.vector.Vector;
  27 import jdk.incubator.vector.Vector.Shape;
  28 import jdk.incubator.vector.ShortVector;
  29 
  30 import java.util.concurrent.TimeUnit;
  31 import java.util.function.BiFunction;
  32 import java.util.function.IntFunction;
  33 
  34 import org.openjdk.jmh.annotations.*;
  35 import org.openjdk.jmh.infra.Blackhole;
  36 
  37 @BenchmarkMode(Mode.Throughput)
  38 @OutputTimeUnit(TimeUnit.MILLISECONDS)
  39 @State(Scope.Benchmark)
  40 @Warmup(iterations = 3, time = 1)
  41 @Measurement(iterations = 5, time = 1)
  42 @Fork(value = 1, jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"})
  43 public class Short128Vector extends AbstractVectorBenchmark {
    /** Species for short vectors of the 128-bit shape; every kernel in this class uses it. */
    static final ShortVector.ShortSpecies SPECIES = ShortVector.species(Shape.S_128_BIT);

    /** Trip count of the outer loop in each kernel; fixed at 1 so that loop runs exactly once. */
    static final int INVOC_COUNT = 1; // get rid of outer loop

    /** Element count of the benchmark arrays; a JMH parameter whose declared value is "1024". */
    @Param("1024")
    int size;
  50 
  51     short[] fill(IntFunction<Short> f) {
  52         short[] array = new short[size];
  53         for (int i = 0; i < array.length; i++) {
  54             array[i] = f.apply(i);
  55         }
  56         return array;
  57     }
  58 
    // Short data arrays assigned in init(): a, b and c hold generated inputs, r starts as all zeros.
    short[] a, b, c, r;
    // Boolean arrays assigned in init(): m alternates true/false starting with true, rm is all false.
    boolean[] m, rm;
    // Int array assigned in init() from random values below SPECIES.length(); read by rearrange().
    int[] s;
  62 
  63     @Setup
  64     public void init() {
  65         size += size % SPECIES.length(); // FIXME: add post-loops
  66 
  67         a = fill(i -> (short)(2*i));
  68         b = fill(i -> (short)(i+1));
  69         c = fill(i -> (short)(i+5));
  70         r = fill(i -> (short)0);
  71 
  72         m = fillMask(size, i -> (i % 2) == 0);
  73         rm = fillMask(size, i -> false);
  74 
  75         s = fillInt(size, i -> RANDOM.nextInt(SPECIES.length()));
  76     }
  77 
    // Array suppliers used by the kernels. Each lambda ignores its argument(s) and returns
    // the current value of the corresponding field at the time it is called.
    final IntFunction<short[]> fa = vl -> a;
    final IntFunction<short[]> fb = vl -> b;
    final IntFunction<short[]> fc = vl -> c;
    final IntFunction<short[]> fr = vl -> r;
    final IntFunction<boolean[]> fm = vl -> m;
    final IntFunction<boolean[]> fmr = vl -> rm;
    final BiFunction<Integer,Integer,int[]> fs = (i,j) -> s;
  85 
  86 
  87     @Benchmark
  88     public void add(Blackhole bh) {
  89         short[] a = fa.apply(SPECIES.length());
  90         short[] b = fb.apply(SPECIES.length());
  91         short[] r = fr.apply(SPECIES.length());
  92 
  93         for (int ic = 0; ic < INVOC_COUNT; ic++) {
  94             for (int i = 0; i < a.length; i += SPECIES.length()) {
  95                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
  96                 ShortVector bv = ShortVector.fromArray(SPECIES, b, i);
  97                 av.add(bv).intoArray(r, i);
  98             }
  99         }
 100 
 101         bh.consume(r);
 102     }
 103 
 104     @Benchmark
 105     public void addMasked(Blackhole bh) {
 106         short[] a = fa.apply(SPECIES.length());
 107         short[] b = fb.apply(SPECIES.length());
 108         short[] r = fr.apply(SPECIES.length());
 109         boolean[] mask = fm.apply(SPECIES.length());
 110         Vector.Mask<Short> vmask = ShortVector.maskFromValues(SPECIES, mask);
 111 
 112         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 113             for (int i = 0; i < a.length; i += SPECIES.length()) {
 114                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 115                 ShortVector bv = ShortVector.fromArray(SPECIES, b, i);
 116                 av.add(bv, vmask).intoArray(r, i);
 117             }
 118         }
 119 
 120         bh.consume(r);
 121     }
 122 
 123     @Benchmark
 124     public void sub(Blackhole bh) {
 125         short[] a = fa.apply(SPECIES.length());
 126         short[] b = fb.apply(SPECIES.length());
 127         short[] r = fr.apply(SPECIES.length());
 128 
 129         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 130             for (int i = 0; i < a.length; i += SPECIES.length()) {
 131                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 132                 ShortVector bv = ShortVector.fromArray(SPECIES, b, i);
 133                 av.sub(bv).intoArray(r, i);
 134             }
 135         }
 136 
 137         bh.consume(r);
 138     }
 139 
 140     @Benchmark
 141     public void subMasked(Blackhole bh) {
 142         short[] a = fa.apply(SPECIES.length());
 143         short[] b = fb.apply(SPECIES.length());
 144         short[] r = fr.apply(SPECIES.length());
 145         boolean[] mask = fm.apply(SPECIES.length());
 146         Vector.Mask<Short> vmask = ShortVector.maskFromValues(SPECIES, mask);
 147 
 148         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 149             for (int i = 0; i < a.length; i += SPECIES.length()) {
 150                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 151                 ShortVector bv = ShortVector.fromArray(SPECIES, b, i);
 152                 av.sub(bv, vmask).intoArray(r, i);
 153             }
 154         }
 155 
 156         bh.consume(r);
 157     }
 158 
 159 
 160 
 161     @Benchmark
 162     public void mul(Blackhole bh) {
 163         short[] a = fa.apply(SPECIES.length());
 164         short[] b = fb.apply(SPECIES.length());
 165         short[] r = fr.apply(SPECIES.length());
 166 
 167         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 168             for (int i = 0; i < a.length; i += SPECIES.length()) {
 169                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 170                 ShortVector bv = ShortVector.fromArray(SPECIES, b, i);
 171                 av.mul(bv).intoArray(r, i);
 172             }
 173         }
 174 
 175         bh.consume(r);
 176     }
 177 
 178     @Benchmark
 179     public void mulMasked(Blackhole bh) {
 180         short[] a = fa.apply(SPECIES.length());
 181         short[] b = fb.apply(SPECIES.length());
 182         short[] r = fr.apply(SPECIES.length());
 183         boolean[] mask = fm.apply(SPECIES.length());
 184         Vector.Mask<Short> vmask = ShortVector.maskFromValues(SPECIES, mask);
 185 
 186         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 187             for (int i = 0; i < a.length; i += SPECIES.length()) {
 188                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 189                 ShortVector bv = ShortVector.fromArray(SPECIES, b, i);
 190                 av.mul(bv, vmask).intoArray(r, i);
 191             }
 192         }
 193 
 194         bh.consume(r);
 195     }
 196 
 197 
 198     @Benchmark
 199     public void and(Blackhole bh) {
 200         short[] a = fa.apply(SPECIES.length());
 201         short[] b = fb.apply(SPECIES.length());
 202         short[] r = fr.apply(SPECIES.length());
 203 
 204         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 205             for (int i = 0; i < a.length; i += SPECIES.length()) {
 206                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 207                 ShortVector bv = ShortVector.fromArray(SPECIES, b, i);
 208                 av.and(bv).intoArray(r, i);
 209             }
 210         }
 211 
 212         bh.consume(r);
 213     }
 214 
 215 
 216 
 217     @Benchmark
 218     public void andMasked(Blackhole bh) {
 219         short[] a = fa.apply(SPECIES.length());
 220         short[] b = fb.apply(SPECIES.length());
 221         short[] r = fr.apply(SPECIES.length());
 222         boolean[] mask = fm.apply(SPECIES.length());
 223         Vector.Mask<Short> vmask = ShortVector.maskFromValues(SPECIES, mask);
 224 
 225         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 226             for (int i = 0; i < a.length; i += SPECIES.length()) {
 227                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 228                 ShortVector bv = ShortVector.fromArray(SPECIES, b, i);
 229                 av.and(bv, vmask).intoArray(r, i);
 230             }
 231         }
 232 
 233         bh.consume(r);
 234     }
 235 
 236 
 237 
 238     @Benchmark
 239     public void or(Blackhole bh) {
 240         short[] a = fa.apply(SPECIES.length());
 241         short[] b = fb.apply(SPECIES.length());
 242         short[] r = fr.apply(SPECIES.length());
 243 
 244         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 245             for (int i = 0; i < a.length; i += SPECIES.length()) {
 246                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 247                 ShortVector bv = ShortVector.fromArray(SPECIES, b, i);
 248                 av.or(bv).intoArray(r, i);
 249             }
 250         }
 251 
 252         bh.consume(r);
 253     }
 254 
 255 
 256 
 257     @Benchmark
 258     public void orMasked(Blackhole bh) {
 259         short[] a = fa.apply(SPECIES.length());
 260         short[] b = fb.apply(SPECIES.length());
 261         short[] r = fr.apply(SPECIES.length());
 262         boolean[] mask = fm.apply(SPECIES.length());
 263         Vector.Mask<Short> vmask = ShortVector.maskFromValues(SPECIES, mask);
 264 
 265         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 266             for (int i = 0; i < a.length; i += SPECIES.length()) {
 267                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 268                 ShortVector bv = ShortVector.fromArray(SPECIES, b, i);
 269                 av.or(bv, vmask).intoArray(r, i);
 270             }
 271         }
 272 
 273         bh.consume(r);
 274     }
 275 
 276 
 277 
 278     @Benchmark
 279     public void xor(Blackhole bh) {
 280         short[] a = fa.apply(SPECIES.length());
 281         short[] b = fb.apply(SPECIES.length());
 282         short[] r = fr.apply(SPECIES.length());
 283 
 284         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 285             for (int i = 0; i < a.length; i += SPECIES.length()) {
 286                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 287                 ShortVector bv = ShortVector.fromArray(SPECIES, b, i);
 288                 av.xor(bv).intoArray(r, i);
 289             }
 290         }
 291 
 292         bh.consume(r);
 293     }
 294 
 295 
 296 
 297     @Benchmark
 298     public void xorMasked(Blackhole bh) {
 299         short[] a = fa.apply(SPECIES.length());
 300         short[] b = fb.apply(SPECIES.length());
 301         short[] r = fr.apply(SPECIES.length());
 302         boolean[] mask = fm.apply(SPECIES.length());
 303         Vector.Mask<Short> vmask = ShortVector.maskFromValues(SPECIES, mask);
 304 
 305         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 306             for (int i = 0; i < a.length; i += SPECIES.length()) {
 307                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 308                 ShortVector bv = ShortVector.fromArray(SPECIES, b, i);
 309                 av.xor(bv, vmask).intoArray(r, i);
 310             }
 311         }
 312 
 313         bh.consume(r);
 314     }
 315 
 316 
 317 
 318 
 319 
 320 
 321 
 322 
 323 
 324 
 325 
 326 
 327 
 328 
 329 
 330 
 331 
 332 
 333 
 334 
 335 
 336     @Benchmark
 337     public void aShiftRShift(Blackhole bh) {
 338         short[] a = fa.apply(SPECIES.length());
 339         short[] b = fb.apply(SPECIES.length());
 340         short[] r = fr.apply(SPECIES.length());
 341 
 342         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 343             for (int i = 0; i < a.length; i += SPECIES.length()) {
 344                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 345                 av.aShiftR((int)b[i]).intoArray(r, i);
 346             }
 347         }
 348 
 349         bh.consume(r);
 350     }
 351 
 352 
 353 
 354     @Benchmark
 355     public void aShiftRMaskedShift(Blackhole bh) {
 356         short[] a = fa.apply(SPECIES.length());
 357         short[] b = fb.apply(SPECIES.length());
 358         short[] r = fr.apply(SPECIES.length());
 359         boolean[] mask = fm.apply(SPECIES.length());
 360         Vector.Mask<Short> vmask = ShortVector.maskFromValues(SPECIES, mask);
 361 
 362         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 363             for (int i = 0; i < a.length; i += SPECIES.length()) {
 364                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 365                 av.aShiftR((int)b[i], vmask).intoArray(r, i);
 366             }
 367         }
 368 
 369         bh.consume(r);
 370     }
 371 
 372 
 373 
 374     @Benchmark
 375     public void shiftLShift(Blackhole bh) {
 376         short[] a = fa.apply(SPECIES.length());
 377         short[] b = fb.apply(SPECIES.length());
 378         short[] r = fr.apply(SPECIES.length());
 379 
 380         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 381             for (int i = 0; i < a.length; i += SPECIES.length()) {
 382                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 383                 av.shiftL((int)b[i]).intoArray(r, i);
 384             }
 385         }
 386 
 387         bh.consume(r);
 388     }
 389 
 390 
 391 
 392     @Benchmark
 393     public void shiftLMaskedShift(Blackhole bh) {
 394         short[] a = fa.apply(SPECIES.length());
 395         short[] b = fb.apply(SPECIES.length());
 396         short[] r = fr.apply(SPECIES.length());
 397         boolean[] mask = fm.apply(SPECIES.length());
 398         Vector.Mask<Short> vmask = ShortVector.maskFromValues(SPECIES, mask);
 399 
 400         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 401             for (int i = 0; i < a.length; i += SPECIES.length()) {
 402                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 403                 av.shiftL((int)b[i], vmask).intoArray(r, i);
 404             }
 405         }
 406 
 407         bh.consume(r);
 408     }
 409 
 410 
 411 
 412     @Benchmark
 413     public void shiftRShift(Blackhole bh) {
 414         short[] a = fa.apply(SPECIES.length());
 415         short[] b = fb.apply(SPECIES.length());
 416         short[] r = fr.apply(SPECIES.length());
 417 
 418         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 419             for (int i = 0; i < a.length; i += SPECIES.length()) {
 420                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 421                 av.shiftR((int)b[i]).intoArray(r, i);
 422             }
 423         }
 424 
 425         bh.consume(r);
 426     }
 427 
 428 
 429 
 430     @Benchmark
 431     public void shiftRMaskedShift(Blackhole bh) {
 432         short[] a = fa.apply(SPECIES.length());
 433         short[] b = fb.apply(SPECIES.length());
 434         short[] r = fr.apply(SPECIES.length());
 435         boolean[] mask = fm.apply(SPECIES.length());
 436         Vector.Mask<Short> vmask = ShortVector.maskFromValues(SPECIES, mask);
 437 
 438         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 439             for (int i = 0; i < a.length; i += SPECIES.length()) {
 440                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 441                 av.shiftR((int)b[i], vmask).intoArray(r, i);
 442             }
 443         }
 444 
 445         bh.consume(r);
 446     }
 447 
 448 
 449     @Benchmark
 450     public void max(Blackhole bh) {
 451         short[] a = fa.apply(SPECIES.length());
 452         short[] b = fb.apply(SPECIES.length());
 453         short[] r = fr.apply(SPECIES.length());
 454 
 455         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 456             for (int i = 0; i < a.length; i += SPECIES.length()) {
 457                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 458                 ShortVector bv = ShortVector.fromArray(SPECIES, b, i);
 459                 av.max(bv).intoArray(r, i);
 460             }
 461         }
 462 
 463         bh.consume(r);
 464     }
 465 
 466     @Benchmark
 467     public void min(Blackhole bh) {
 468         short[] a = fa.apply(SPECIES.length());
 469         short[] b = fb.apply(SPECIES.length());
 470         short[] r = fr.apply(SPECIES.length());
 471 
 472         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 473             for (int i = 0; i < a.length; i += SPECIES.length()) {
 474                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 475                 ShortVector bv = ShortVector.fromArray(SPECIES, b, i);
 476                 av.min(bv).intoArray(r, i);
 477             }
 478         }
 479 
 480         bh.consume(r);
 481     }
 482 
 483 
 484     @Benchmark
 485     public void andAll(Blackhole bh) {
 486         short[] a = fa.apply(SPECIES.length());
 487         short[] r = fr.apply(SPECIES.length());
 488         short ra = -1;
 489 
 490         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 491             for (int i = 0; i < a.length; i += SPECIES.length()) {
 492                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 493                 r[i] = av.andAll();
 494             }
 495         }
 496 
 497         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 498             ra = -1;
 499             for (int i = 0; i < a.length; i += SPECIES.length()) {
 500                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 501                 ra &= av.andAll();
 502             }
 503         }
 504 
 505         bh.consume(ra);
 506         bh.consume(r);
 507     }
 508 
 509 
 510 
 511     @Benchmark
 512     public void orAll(Blackhole bh) {
 513         short[] a = fa.apply(SPECIES.length());
 514         short[] r = fr.apply(SPECIES.length());
 515         short ra = 0;
 516 
 517         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 518             for (int i = 0; i < a.length; i += SPECIES.length()) {
 519                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 520                 r[i] = av.orAll();
 521             }
 522         }
 523 
 524         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 525             ra = 0;
 526             for (int i = 0; i < a.length; i += SPECIES.length()) {
 527                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 528                 ra |= av.orAll();
 529             }
 530         }
 531 
 532         bh.consume(ra);
 533         bh.consume(r);
 534     }
 535 
 536 
 537 
 538     @Benchmark
 539     public void xorAll(Blackhole bh) {
 540         short[] a = fa.apply(SPECIES.length());
 541         short[] r = fr.apply(SPECIES.length());
 542         short ra = 0;
 543 
 544         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 545             for (int i = 0; i < a.length; i += SPECIES.length()) {
 546                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 547                 r[i] = av.xorAll();
 548             }
 549         }
 550 
 551         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 552             ra = 0;
 553             for (int i = 0; i < a.length; i += SPECIES.length()) {
 554                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 555                 ra ^= av.xorAll();
 556             }
 557         }
 558 
 559         bh.consume(ra);
 560         bh.consume(r);
 561     }
 562 
 563 
 564     @Benchmark
 565     public void addAll(Blackhole bh) {
 566         short[] a = fa.apply(SPECIES.length());
 567         short[] r = fr.apply(SPECIES.length());
 568         short ra = 0;
 569 
 570         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 571             for (int i = 0; i < a.length; i += SPECIES.length()) {
 572                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 573                 r[i] = av.addAll();
 574             }
 575         }
 576 
 577         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 578             ra = 0;
 579             for (int i = 0; i < a.length; i += SPECIES.length()) {
 580                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 581                 ra += av.addAll();
 582             }
 583         }
 584 
 585         bh.consume(ra);
 586         bh.consume(r);
 587     }
 588 
 589     @Benchmark
 590     public void mulAll(Blackhole bh) {
 591         short[] a = fa.apply(SPECIES.length());
 592         short[] r = fr.apply(SPECIES.length());
 593         short ra = 1;
 594 
 595         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 596             for (int i = 0; i < a.length; i += SPECIES.length()) {
 597                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 598                 r[i] = av.mulAll();
 599             }
 600         }
 601 
 602         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 603             ra = 1;
 604             for (int i = 0; i < a.length; i += SPECIES.length()) {
 605                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 606                 ra *= av.mulAll();
 607             }
 608         }
 609 
 610         bh.consume(ra);
 611         bh.consume(r);
 612     }
 613 
 614     @Benchmark
 615     public void minAll(Blackhole bh) {
 616         short[] a = fa.apply(SPECIES.length());
 617         short[] r = fr.apply(SPECIES.length());
 618         short ra = Short.MAX_VALUE;
 619 
 620         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 621             for (int i = 0; i < a.length; i += SPECIES.length()) {
 622                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 623                 r[i] = av.minAll();
 624             }
 625         }
 626 
 627         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 628             ra = Short.MAX_VALUE;
 629             for (int i = 0; i < a.length; i += SPECIES.length()) {
 630                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 631                 ra = (short)Math.min(ra, av.minAll());
 632             }
 633         }
 634 
 635         bh.consume(ra);
 636         bh.consume(r);
 637     }
 638 
 639     @Benchmark
 640     public void maxAll(Blackhole bh) {
 641         short[] a = fa.apply(SPECIES.length());
 642         short[] r = fr.apply(SPECIES.length());
 643         short ra = Short.MIN_VALUE;
 644 
 645         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 646             for (int i = 0; i < a.length; i += SPECIES.length()) {
 647                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 648                 r[i] = av.maxAll();
 649             }
 650         }
 651 
 652         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 653             ra = Short.MIN_VALUE;
 654             for (int i = 0; i < a.length; i += SPECIES.length()) {
 655                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 656                 ra = (short)Math.max(ra, av.maxAll());
 657             }
 658         }
 659 
 660         bh.consume(ra);
 661         bh.consume(r);
 662     }
 663 
 664 
 665     @Benchmark
 666     public void anyTrue(Blackhole bh) {
 667         boolean[] mask = fm.apply(SPECIES.length());
 668         boolean[] r = fmr.apply(SPECIES.length());
 669 
 670         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 671             for (int i = 0; i < mask.length; i += SPECIES.length()) {
 672                 Vector.Mask<Short> vmask = ShortVector.maskFromArray(SPECIES, mask, i);
 673                 r[i] = vmask.anyTrue();
 674             }
 675         }
 676 
 677         bh.consume(r);
 678     }
 679 
 680 
 681 
 682     @Benchmark
 683     public void allTrue(Blackhole bh) {
 684         boolean[] mask = fm.apply(SPECIES.length());
 685         boolean[] r = fmr.apply(SPECIES.length());
 686 
 687         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 688             for (int i = 0; i < mask.length; i += SPECIES.length()) {
 689                 Vector.Mask<Short> vmask = ShortVector.maskFromArray(SPECIES, mask, i);
 690                 r[i] = vmask.allTrue();
 691             }
 692         }
 693 
 694         bh.consume(r);
 695     }
 696 
 697 
 698     @Benchmark
 699     public void with(Blackhole bh) {
 700         short[] a = fa.apply(SPECIES.length());
 701         short[] r = fr.apply(SPECIES.length());
 702 
 703         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 704             for (int i = 0; i < a.length; i += SPECIES.length()) {
 705                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 706                 av.with(0, (short)4).intoArray(r, i);
 707             }
 708         }
 709 
 710         bh.consume(r);
 711     }
 712 
 713     @Benchmark
 714     public Object lessThan() {
 715         short[] a = fa.apply(size);
 716         short[] b = fb.apply(size);
 717         boolean[] ms = fm.apply(size);
 718         Vector.Mask<Short> m = ShortVector.maskFromArray(SPECIES, ms, 0);
 719 
 720         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 721             for (int i = 0; i < a.length; i += SPECIES.length()) {
 722                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 723                 ShortVector bv = ShortVector.fromArray(SPECIES, b, i);
 724                 Vector.Mask<Short> mv = av.lessThan(bv);
 725 
 726                 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
 727             }
 728         }
 729         return m;
 730     }
 731 
 732 
 733     @Benchmark
 734     public Object greaterThan() {
 735         short[] a = fa.apply(size);
 736         short[] b = fb.apply(size);
 737         boolean[] ms = fm.apply(size);
 738         Vector.Mask<Short> m = ShortVector.maskFromArray(SPECIES, ms, 0);
 739 
 740         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 741             for (int i = 0; i < a.length; i += SPECIES.length()) {
 742                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 743                 ShortVector bv = ShortVector.fromArray(SPECIES, b, i);
 744                 Vector.Mask<Short> mv = av.greaterThan(bv);
 745 
 746                 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
 747             }
 748         }
 749         return m;
 750     }
 751 
 752 
 753     @Benchmark
 754     public Object equal() {
 755         short[] a = fa.apply(size);
 756         short[] b = fb.apply(size);
 757         boolean[] ms = fm.apply(size);
 758         Vector.Mask<Short> m = ShortVector.maskFromArray(SPECIES, ms, 0);
 759 
 760         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 761             for (int i = 0; i < a.length; i += SPECIES.length()) {
 762                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 763                 ShortVector bv = ShortVector.fromArray(SPECIES, b, i);
 764                 Vector.Mask<Short> mv = av.equal(bv);
 765 
 766                 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
 767             }
 768         }
 769         return m;
 770     }
 771 
 772 
 773     @Benchmark
 774     public Object notEqual() {
 775         short[] a = fa.apply(size);
 776         short[] b = fb.apply(size);
 777         boolean[] ms = fm.apply(size);
 778         Vector.Mask<Short> m = ShortVector.maskFromArray(SPECIES, ms, 0);
 779 
 780         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 781             for (int i = 0; i < a.length; i += SPECIES.length()) {
 782                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 783                 ShortVector bv = ShortVector.fromArray(SPECIES, b, i);
 784                 Vector.Mask<Short> mv = av.notEqual(bv);
 785 
 786                 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
 787             }
 788         }
 789         return m;
 790     }
 791 
 792 
 793     @Benchmark
 794     public Object lessThanEq() {
 795         short[] a = fa.apply(size);
 796         short[] b = fb.apply(size);
 797         boolean[] ms = fm.apply(size);
 798         Vector.Mask<Short> m = ShortVector.maskFromArray(SPECIES, ms, 0);
 799 
 800         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 801             for (int i = 0; i < a.length; i += SPECIES.length()) {
 802                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 803                 ShortVector bv = ShortVector.fromArray(SPECIES, b, i);
 804                 Vector.Mask<Short> mv = av.lessThanEq(bv);
 805 
 806                 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
 807             }
 808         }
 809         return m;
 810     }
 811 
 812 
 813     @Benchmark
 814     public Object greaterThanEq() {
 815         short[] a = fa.apply(size);
 816         short[] b = fb.apply(size);
 817         boolean[] ms = fm.apply(size);
 818         Vector.Mask<Short> m = ShortVector.maskFromArray(SPECIES, ms, 0);
 819 
 820         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 821             for (int i = 0; i < a.length; i += SPECIES.length()) {
 822                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 823                 ShortVector bv = ShortVector.fromArray(SPECIES, b, i);
 824                 Vector.Mask<Short> mv = av.greaterThanEq(bv);
 825 
 826                 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
 827             }
 828         }
 829         return m;
 830     }
 831 
 832 
 833     @Benchmark
 834     public void blend(Blackhole bh) {
 835         short[] a = fa.apply(SPECIES.length());
 836         short[] b = fb.apply(SPECIES.length());
 837         short[] r = fr.apply(SPECIES.length());
 838         boolean[] mask = fm.apply(SPECIES.length());
 839         Vector.Mask<Short> vmask = ShortVector.maskFromValues(SPECIES, mask);
 840 
 841         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 842             for (int i = 0; i < a.length; i += SPECIES.length()) {
 843                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 844                 ShortVector bv = ShortVector.fromArray(SPECIES, b, i);
 845                 av.blend(bv, vmask).intoArray(r, i);
 846             }
 847         }
 848 
 849         bh.consume(r);
 850     }
 851 
 852     @Benchmark
 853     public void rearrange(Blackhole bh) {
 854         short[] a = fa.apply(SPECIES.length());
 855         int[] order = fs.apply(a.length, SPECIES.length());
 856         short[] r = fr.apply(SPECIES.length());
 857 
 858         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 859             for (int i = 0; i < a.length; i += SPECIES.length()) {
 860                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 861                 av.rearrange(ShortVector.shuffleFromArray(SPECIES, order, i)).intoArray(r, i);
 862             }
 863         }
 864 
 865         bh.consume(r);
 866     }
 867 
 868     @Benchmark
 869     public void extract(Blackhole bh) {
 870         short[] a = fa.apply(SPECIES.length());
 871         short[] r = fr.apply(SPECIES.length());
 872 
 873         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 874             for (int i = 0; i < a.length; i += SPECIES.length()) {
 875                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
 876                 int num_lanes = SPECIES.length();
 877                 // Manually unroll because full unroll happens after intrinsification.
 878                 // Unroll is needed because get intrinsic requires for index to be a known constant.
 879                 if (num_lanes == 1) {
 880                     r[i]=av.get(0);
 881                 } else if (num_lanes == 2) {
 882                     r[i]=av.get(0);
 883                     r[i+1]=av.get(1);
 884                 } else if (num_lanes == 4) {
 885                     r[i]=av.get(0);
 886                     r[i+1]=av.get(1);
 887                     r[i+2]=av.get(2);
 888                     r[i+3]=av.get(3);
 889                 } else if (num_lanes == 8) {
 890                     r[i]=av.get(0);
 891                     r[i+1]=av.get(1);
 892                     r[i+2]=av.get(2);
 893                     r[i+3]=av.get(3);
 894                     r[i+4]=av.get(4);
 895                     r[i+5]=av.get(5);
 896                     r[i+6]=av.get(6);
 897                     r[i+7]=av.get(7);
 898                 } else if (num_lanes == 16) {
 899                     r[i]=av.get(0);
 900                     r[i+1]=av.get(1);
 901                     r[i+2]=av.get(2);
 902                     r[i+3]=av.get(3);
 903                     r[i+4]=av.get(4);
 904                     r[i+5]=av.get(5);
 905                     r[i+6]=av.get(6);
 906                     r[i+7]=av.get(7);
 907                     r[i+8]=av.get(8);
 908                     r[i+9]=av.get(9);
 909                     r[i+10]=av.get(10);
 910                     r[i+11]=av.get(11);
 911                     r[i+12]=av.get(12);
 912                     r[i+13]=av.get(13);
 913                     r[i+14]=av.get(14);
 914                     r[i+15]=av.get(15);
 915                 } else if (num_lanes == 32) {
 916                     r[i]=av.get(0);
 917                     r[i+1]=av.get(1);
 918                     r[i+2]=av.get(2);
 919                     r[i+3]=av.get(3);
 920                     r[i+4]=av.get(4);
 921                     r[i+5]=av.get(5);
 922                     r[i+6]=av.get(6);
 923                     r[i+7]=av.get(7);
 924                     r[i+8]=av.get(8);
 925                     r[i+9]=av.get(9);
 926                     r[i+10]=av.get(10);
 927                     r[i+11]=av.get(11);
 928                     r[i+12]=av.get(12);
 929                     r[i+13]=av.get(13);
 930                     r[i+14]=av.get(14);
 931                     r[i+15]=av.get(15);
 932                     r[i+16]=av.get(16);
 933                     r[i+17]=av.get(17);
 934                     r[i+18]=av.get(18);
 935                     r[i+19]=av.get(19);
 936                     r[i+20]=av.get(20);
 937                     r[i+21]=av.get(21);
 938                     r[i+22]=av.get(22);
 939                     r[i+23]=av.get(23);
 940                     r[i+24]=av.get(24);
 941                     r[i+25]=av.get(25);
 942                     r[i+26]=av.get(26);
 943                     r[i+27]=av.get(27);
 944                     r[i+28]=av.get(28);
 945                     r[i+29]=av.get(29);
 946                     r[i+30]=av.get(30);
 947                     r[i+31]=av.get(31);
 948                 } else if (num_lanes == 64) {
 949                     r[i]=av.get(0);
 950                     r[i+1]=av.get(1);
 951                     r[i+2]=av.get(2);
 952                     r[i+3]=av.get(3);
 953                     r[i+4]=av.get(4);
 954                     r[i+5]=av.get(5);
 955                     r[i+6]=av.get(6);
 956                     r[i+7]=av.get(7);
 957                     r[i+8]=av.get(8);
 958                     r[i+9]=av.get(9);
 959                     r[i+10]=av.get(10);
 960                     r[i+11]=av.get(11);
 961                     r[i+12]=av.get(12);
 962                     r[i+13]=av.get(13);
 963                     r[i+14]=av.get(14);
 964                     r[i+15]=av.get(15);
 965                     r[i+16]=av.get(16);
 966                     r[i+17]=av.get(17);
 967                     r[i+18]=av.get(18);
 968                     r[i+19]=av.get(19);
 969                     r[i+20]=av.get(20);
 970                     r[i+21]=av.get(21);
 971                     r[i+22]=av.get(22);
 972                     r[i+23]=av.get(23);
 973                     r[i+24]=av.get(24);
 974                     r[i+25]=av.get(25);
 975                     r[i+26]=av.get(26);
 976                     r[i+27]=av.get(27);
 977                     r[i+28]=av.get(28);
 978                     r[i+29]=av.get(29);
 979                     r[i+30]=av.get(30);
 980                     r[i+31]=av.get(31);
 981                     r[i+32]=av.get(32);
 982                     r[i+33]=av.get(33);
 983                     r[i+34]=av.get(34);
 984                     r[i+35]=av.get(35);
 985                     r[i+36]=av.get(36);
 986                     r[i+37]=av.get(37);
 987                     r[i+38]=av.get(38);
 988                     r[i+39]=av.get(39);
 989                     r[i+40]=av.get(40);
 990                     r[i+41]=av.get(41);
 991                     r[i+42]=av.get(42);
 992                     r[i+43]=av.get(43);
 993                     r[i+44]=av.get(44);
 994                     r[i+45]=av.get(45);
 995                     r[i+46]=av.get(46);
 996                     r[i+47]=av.get(47);
 997                     r[i+48]=av.get(48);
 998                     r[i+49]=av.get(49);
 999                     r[i+50]=av.get(50);
1000                     r[i+51]=av.get(51);
1001                     r[i+52]=av.get(52);
1002                     r[i+53]=av.get(53);
1003                     r[i+54]=av.get(54);
1004                     r[i+55]=av.get(55);
1005                     r[i+56]=av.get(56);
1006                     r[i+57]=av.get(57);
1007                     r[i+58]=av.get(58);
1008                     r[i+59]=av.get(59);
1009                     r[i+60]=av.get(60);
1010                     r[i+61]=av.get(61);
1011                     r[i+62]=av.get(62);
1012                     r[i+63]=av.get(63);
1013                 } else {
1014                     for (int j = 0; j < SPECIES.length(); j++) {
1015                         r[i+j]=av.get(j);
1016                     }
1017                 }
1018             }
1019         }
1020 
1021         bh.consume(r);
1022     }
1023 
1024 
1025 
1026 
1027 
1028 
1029 
1030 
1031 
1032 
1033 
1034 
1035 
1036 
1037 
1038 
1039 
1040 
1041 
1042 
1043 
1044     @Benchmark
1045     public void neg(Blackhole bh) {
1046         short[] a = fa.apply(SPECIES.length());
1047         short[] r = fr.apply(SPECIES.length());
1048 
1049         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1050             for (int i = 0; i < a.length; i += SPECIES.length()) {
1051                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
1052                 av.neg().intoArray(r, i);
1053             }
1054         }
1055 
1056         bh.consume(r);
1057     }
1058 
1059     @Benchmark
1060     public void negMasked(Blackhole bh) {
1061         short[] a = fa.apply(SPECIES.length());
1062         short[] r = fr.apply(SPECIES.length());
1063         boolean[] mask = fm.apply(SPECIES.length());
1064         Vector.Mask<Short> vmask = ShortVector.maskFromValues(SPECIES, mask);
1065 
1066         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1067             for (int i = 0; i < a.length; i += SPECIES.length()) {
1068                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
1069                 av.neg(vmask).intoArray(r, i);
1070             }
1071         }
1072 
1073         bh.consume(r);
1074     }
1075 
1076     @Benchmark
1077     public void abs(Blackhole bh) {
1078         short[] a = fa.apply(SPECIES.length());
1079         short[] r = fr.apply(SPECIES.length());
1080 
1081         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1082             for (int i = 0; i < a.length; i += SPECIES.length()) {
1083                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
1084                 av.abs().intoArray(r, i);
1085             }
1086         }
1087 
1088         bh.consume(r);
1089     }
1090 
1091     @Benchmark
1092     public void absMasked(Blackhole bh) {
1093         short[] a = fa.apply(SPECIES.length());
1094         short[] r = fr.apply(SPECIES.length());
1095         boolean[] mask = fm.apply(SPECIES.length());
1096         Vector.Mask<Short> vmask = ShortVector.maskFromValues(SPECIES, mask);
1097 
1098         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1099             for (int i = 0; i < a.length; i += SPECIES.length()) {
1100                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
1101                 av.abs(vmask).intoArray(r, i);
1102             }
1103         }
1104 
1105         bh.consume(r);
1106     }
1107 
1108 
1109     @Benchmark
1110     public void not(Blackhole bh) {
1111         short[] a = fa.apply(SPECIES.length());
1112         short[] r = fr.apply(SPECIES.length());
1113 
1114         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1115             for (int i = 0; i < a.length; i += SPECIES.length()) {
1116                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
1117                 av.not().intoArray(r, i);
1118             }
1119         }
1120 
1121         bh.consume(r);
1122     }
1123 
1124 
1125 
1126     @Benchmark
1127     public void notMasked(Blackhole bh) {
1128         short[] a = fa.apply(SPECIES.length());
1129         short[] r = fr.apply(SPECIES.length());
1130         boolean[] mask = fm.apply(SPECIES.length());
1131         Vector.Mask<Short> vmask = ShortVector.maskFromValues(SPECIES, mask);
1132 
1133         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1134             for (int i = 0; i < a.length; i += SPECIES.length()) {
1135                 ShortVector av = ShortVector.fromArray(SPECIES, a, i);
1136                 av.not(vmask).intoArray(r, i);
1137             }
1138         }
1139 
1140         bh.consume(r);
1141     }
1142 
1143 
1144 
1145 
1146 
1147 }
1148