1 /*
   2  * Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have
  21  * questions.
  22  */
  23 
  24 package benchmark.jdk.incubator.vector;
  25 
  26 import jdk.incubator.vector.Vector;
  27 import jdk.incubator.vector.VectorMask;
  28 import jdk.incubator.vector.VectorShape;
  29 import jdk.incubator.vector.VectorSpecies;
  30 import jdk.incubator.vector.VectorShuffle;
  31 import jdk.incubator.vector.LongVector;
  32 
  33 import java.util.concurrent.TimeUnit;
  34 import java.util.function.BiFunction;
  35 import java.util.function.IntFunction;
  36 
  37 import org.openjdk.jmh.annotations.*;
  38 import org.openjdk.jmh.infra.Blackhole;
  39 
  40 @BenchmarkMode(Mode.Throughput)
  41 @OutputTimeUnit(TimeUnit.MILLISECONDS)
  42 @State(Scope.Benchmark)
  43 @Warmup(iterations = 3, time = 1)
  44 @Measurement(iterations = 5, time = 1)
  45 @Fork(value = 1, jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"})
  46 public class Long256Vector extends AbstractVectorBenchmark {
    // Vector species used by every kernel in this class: the 256-bit long species.
    static final VectorSpecies<Long> SPECIES = LongVector.SPECIES_256;

    // Repetition count of the outer loop wrapped around each kernel; 1 means a single pass.
    static final int INVOC_COUNT = 1; // get rid of outer loop

    // JMH parameter (default "1024"): element count of the benchmark arrays.
    // init() may increase this value before the arrays are allocated.
    @Param("1024")
    int size;
  53 
  54     long[] fill(IntFunction<Long> f) {
  55         long[] array = new long[size];
  56         for (int i = 0; i < array.length; i++) {
  57             array[i] = f.apply(i);
  58         }
  59         return array;
  60     }
  61 
  62     long[] a, b, c, r;
  63     boolean[] m, rm;
  64     int[] s;
  65 
  66     @Setup
  67     public void init() {
  68         size += size % SPECIES.length(); // FIXME: add post-loops
  69 
  70         a = fill(i -> (long)(2*i));
  71         b = fill(i -> (long)(i+1));
  72         c = fill(i -> (long)(i+5));
  73         r = fill(i -> (long)0);
  74 
  75         m = fillMask(size, i -> (i % 2) == 0);
  76         rm = fillMask(size, i -> false);
  77 
  78         s = fillInt(size, i -> RANDOM.nextInt(SPECIES.length()));
  79     }
  80 
  81     final IntFunction<long[]> fa = vl -> a;
  82     final IntFunction<long[]> fb = vl -> b;
  83     final IntFunction<long[]> fc = vl -> c;
  84     final IntFunction<long[]> fr = vl -> r;
  85     final IntFunction<boolean[]> fm = vl -> m;
  86     final IntFunction<boolean[]> fmr = vl -> rm;
  87     final BiFunction<Integer,Integer,int[]> fs = (i,j) -> s;
  88 
  89 
  90     @Benchmark
  91     public void add(Blackhole bh) {
  92         long[] a = fa.apply(SPECIES.length());
  93         long[] b = fb.apply(SPECIES.length());
  94         long[] r = fr.apply(SPECIES.length());
  95 
  96         for (int ic = 0; ic < INVOC_COUNT; ic++) {
  97             for (int i = 0; i < a.length; i += SPECIES.length()) {
  98                 LongVector av = LongVector.fromArray(SPECIES, a, i);
  99                 LongVector bv = LongVector.fromArray(SPECIES, b, i);
 100                 av.add(bv).intoArray(r, i);
 101             }
 102         }
 103 
 104         bh.consume(r);
 105     }
 106 
 107     @Benchmark
 108     public void addMasked(Blackhole bh) {
 109         long[] a = fa.apply(SPECIES.length());
 110         long[] b = fb.apply(SPECIES.length());
 111         long[] r = fr.apply(SPECIES.length());
 112         boolean[] mask = fm.apply(SPECIES.length());
 113         VectorMask<Long> vmask = VectorMask.fromValues(SPECIES, mask);
 114 
 115         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 116             for (int i = 0; i < a.length; i += SPECIES.length()) {
 117                 LongVector av = LongVector.fromArray(SPECIES, a, i);
 118                 LongVector bv = LongVector.fromArray(SPECIES, b, i);
 119                 av.add(bv, vmask).intoArray(r, i);
 120             }
 121         }
 122 
 123         bh.consume(r);
 124     }
 125 
 126     @Benchmark
 127     public void sub(Blackhole bh) {
 128         long[] a = fa.apply(SPECIES.length());
 129         long[] b = fb.apply(SPECIES.length());
 130         long[] r = fr.apply(SPECIES.length());
 131 
 132         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 133             for (int i = 0; i < a.length; i += SPECIES.length()) {
 134                 LongVector av = LongVector.fromArray(SPECIES, a, i);
 135                 LongVector bv = LongVector.fromArray(SPECIES, b, i);
 136                 av.sub(bv).intoArray(r, i);
 137             }
 138         }
 139 
 140         bh.consume(r);
 141     }
 142 
 143     @Benchmark
 144     public void subMasked(Blackhole bh) {
 145         long[] a = fa.apply(SPECIES.length());
 146         long[] b = fb.apply(SPECIES.length());
 147         long[] r = fr.apply(SPECIES.length());
 148         boolean[] mask = fm.apply(SPECIES.length());
 149         VectorMask<Long> vmask = VectorMask.fromValues(SPECIES, mask);
 150 
 151         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 152             for (int i = 0; i < a.length; i += SPECIES.length()) {
 153                 LongVector av = LongVector.fromArray(SPECIES, a, i);
 154                 LongVector bv = LongVector.fromArray(SPECIES, b, i);
 155                 av.sub(bv, vmask).intoArray(r, i);
 156             }
 157         }
 158 
 159         bh.consume(r);
 160     }
 161 
 162 
 163 
 164     @Benchmark
 165     public void mul(Blackhole bh) {
 166         long[] a = fa.apply(SPECIES.length());
 167         long[] b = fb.apply(SPECIES.length());
 168         long[] r = fr.apply(SPECIES.length());
 169 
 170         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 171             for (int i = 0; i < a.length; i += SPECIES.length()) {
 172                 LongVector av = LongVector.fromArray(SPECIES, a, i);
 173                 LongVector bv = LongVector.fromArray(SPECIES, b, i);
 174                 av.mul(bv).intoArray(r, i);
 175             }
 176         }
 177 
 178         bh.consume(r);
 179     }
 180 
 181     @Benchmark
 182     public void mulMasked(Blackhole bh) {
 183         long[] a = fa.apply(SPECIES.length());
 184         long[] b = fb.apply(SPECIES.length());
 185         long[] r = fr.apply(SPECIES.length());
 186         boolean[] mask = fm.apply(SPECIES.length());
 187         VectorMask<Long> vmask = VectorMask.fromValues(SPECIES, mask);
 188 
 189         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 190             for (int i = 0; i < a.length; i += SPECIES.length()) {
 191                 LongVector av = LongVector.fromArray(SPECIES, a, i);
 192                 LongVector bv = LongVector.fromArray(SPECIES, b, i);
 193                 av.mul(bv, vmask).intoArray(r, i);
 194             }
 195         }
 196 
 197         bh.consume(r);
 198     }
 199 
 200 
 201     @Benchmark
 202     public void and(Blackhole bh) {
 203         long[] a = fa.apply(SPECIES.length());
 204         long[] b = fb.apply(SPECIES.length());
 205         long[] r = fr.apply(SPECIES.length());
 206 
 207         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 208             for (int i = 0; i < a.length; i += SPECIES.length()) {
 209                 LongVector av = LongVector.fromArray(SPECIES, a, i);
 210                 LongVector bv = LongVector.fromArray(SPECIES, b, i);
 211                 av.and(bv).intoArray(r, i);
 212             }
 213         }
 214 
 215         bh.consume(r);
 216     }
 217 
 218 
 219 
 220     @Benchmark
 221     public void andMasked(Blackhole bh) {
 222         long[] a = fa.apply(SPECIES.length());
 223         long[] b = fb.apply(SPECIES.length());
 224         long[] r = fr.apply(SPECIES.length());
 225         boolean[] mask = fm.apply(SPECIES.length());
 226         VectorMask<Long> vmask = VectorMask.fromValues(SPECIES, mask);
 227 
 228         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 229             for (int i = 0; i < a.length; i += SPECIES.length()) {
 230                 LongVector av = LongVector.fromArray(SPECIES, a, i);
 231                 LongVector bv = LongVector.fromArray(SPECIES, b, i);
 232                 av.and(bv, vmask).intoArray(r, i);
 233             }
 234         }
 235 
 236         bh.consume(r);
 237     }
 238 
 239 
 240 
 241     @Benchmark
 242     public void or(Blackhole bh) {
 243         long[] a = fa.apply(SPECIES.length());
 244         long[] b = fb.apply(SPECIES.length());
 245         long[] r = fr.apply(SPECIES.length());
 246 
 247         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 248             for (int i = 0; i < a.length; i += SPECIES.length()) {
 249                 LongVector av = LongVector.fromArray(SPECIES, a, i);
 250                 LongVector bv = LongVector.fromArray(SPECIES, b, i);
 251                 av.or(bv).intoArray(r, i);
 252             }
 253         }
 254 
 255         bh.consume(r);
 256     }
 257 
 258 
 259 
 260     @Benchmark
 261     public void orMasked(Blackhole bh) {
 262         long[] a = fa.apply(SPECIES.length());
 263         long[] b = fb.apply(SPECIES.length());
 264         long[] r = fr.apply(SPECIES.length());
 265         boolean[] mask = fm.apply(SPECIES.length());
 266         VectorMask<Long> vmask = VectorMask.fromValues(SPECIES, mask);
 267 
 268         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 269             for (int i = 0; i < a.length; i += SPECIES.length()) {
 270                 LongVector av = LongVector.fromArray(SPECIES, a, i);
 271                 LongVector bv = LongVector.fromArray(SPECIES, b, i);
 272                 av.or(bv, vmask).intoArray(r, i);
 273             }
 274         }
 275 
 276         bh.consume(r);
 277     }
 278 
 279 
 280 
 281     @Benchmark
 282     public void xor(Blackhole bh) {
 283         long[] a = fa.apply(SPECIES.length());
 284         long[] b = fb.apply(SPECIES.length());
 285         long[] r = fr.apply(SPECIES.length());
 286 
 287         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 288             for (int i = 0; i < a.length; i += SPECIES.length()) {
 289                 LongVector av = LongVector.fromArray(SPECIES, a, i);
 290                 LongVector bv = LongVector.fromArray(SPECIES, b, i);
 291                 av.xor(bv).intoArray(r, i);
 292             }
 293         }
 294 
 295         bh.consume(r);
 296     }
 297 
 298 
 299 
 300     @Benchmark
 301     public void xorMasked(Blackhole bh) {
 302         long[] a = fa.apply(SPECIES.length());
 303         long[] b = fb.apply(SPECIES.length());
 304         long[] r = fr.apply(SPECIES.length());
 305         boolean[] mask = fm.apply(SPECIES.length());
 306         VectorMask<Long> vmask = VectorMask.fromValues(SPECIES, mask);
 307 
 308         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 309             for (int i = 0; i < a.length; i += SPECIES.length()) {
 310                 LongVector av = LongVector.fromArray(SPECIES, a, i);
 311                 LongVector bv = LongVector.fromArray(SPECIES, b, i);
 312                 av.xor(bv, vmask).intoArray(r, i);
 313             }
 314         }
 315 
 316         bh.consume(r);
 317     }
 318 
 319 
 320 
 321     @Benchmark
 322     public void shiftLeft(Blackhole bh) {
 323         long[] a = fa.apply(SPECIES.length());
 324         long[] b = fb.apply(SPECIES.length());
 325         long[] r = fr.apply(SPECIES.length());
 326 
 327         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 328             for (int i = 0; i < a.length; i += SPECIES.length()) {
 329                 LongVector av = LongVector.fromArray(SPECIES, a, i);
 330                 LongVector bv = LongVector.fromArray(SPECIES, b, i);
 331                 av.shiftLeft(bv).intoArray(r, i);
 332             }
 333         }
 334 
 335         bh.consume(r);
 336     }
 337 
 338 
 339 
 340     @Benchmark
 341     public void shiftLeftMasked(Blackhole bh) {
 342         long[] a = fa.apply(SPECIES.length());
 343         long[] b = fb.apply(SPECIES.length());
 344         long[] r = fr.apply(SPECIES.length());
 345         boolean[] mask = fm.apply(SPECIES.length());
 346         VectorMask<Long> vmask = VectorMask.fromValues(SPECIES, mask);
 347 
 348         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 349             for (int i = 0; i < a.length; i += SPECIES.length()) {
 350                 LongVector av = LongVector.fromArray(SPECIES, a, i);
 351                 LongVector bv = LongVector.fromArray(SPECIES, b, i);
 352                 av.shiftLeft(bv, vmask).intoArray(r, i);
 353             }
 354         }
 355 
 356         bh.consume(r);
 357     }
 358 
 359 
 360 
 361 
 362 
 363 
 364 
 365     @Benchmark
 366     public void shiftRight(Blackhole bh) {
 367         long[] a = fa.apply(SPECIES.length());
 368         long[] b = fb.apply(SPECIES.length());
 369         long[] r = fr.apply(SPECIES.length());
 370 
 371         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 372             for (int i = 0; i < a.length; i += SPECIES.length()) {
 373                 LongVector av = LongVector.fromArray(SPECIES, a, i);
 374                 LongVector bv = LongVector.fromArray(SPECIES, b, i);
 375                 av.shiftRight(bv).intoArray(r, i);
 376             }
 377         }
 378 
 379         bh.consume(r);
 380     }
 381 
 382 
 383 
 384     @Benchmark
 385     public void shiftRightMasked(Blackhole bh) {
 386         long[] a = fa.apply(SPECIES.length());
 387         long[] b = fb.apply(SPECIES.length());
 388         long[] r = fr.apply(SPECIES.length());
 389         boolean[] mask = fm.apply(SPECIES.length());
 390         VectorMask<Long> vmask = VectorMask.fromValues(SPECIES, mask);
 391 
 392         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 393             for (int i = 0; i < a.length; i += SPECIES.length()) {
 394                 LongVector av = LongVector.fromArray(SPECIES, a, i);
 395                 LongVector bv = LongVector.fromArray(SPECIES, b, i);
 396                 av.shiftRight(bv, vmask).intoArray(r, i);
 397             }
 398         }
 399 
 400         bh.consume(r);
 401     }
 402 
 403 
 404 
 405 
 406 
 407 
 408 
 409     @Benchmark
 410     public void shiftArithmeticRight(Blackhole bh) {
 411         long[] a = fa.apply(SPECIES.length());
 412         long[] b = fb.apply(SPECIES.length());
 413         long[] r = fr.apply(SPECIES.length());
 414 
 415         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 416             for (int i = 0; i < a.length; i += SPECIES.length()) {
 417                 LongVector av = LongVector.fromArray(SPECIES, a, i);
 418                 LongVector bv = LongVector.fromArray(SPECIES, b, i);
 419                 av.shiftArithmeticRight(bv).intoArray(r, i);
 420             }
 421         }
 422 
 423         bh.consume(r);
 424     }
 425 
 426 
 427 
 428     @Benchmark
 429     public void shiftArithmeticRightMasked(Blackhole bh) {
 430         long[] a = fa.apply(SPECIES.length());
 431         long[] b = fb.apply(SPECIES.length());
 432         long[] r = fr.apply(SPECIES.length());
 433         boolean[] mask = fm.apply(SPECIES.length());
 434         VectorMask<Long> vmask = VectorMask.fromValues(SPECIES, mask);
 435 
 436         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 437             for (int i = 0; i < a.length; i += SPECIES.length()) {
 438                 LongVector av = LongVector.fromArray(SPECIES, a, i);
 439                 LongVector bv = LongVector.fromArray(SPECIES, b, i);
 440                 av.shiftArithmeticRight(bv, vmask).intoArray(r, i);
 441             }
 442         }
 443 
 444         bh.consume(r);
 445     }
 446 
 447 
 448 
 449 
 450 
 451 
 452 
 453     @Benchmark
 454     public void shiftLeftShift(Blackhole bh) {
 455         long[] a = fa.apply(SPECIES.length());
 456         long[] b = fb.apply(SPECIES.length());
 457         long[] r = fr.apply(SPECIES.length());
 458 
 459         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 460             for (int i = 0; i < a.length; i += SPECIES.length()) {
 461                 LongVector av = LongVector.fromArray(SPECIES, a, i);
 462                 av.shiftLeft((int)b[i]).intoArray(r, i);
 463             }
 464         }
 465 
 466         bh.consume(r);
 467     }
 468 
 469 
 470 
 471     @Benchmark
 472     public void shiftLeftMaskedShift(Blackhole bh) {
 473         long[] a = fa.apply(SPECIES.length());
 474         long[] b = fb.apply(SPECIES.length());
 475         long[] r = fr.apply(SPECIES.length());
 476         boolean[] mask = fm.apply(SPECIES.length());
 477         VectorMask<Long> vmask = VectorMask.fromValues(SPECIES, mask);
 478 
 479         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 480             for (int i = 0; i < a.length; i += SPECIES.length()) {
 481                 LongVector av = LongVector.fromArray(SPECIES, a, i);
 482                 av.shiftLeft((int)b[i], vmask).intoArray(r, i);
 483             }
 484         }
 485 
 486         bh.consume(r);
 487     }
 488 
 489 
 490 
 491 
 492 
 493 
 494 
 495     @Benchmark
 496     public void shiftRightShift(Blackhole bh) {
 497         long[] a = fa.apply(SPECIES.length());
 498         long[] b = fb.apply(SPECIES.length());
 499         long[] r = fr.apply(SPECIES.length());
 500 
 501         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 502             for (int i = 0; i < a.length; i += SPECIES.length()) {
 503                 LongVector av = LongVector.fromArray(SPECIES, a, i);
 504                 av.shiftRight((int)b[i]).intoArray(r, i);
 505             }
 506         }
 507 
 508         bh.consume(r);
 509     }
 510 
 511 
 512 
 513     @Benchmark
 514     public void shiftRightMaskedShift(Blackhole bh) {
 515         long[] a = fa.apply(SPECIES.length());
 516         long[] b = fb.apply(SPECIES.length());
 517         long[] r = fr.apply(SPECIES.length());
 518         boolean[] mask = fm.apply(SPECIES.length());
 519         VectorMask<Long> vmask = VectorMask.fromValues(SPECIES, mask);
 520 
 521         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 522             for (int i = 0; i < a.length; i += SPECIES.length()) {
 523                 LongVector av = LongVector.fromArray(SPECIES, a, i);
 524                 av.shiftRight((int)b[i], vmask).intoArray(r, i);
 525             }
 526         }
 527 
 528         bh.consume(r);
 529     }
 530 
 531 
 532 
 533 
 534 
 535 
 536 
 537     @Benchmark
 538     public void shiftArithmeticRightShift(Blackhole bh) {
 539         long[] a = fa.apply(SPECIES.length());
 540         long[] b = fb.apply(SPECIES.length());
 541         long[] r = fr.apply(SPECIES.length());
 542 
 543         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 544             for (int i = 0; i < a.length; i += SPECIES.length()) {
 545                 LongVector av = LongVector.fromArray(SPECIES, a, i);
 546                 av.shiftArithmeticRight((int)b[i]).intoArray(r, i);
 547             }
 548         }
 549 
 550         bh.consume(r);
 551     }
 552 
 553 
 554 
 555     @Benchmark
 556     public void shiftArithmeticRightMaskedShift(Blackhole bh) {
 557         long[] a = fa.apply(SPECIES.length());
 558         long[] b = fb.apply(SPECIES.length());
 559         long[] r = fr.apply(SPECIES.length());
 560         boolean[] mask = fm.apply(SPECIES.length());
 561         VectorMask<Long> vmask = VectorMask.fromValues(SPECIES, mask);
 562 
 563         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 564             for (int i = 0; i < a.length; i += SPECIES.length()) {
 565                 LongVector av = LongVector.fromArray(SPECIES, a, i);
 566                 av.shiftArithmeticRight((int)b[i], vmask).intoArray(r, i);
 567             }
 568         }
 569 
 570         bh.consume(r);
 571     }
 572 
 573 
 574 
 575 
 576 
 577 
 578     @Benchmark
 579     public void max(Blackhole bh) {
 580         long[] a = fa.apply(SPECIES.length());
 581         long[] b = fb.apply(SPECIES.length());
 582         long[] r = fr.apply(SPECIES.length());
 583 
 584         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 585             for (int i = 0; i < a.length; i += SPECIES.length()) {
 586                 LongVector av = LongVector.fromArray(SPECIES, a, i);
 587                 LongVector bv = LongVector.fromArray(SPECIES, b, i);
 588                 av.max(bv).intoArray(r, i);
 589             }
 590         }
 591 
 592         bh.consume(r);
 593     }
 594 
 595     @Benchmark
 596     public void min(Blackhole bh) {
 597         long[] a = fa.apply(SPECIES.length());
 598         long[] b = fb.apply(SPECIES.length());
 599         long[] r = fr.apply(SPECIES.length());
 600 
 601         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 602             for (int i = 0; i < a.length; i += SPECIES.length()) {
 603                 LongVector av = LongVector.fromArray(SPECIES, a, i);
 604                 LongVector bv = LongVector.fromArray(SPECIES, b, i);
 605                 av.min(bv).intoArray(r, i);
 606             }
 607         }
 608 
 609         bh.consume(r);
 610     }
 611 
 612 
 613     @Benchmark
 614     public void andLanes(Blackhole bh) {
 615         long[] a = fa.apply(SPECIES.length());
 616         long ra = -1;
 617 
 618         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 619             ra = -1;
 620             for (int i = 0; i < a.length; i += SPECIES.length()) {
 621                 LongVector av = LongVector.fromArray(SPECIES, a, i);
 622                 ra &= av.andLanes();
 623             }
 624         }
 625         bh.consume(ra);
 626     }
 627 
 628 
 629 
 630     @Benchmark
 631     public void orLanes(Blackhole bh) {
 632         long[] a = fa.apply(SPECIES.length());
 633         long ra = 0;
 634 
 635         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 636             ra = 0;
 637             for (int i = 0; i < a.length; i += SPECIES.length()) {
 638                 LongVector av = LongVector.fromArray(SPECIES, a, i);
 639                 ra |= av.orLanes();
 640             }
 641         }
 642         bh.consume(ra);
 643     }
 644 
 645 
 646 
 647     @Benchmark
 648     public void xorLanes(Blackhole bh) {
 649         long[] a = fa.apply(SPECIES.length());
 650         long ra = 0;
 651 
 652         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 653             ra = 0;
 654             for (int i = 0; i < a.length; i += SPECIES.length()) {
 655                 LongVector av = LongVector.fromArray(SPECIES, a, i);
 656                 ra ^= av.xorLanes();
 657             }
 658         }
 659         bh.consume(ra);
 660     }
 661 
 662 
 663     @Benchmark
 664     public void addLanes(Blackhole bh) {
 665         long[] a = fa.apply(SPECIES.length());
 666         long ra = 0;
 667 
 668         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 669             ra = 0;
 670             for (int i = 0; i < a.length; i += SPECIES.length()) {
 671                 LongVector av = LongVector.fromArray(SPECIES, a, i);
 672                 ra += av.addLanes();
 673             }
 674         }
 675         bh.consume(ra);
 676     }
 677 
 678     @Benchmark
 679     public void mulLanes(Blackhole bh) {
 680         long[] a = fa.apply(SPECIES.length());
 681         long ra = 1;
 682 
 683         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 684             ra = 1;
 685             for (int i = 0; i < a.length; i += SPECIES.length()) {
 686                 LongVector av = LongVector.fromArray(SPECIES, a, i);
 687                 ra *= av.mulLanes();
 688             }
 689         }
 690         bh.consume(ra);
 691     }
 692 
 693     @Benchmark
 694     public void minLanes(Blackhole bh) {
 695         long[] a = fa.apply(SPECIES.length());
 696         long ra = Long.MAX_VALUE;
 697 
 698         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 699             ra = Long.MAX_VALUE;
 700             for (int i = 0; i < a.length; i += SPECIES.length()) {
 701                 LongVector av = LongVector.fromArray(SPECIES, a, i);
 702                 ra = (long)Math.min(ra, av.minLanes());
 703             }
 704         }
 705         bh.consume(ra);
 706     }
 707 
 708     @Benchmark
 709     public void maxLanes(Blackhole bh) {
 710         long[] a = fa.apply(SPECIES.length());
 711         long ra = Long.MIN_VALUE;
 712 
 713         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 714             ra = Long.MIN_VALUE;
 715             for (int i = 0; i < a.length; i += SPECIES.length()) {
 716                 LongVector av = LongVector.fromArray(SPECIES, a, i);
 717                 ra = (long)Math.max(ra, av.maxLanes());
 718             }
 719         }
 720         bh.consume(ra);
 721     }
 722 
 723 
 724     @Benchmark
 725     public void anyTrue(Blackhole bh) {
 726         boolean[] mask = fm.apply(SPECIES.length());
 727         boolean[] r = fmr.apply(SPECIES.length());
 728 
 729         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 730             for (int i = 0; i < mask.length; i += SPECIES.length()) {
 731                 VectorMask<Long> vmask = VectorMask.fromArray(SPECIES, mask, i);
 732                 r[i] = vmask.anyTrue();
 733             }
 734         }
 735 
 736         bh.consume(r);
 737     }
 738 
 739 
 740 
 741     @Benchmark
 742     public void allTrue(Blackhole bh) {
 743         boolean[] mask = fm.apply(SPECIES.length());
 744         boolean[] r = fmr.apply(SPECIES.length());
 745 
 746         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 747             for (int i = 0; i < mask.length; i += SPECIES.length()) {
 748                 VectorMask<Long> vmask = VectorMask.fromArray(SPECIES, mask, i);
 749                 r[i] = vmask.allTrue();
 750             }
 751         }
 752 
 753         bh.consume(r);
 754     }
 755 
 756 
 757     @Benchmark
 758     public void with(Blackhole bh) {
 759         long[] a = fa.apply(SPECIES.length());
 760         long[] r = fr.apply(SPECIES.length());
 761 
 762         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 763             for (int i = 0; i < a.length; i += SPECIES.length()) {
 764                 LongVector av = LongVector.fromArray(SPECIES, a, i);
 765                 av.with(0, (long)4).intoArray(r, i);
 766             }
 767         }
 768 
 769         bh.consume(r);
 770     }
 771 
 772     @Benchmark
 773     public Object lessThan() {
 774         long[] a = fa.apply(size);
 775         long[] b = fb.apply(size);
 776         boolean[] ms = fm.apply(size);
 777         VectorMask<Long> m = VectorMask.fromArray(SPECIES, ms, 0);
 778 
 779         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 780             for (int i = 0; i < a.length; i += SPECIES.length()) {
 781                 LongVector av = LongVector.fromArray(SPECIES, a, i);
 782                 LongVector bv = LongVector.fromArray(SPECIES, b, i);
 783                 VectorMask<Long> mv = av.lessThan(bv);
 784 
 785                 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
 786             }
 787         }
 788         return m;
 789     }
 790 
 791 
 792     @Benchmark
 793     public Object greaterThan() {
 794         long[] a = fa.apply(size);
 795         long[] b = fb.apply(size);
 796         boolean[] ms = fm.apply(size);
 797         VectorMask<Long> m = VectorMask.fromArray(SPECIES, ms, 0);
 798 
 799         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 800             for (int i = 0; i < a.length; i += SPECIES.length()) {
 801                 LongVector av = LongVector.fromArray(SPECIES, a, i);
 802                 LongVector bv = LongVector.fromArray(SPECIES, b, i);
 803                 VectorMask<Long> mv = av.greaterThan(bv);
 804 
 805                 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
 806             }
 807         }
 808         return m;
 809     }
 810 
 811 
 812     @Benchmark
 813     public Object equal() {
 814         long[] a = fa.apply(size);
 815         long[] b = fb.apply(size);
 816         boolean[] ms = fm.apply(size);
 817         VectorMask<Long> m = VectorMask.fromArray(SPECIES, ms, 0);
 818 
 819         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 820             for (int i = 0; i < a.length; i += SPECIES.length()) {
 821                 LongVector av = LongVector.fromArray(SPECIES, a, i);
 822                 LongVector bv = LongVector.fromArray(SPECIES, b, i);
 823                 VectorMask<Long> mv = av.equal(bv);
 824 
 825                 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
 826             }
 827         }
 828         return m;
 829     }
 830 
 831 
 832     @Benchmark
 833     public Object notEqual() {
 834         long[] a = fa.apply(size);
 835         long[] b = fb.apply(size);
 836         boolean[] ms = fm.apply(size);
 837         VectorMask<Long> m = VectorMask.fromArray(SPECIES, ms, 0);
 838 
 839         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 840             for (int i = 0; i < a.length; i += SPECIES.length()) {
 841                 LongVector av = LongVector.fromArray(SPECIES, a, i);
 842                 LongVector bv = LongVector.fromArray(SPECIES, b, i);
 843                 VectorMask<Long> mv = av.notEqual(bv);
 844 
 845                 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
 846             }
 847         }
 848         return m;
 849     }
 850 
 851 
 852     @Benchmark
 853     public Object lessThanEq() {
 854         long[] a = fa.apply(size);
 855         long[] b = fb.apply(size);
 856         boolean[] ms = fm.apply(size);
 857         VectorMask<Long> m = VectorMask.fromArray(SPECIES, ms, 0);
 858 
 859         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 860             for (int i = 0; i < a.length; i += SPECIES.length()) {
 861                 LongVector av = LongVector.fromArray(SPECIES, a, i);
 862                 LongVector bv = LongVector.fromArray(SPECIES, b, i);
 863                 VectorMask<Long> mv = av.lessThanEq(bv);
 864 
 865                 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
 866             }
 867         }
 868         return m;
 869     }
 870 
 871 
 872     @Benchmark
 873     public Object greaterThanEq() {
 874         long[] a = fa.apply(size);
 875         long[] b = fb.apply(size);
 876         boolean[] ms = fm.apply(size);
 877         VectorMask<Long> m = VectorMask.fromArray(SPECIES, ms, 0);
 878 
 879         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 880             for (int i = 0; i < a.length; i += SPECIES.length()) {
 881                 LongVector av = LongVector.fromArray(SPECIES, a, i);
 882                 LongVector bv = LongVector.fromArray(SPECIES, b, i);
 883                 VectorMask<Long> mv = av.greaterThanEq(bv);
 884 
 885                 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
 886             }
 887         }
 888         return m;
 889     }
 890 
 891 
 892     @Benchmark
 893     public void blend(Blackhole bh) {
 894         long[] a = fa.apply(SPECIES.length());
 895         long[] b = fb.apply(SPECIES.length());
 896         long[] r = fr.apply(SPECIES.length());
 897         boolean[] mask = fm.apply(SPECIES.length());
 898         VectorMask<Long> vmask = VectorMask.fromValues(SPECIES, mask);
 899 
 900         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 901             for (int i = 0; i < a.length; i += SPECIES.length()) {
 902                 LongVector av = LongVector.fromArray(SPECIES, a, i);
 903                 LongVector bv = LongVector.fromArray(SPECIES, b, i);
 904                 av.blend(bv, vmask).intoArray(r, i);
 905             }
 906         }
 907 
 908         bh.consume(r);
 909     }
 910 
 911     @Benchmark
 912     public void rearrange(Blackhole bh) {
 913         long[] a = fa.apply(SPECIES.length());
 914         int[] order = fs.apply(a.length, SPECIES.length());
 915         long[] r = fr.apply(SPECIES.length());
 916 
 917         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 918             for (int i = 0; i < a.length; i += SPECIES.length()) {
 919                 LongVector av = LongVector.fromArray(SPECIES, a, i);
 920                 av.rearrange(VectorShuffle.fromArray(SPECIES, order, i)).intoArray(r, i);
 921             }
 922         }
 923 
 924         bh.consume(r);
 925     }
 926 
 927     @Benchmark
 928     public void extract(Blackhole bh) {
 929         long[] a = fa.apply(SPECIES.length());
 930         long[] r = fr.apply(SPECIES.length());
 931 
 932         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 933             for (int i = 0; i < a.length; i += SPECIES.length()) {
 934                 LongVector av = LongVector.fromArray(SPECIES, a, i);
 935                 int num_lanes = SPECIES.length();
 936                 // Manually unroll because full unroll happens after intrinsification.
 937                 // Unroll is needed because get intrinsic requires for index to be a known constant.
 938                 if (num_lanes == 1) {
 939                     r[i]=av.lane(0);
 940                 } else if (num_lanes == 2) {
 941                     r[i]=av.lane(0);
 942                     r[i+1]=av.lane(1);
 943                 } else if (num_lanes == 4) {
 944                     r[i]=av.lane(0);
 945                     r[i+1]=av.lane(1);
 946                     r[i+2]=av.lane(2);
 947                     r[i+3]=av.lane(3);
 948                 } else if (num_lanes == 8) {
 949                     r[i]=av.lane(0);
 950                     r[i+1]=av.lane(1);
 951                     r[i+2]=av.lane(2);
 952                     r[i+3]=av.lane(3);
 953                     r[i+4]=av.lane(4);
 954                     r[i+5]=av.lane(5);
 955                     r[i+6]=av.lane(6);
 956                     r[i+7]=av.lane(7);
 957                 } else if (num_lanes == 16) {
 958                     r[i]=av.lane(0);
 959                     r[i+1]=av.lane(1);
 960                     r[i+2]=av.lane(2);
 961                     r[i+3]=av.lane(3);
 962                     r[i+4]=av.lane(4);
 963                     r[i+5]=av.lane(5);
 964                     r[i+6]=av.lane(6);
 965                     r[i+7]=av.lane(7);
 966                     r[i+8]=av.lane(8);
 967                     r[i+9]=av.lane(9);
 968                     r[i+10]=av.lane(10);
 969                     r[i+11]=av.lane(11);
 970                     r[i+12]=av.lane(12);
 971                     r[i+13]=av.lane(13);
 972                     r[i+14]=av.lane(14);
 973                     r[i+15]=av.lane(15);
 974                 } else if (num_lanes == 32) {
 975                     r[i]=av.lane(0);
 976                     r[i+1]=av.lane(1);
 977                     r[i+2]=av.lane(2);
 978                     r[i+3]=av.lane(3);
 979                     r[i+4]=av.lane(4);
 980                     r[i+5]=av.lane(5);
 981                     r[i+6]=av.lane(6);
 982                     r[i+7]=av.lane(7);
 983                     r[i+8]=av.lane(8);
 984                     r[i+9]=av.lane(9);
 985                     r[i+10]=av.lane(10);
 986                     r[i+11]=av.lane(11);
 987                     r[i+12]=av.lane(12);
 988                     r[i+13]=av.lane(13);
 989                     r[i+14]=av.lane(14);
 990                     r[i+15]=av.lane(15);
 991                     r[i+16]=av.lane(16);
 992                     r[i+17]=av.lane(17);
 993                     r[i+18]=av.lane(18);
 994                     r[i+19]=av.lane(19);
 995                     r[i+20]=av.lane(20);
 996                     r[i+21]=av.lane(21);
 997                     r[i+22]=av.lane(22);
 998                     r[i+23]=av.lane(23);
 999                     r[i+24]=av.lane(24);
1000                     r[i+25]=av.lane(25);
1001                     r[i+26]=av.lane(26);
1002                     r[i+27]=av.lane(27);
1003                     r[i+28]=av.lane(28);
1004                     r[i+29]=av.lane(29);
1005                     r[i+30]=av.lane(30);
1006                     r[i+31]=av.lane(31);
1007                 } else if (num_lanes == 64) {
1008                     r[i]=av.lane(0);
1009                     r[i+1]=av.lane(1);
1010                     r[i+2]=av.lane(2);
1011                     r[i+3]=av.lane(3);
1012                     r[i+4]=av.lane(4);
1013                     r[i+5]=av.lane(5);
1014                     r[i+6]=av.lane(6);
1015                     r[i+7]=av.lane(7);
1016                     r[i+8]=av.lane(8);
1017                     r[i+9]=av.lane(9);
1018                     r[i+10]=av.lane(10);
1019                     r[i+11]=av.lane(11);
1020                     r[i+12]=av.lane(12);
1021                     r[i+13]=av.lane(13);
1022                     r[i+14]=av.lane(14);
1023                     r[i+15]=av.lane(15);
1024                     r[i+16]=av.lane(16);
1025                     r[i+17]=av.lane(17);
1026                     r[i+18]=av.lane(18);
1027                     r[i+19]=av.lane(19);
1028                     r[i+20]=av.lane(20);
1029                     r[i+21]=av.lane(21);
1030                     r[i+22]=av.lane(22);
1031                     r[i+23]=av.lane(23);
1032                     r[i+24]=av.lane(24);
1033                     r[i+25]=av.lane(25);
1034                     r[i+26]=av.lane(26);
1035                     r[i+27]=av.lane(27);
1036                     r[i+28]=av.lane(28);
1037                     r[i+29]=av.lane(29);
1038                     r[i+30]=av.lane(30);
1039                     r[i+31]=av.lane(31);
1040                     r[i+32]=av.lane(32);
1041                     r[i+33]=av.lane(33);
1042                     r[i+34]=av.lane(34);
1043                     r[i+35]=av.lane(35);
1044                     r[i+36]=av.lane(36);
1045                     r[i+37]=av.lane(37);
1046                     r[i+38]=av.lane(38);
1047                     r[i+39]=av.lane(39);
1048                     r[i+40]=av.lane(40);
1049                     r[i+41]=av.lane(41);
1050                     r[i+42]=av.lane(42);
1051                     r[i+43]=av.lane(43);
1052                     r[i+44]=av.lane(44);
1053                     r[i+45]=av.lane(45);
1054                     r[i+46]=av.lane(46);
1055                     r[i+47]=av.lane(47);
1056                     r[i+48]=av.lane(48);
1057                     r[i+49]=av.lane(49);
1058                     r[i+50]=av.lane(50);
1059                     r[i+51]=av.lane(51);
1060                     r[i+52]=av.lane(52);
1061                     r[i+53]=av.lane(53);
1062                     r[i+54]=av.lane(54);
1063                     r[i+55]=av.lane(55);
1064                     r[i+56]=av.lane(56);
1065                     r[i+57]=av.lane(57);
1066                     r[i+58]=av.lane(58);
1067                     r[i+59]=av.lane(59);
1068                     r[i+60]=av.lane(60);
1069                     r[i+61]=av.lane(61);
1070                     r[i+62]=av.lane(62);
1071                     r[i+63]=av.lane(63);
1072                 } else {
1073                     for (int j = 0; j < SPECIES.length(); j++) {
1074                         r[i+j]=av.lane(j);
1075                     }
1076                 }
1077             }
1078         }
1079 
1080         bh.consume(r);
1081     }
1082 
1083 
1084 
1085 
1086 
1087 
1088 
1089 
1090 
1091 
1092 
1093 
1094 
1095 
1096 
1097 
1098 
1099 
1100 
1101 
1102 
1103     @Benchmark
1104     public void neg(Blackhole bh) {
1105         long[] a = fa.apply(SPECIES.length());
1106         long[] r = fr.apply(SPECIES.length());
1107 
1108         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1109             for (int i = 0; i < a.length; i += SPECIES.length()) {
1110                 LongVector av = LongVector.fromArray(SPECIES, a, i);
1111                 av.neg().intoArray(r, i);
1112             }
1113         }
1114 
1115         bh.consume(r);
1116     }
1117 
1118     @Benchmark
1119     public void negMasked(Blackhole bh) {
1120         long[] a = fa.apply(SPECIES.length());
1121         long[] r = fr.apply(SPECIES.length());
1122         boolean[] mask = fm.apply(SPECIES.length());
1123         VectorMask<Long> vmask = VectorMask.fromValues(SPECIES, mask);
1124 
1125         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1126             for (int i = 0; i < a.length; i += SPECIES.length()) {
1127                 LongVector av = LongVector.fromArray(SPECIES, a, i);
1128                 av.neg(vmask).intoArray(r, i);
1129             }
1130         }
1131 
1132         bh.consume(r);
1133     }
1134 
1135     @Benchmark
1136     public void abs(Blackhole bh) {
1137         long[] a = fa.apply(SPECIES.length());
1138         long[] r = fr.apply(SPECIES.length());
1139 
1140         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1141             for (int i = 0; i < a.length; i += SPECIES.length()) {
1142                 LongVector av = LongVector.fromArray(SPECIES, a, i);
1143                 av.abs().intoArray(r, i);
1144             }
1145         }
1146 
1147         bh.consume(r);
1148     }
1149 
1150     @Benchmark
1151     public void absMasked(Blackhole bh) {
1152         long[] a = fa.apply(SPECIES.length());
1153         long[] r = fr.apply(SPECIES.length());
1154         boolean[] mask = fm.apply(SPECIES.length());
1155         VectorMask<Long> vmask = VectorMask.fromValues(SPECIES, mask);
1156 
1157         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1158             for (int i = 0; i < a.length; i += SPECIES.length()) {
1159                 LongVector av = LongVector.fromArray(SPECIES, a, i);
1160                 av.abs(vmask).intoArray(r, i);
1161             }
1162         }
1163 
1164         bh.consume(r);
1165     }
1166 
1167 
1168     @Benchmark
1169     public void not(Blackhole bh) {
1170         long[] a = fa.apply(SPECIES.length());
1171         long[] r = fr.apply(SPECIES.length());
1172 
1173         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1174             for (int i = 0; i < a.length; i += SPECIES.length()) {
1175                 LongVector av = LongVector.fromArray(SPECIES, a, i);
1176                 av.not().intoArray(r, i);
1177             }
1178         }
1179 
1180         bh.consume(r);
1181     }
1182 
1183 
1184 
1185     @Benchmark
1186     public void notMasked(Blackhole bh) {
1187         long[] a = fa.apply(SPECIES.length());
1188         long[] r = fr.apply(SPECIES.length());
1189         boolean[] mask = fm.apply(SPECIES.length());
1190         VectorMask<Long> vmask = VectorMask.fromValues(SPECIES, mask);
1191 
1192         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1193             for (int i = 0; i < a.length; i += SPECIES.length()) {
1194                 LongVector av = LongVector.fromArray(SPECIES, a, i);
1195                 av.not(vmask).intoArray(r, i);
1196             }
1197         }
1198 
1199         bh.consume(r);
1200     }
1201 
1202 
1203 
1204 
1205 
1206     @Benchmark
1207     public void gather(Blackhole bh) {
1208         long[] a = fa.apply(SPECIES.length());
1209         int[] b    = fs.apply(a.length, SPECIES.length());
1210         long[] r = new long[a.length];
1211 
1212         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1213             for (int i = 0; i < a.length; i += SPECIES.length()) {
1214                 LongVector av = LongVector.fromArray(SPECIES, a, i, b, i);
1215                 av.intoArray(r, i);
1216             }
1217         }
1218 
1219         bh.consume(r);
1220     }
1221 
1222 
1223 
1224     @Benchmark
1225     public void scatter(Blackhole bh) {
1226         long[] a = fa.apply(SPECIES.length());
1227         int[] b = fs.apply(a.length, SPECIES.length());
1228         long[] r = new long[a.length];
1229 
1230         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1231             for (int i = 0; i < a.length; i += SPECIES.length()) {
1232                 LongVector av = LongVector.fromArray(SPECIES, a, i);
1233                 av.intoArray(r, i, b, i);
1234             }
1235         }
1236 
1237         bh.consume(r);
1238     }
1239 
1240 }
1241