1 /*
   2  * Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have
  21  * questions.
  22  */
  23 
  24 package benchmark.jdk.incubator.vector;
  25 
  26 import jdk.incubator.vector.Vector;
  27 import jdk.incubator.vector.Vector.Shape;
  28 import jdk.incubator.vector.Vector.Species;
  29 import jdk.incubator.vector.DoubleVector;
  30 
  31 import java.util.concurrent.TimeUnit;
  32 import java.util.function.BiFunction;
  33 import java.util.function.IntFunction;
  34 
  35 import org.openjdk.jmh.annotations.*;
  36 import org.openjdk.jmh.infra.Blackhole;
  37 
  38 @BenchmarkMode(Mode.Throughput)
  39 @OutputTimeUnit(TimeUnit.MILLISECONDS)
  40 @State(Scope.Benchmark)
  41 @Warmup(iterations = 3, time = 1)
  42 @Measurement(iterations = 5, time = 1)
  43 @Fork(value = 1, jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"})
  44 public class Double256Vector extends AbstractVectorBenchmark {
    // Vector species passed to every DoubleVector factory call in this class
    // (DoubleVector.SPECIES_256).
    static final Species<Double> SPECIES = DoubleVector.SPECIES_256;

    // Trip count of the outer "ic" loop in each benchmark body.
    static final int INVOC_COUNT = 1; // get rid of outer loop

    // Length of the arrays built in init(); JMH parameter with the single value "1024".
    // init() may increase it before the arrays are allocated.
    @Param("1024")
    int size;
  51 
  52     double[] fill(IntFunction<Double> f) {
  53         double[] array = new double[size];
  54         for (int i = 0; i < array.length; i++) {
  55             array[i] = f.apply(i);
  56         }
  57         return array;
  58     }
  59 
    // Benchmark data, assigned in init().
    // a[i] = 2*i, b[i] = i+1, c[i] = i+5, r[i] = 0 (r is the array benchmarks store into).
    double[] a, b, c, r;
    // Built by fillMask(size, ...): m from the predicate (i % 2) == 0, rm from a constant-false predicate.
    boolean[] m, rm;
    // Built by fillInt(size, ...) from RANDOM.nextInt(SPECIES.length()); used as shuffle source by rearrange().
    int[] s;
  63 
  64     @Setup
  65     public void init() {
  66         size += size % SPECIES.length(); // FIXME: add post-loops
  67 
  68         a = fill(i -> (double)(2*i));
  69         b = fill(i -> (double)(i+1));
  70         c = fill(i -> (double)(i+5));
  71         r = fill(i -> (double)0);
  72 
  73         m = fillMask(size, i -> (i % 2) == 0);
  74         rm = fillMask(size, i -> false);
  75 
  76         s = fillInt(size, i -> RANDOM.nextInt(SPECIES.length()));
  77     }
  78 
    // Accessors through which the benchmark methods obtain their arrays.
    // Each lambda ignores its argument(s) and returns the current value of the
    // corresponding field (a, b, c, r, m, rm, s).
    final IntFunction<double[]> fa = vl -> a;
    final IntFunction<double[]> fb = vl -> b;
    final IntFunction<double[]> fc = vl -> c;
    final IntFunction<double[]> fr = vl -> r;
    final IntFunction<boolean[]> fm = vl -> m;
    final IntFunction<boolean[]> fmr = vl -> rm;
    final BiFunction<Integer,Integer,int[]> fs = (i,j) -> s;
  86 
  87 
  88     @Benchmark
  89     public void add(Blackhole bh) {
  90         double[] a = fa.apply(SPECIES.length());
  91         double[] b = fb.apply(SPECIES.length());
  92         double[] r = fr.apply(SPECIES.length());
  93 
  94         for (int ic = 0; ic < INVOC_COUNT; ic++) {
  95             for (int i = 0; i < a.length; i += SPECIES.length()) {
  96                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
  97                 DoubleVector bv = DoubleVector.fromArray(SPECIES, b, i);
  98                 av.add(bv).intoArray(r, i);
  99             }
 100         }
 101 
 102         bh.consume(r);
 103     }
 104 
 105     @Benchmark
 106     public void addMasked(Blackhole bh) {
 107         double[] a = fa.apply(SPECIES.length());
 108         double[] b = fb.apply(SPECIES.length());
 109         double[] r = fr.apply(SPECIES.length());
 110         boolean[] mask = fm.apply(SPECIES.length());
 111         Vector.Mask<Double> vmask = DoubleVector.maskFromValues(SPECIES, mask);
 112 
 113         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 114             for (int i = 0; i < a.length; i += SPECIES.length()) {
 115                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 116                 DoubleVector bv = DoubleVector.fromArray(SPECIES, b, i);
 117                 av.add(bv, vmask).intoArray(r, i);
 118             }
 119         }
 120 
 121         bh.consume(r);
 122     }
 123 
 124     @Benchmark
 125     public void sub(Blackhole bh) {
 126         double[] a = fa.apply(SPECIES.length());
 127         double[] b = fb.apply(SPECIES.length());
 128         double[] r = fr.apply(SPECIES.length());
 129 
 130         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 131             for (int i = 0; i < a.length; i += SPECIES.length()) {
 132                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 133                 DoubleVector bv = DoubleVector.fromArray(SPECIES, b, i);
 134                 av.sub(bv).intoArray(r, i);
 135             }
 136         }
 137 
 138         bh.consume(r);
 139     }
 140 
 141     @Benchmark
 142     public void subMasked(Blackhole bh) {
 143         double[] a = fa.apply(SPECIES.length());
 144         double[] b = fb.apply(SPECIES.length());
 145         double[] r = fr.apply(SPECIES.length());
 146         boolean[] mask = fm.apply(SPECIES.length());
 147         Vector.Mask<Double> vmask = DoubleVector.maskFromValues(SPECIES, mask);
 148 
 149         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 150             for (int i = 0; i < a.length; i += SPECIES.length()) {
 151                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 152                 DoubleVector bv = DoubleVector.fromArray(SPECIES, b, i);
 153                 av.sub(bv, vmask).intoArray(r, i);
 154             }
 155         }
 156 
 157         bh.consume(r);
 158     }
 159 
 160 
 161     @Benchmark
 162     public void div(Blackhole bh) {
 163         double[] a = fa.apply(SPECIES.length());
 164         double[] b = fb.apply(SPECIES.length());
 165         double[] r = fr.apply(SPECIES.length());
 166 
 167         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 168             for (int i = 0; i < a.length; i += SPECIES.length()) {
 169                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 170                 DoubleVector bv = DoubleVector.fromArray(SPECIES, b, i);
 171                 av.div(bv).intoArray(r, i);
 172             }
 173         }
 174 
 175         bh.consume(r);
 176     }
 177 
 178 
 179 
 180     @Benchmark
 181     public void divMasked(Blackhole bh) {
 182         double[] a = fa.apply(SPECIES.length());
 183         double[] b = fb.apply(SPECIES.length());
 184         double[] r = fr.apply(SPECIES.length());
 185         boolean[] mask = fm.apply(SPECIES.length());
 186         Vector.Mask<Double> vmask = DoubleVector.maskFromValues(SPECIES, mask);
 187 
 188         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 189             for (int i = 0; i < a.length; i += SPECIES.length()) {
 190                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 191                 DoubleVector bv = DoubleVector.fromArray(SPECIES, b, i);
 192                 av.div(bv, vmask).intoArray(r, i);
 193             }
 194         }
 195 
 196         bh.consume(r);
 197     }
 198 
 199 
 200     @Benchmark
 201     public void mul(Blackhole bh) {
 202         double[] a = fa.apply(SPECIES.length());
 203         double[] b = fb.apply(SPECIES.length());
 204         double[] r = fr.apply(SPECIES.length());
 205 
 206         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 207             for (int i = 0; i < a.length; i += SPECIES.length()) {
 208                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 209                 DoubleVector bv = DoubleVector.fromArray(SPECIES, b, i);
 210                 av.mul(bv).intoArray(r, i);
 211             }
 212         }
 213 
 214         bh.consume(r);
 215     }
 216 
 217     @Benchmark
 218     public void mulMasked(Blackhole bh) {
 219         double[] a = fa.apply(SPECIES.length());
 220         double[] b = fb.apply(SPECIES.length());
 221         double[] r = fr.apply(SPECIES.length());
 222         boolean[] mask = fm.apply(SPECIES.length());
 223         Vector.Mask<Double> vmask = DoubleVector.maskFromValues(SPECIES, mask);
 224 
 225         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 226             for (int i = 0; i < a.length; i += SPECIES.length()) {
 227                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 228                 DoubleVector bv = DoubleVector.fromArray(SPECIES, b, i);
 229                 av.mul(bv, vmask).intoArray(r, i);
 230             }
 231         }
 232 
 233         bh.consume(r);
 234     }
 235 
 236 
 237 
 238 
 239 
 240 
 241 
 242 
 243 
 244 
 245 
 246 
 247 
 248 
 249 
 250 
 251 
 252 
 253 
 254 
 255 
 256 
 257 
 258 
 259 
 260 
 261 
 262 
 263 
 264 
 265 
 266     @Benchmark
 267     public void max(Blackhole bh) {
 268         double[] a = fa.apply(SPECIES.length());
 269         double[] b = fb.apply(SPECIES.length());
 270         double[] r = fr.apply(SPECIES.length());
 271 
 272         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 273             for (int i = 0; i < a.length; i += SPECIES.length()) {
 274                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 275                 DoubleVector bv = DoubleVector.fromArray(SPECIES, b, i);
 276                 av.max(bv).intoArray(r, i);
 277             }
 278         }
 279 
 280         bh.consume(r);
 281     }
 282 
 283     @Benchmark
 284     public void min(Blackhole bh) {
 285         double[] a = fa.apply(SPECIES.length());
 286         double[] b = fb.apply(SPECIES.length());
 287         double[] r = fr.apply(SPECIES.length());
 288 
 289         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 290             for (int i = 0; i < a.length; i += SPECIES.length()) {
 291                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 292                 DoubleVector bv = DoubleVector.fromArray(SPECIES, b, i);
 293                 av.min(bv).intoArray(r, i);
 294             }
 295         }
 296 
 297         bh.consume(r);
 298     }
 299 
 300 
 301 
 302 
 303     @Benchmark
 304     public void addAll(Blackhole bh) {
 305         double[] a = fa.apply(SPECIES.length());
 306         double ra = 0;
 307 
 308         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 309             ra = 0;
 310             for (int i = 0; i < a.length; i += SPECIES.length()) {
 311                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 312                 ra += av.addAll();
 313             }
 314         }
 315         bh.consume(ra);
 316     }
 317 
 318     @Benchmark
 319     public void mulAll(Blackhole bh) {
 320         double[] a = fa.apply(SPECIES.length());
 321         double ra = 1;
 322 
 323         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 324             ra = 1;
 325             for (int i = 0; i < a.length; i += SPECIES.length()) {
 326                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 327                 ra *= av.mulAll();
 328             }
 329         }
 330         bh.consume(ra);
 331     }
 332 
 333     @Benchmark
 334     public void minAll(Blackhole bh) {
 335         double[] a = fa.apply(SPECIES.length());
 336         double ra = Double.POSITIVE_INFINITY;
 337 
 338         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 339             ra = Double.POSITIVE_INFINITY;
 340             for (int i = 0; i < a.length; i += SPECIES.length()) {
 341                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 342                 ra = (double)Math.min(ra, av.minAll());
 343             }
 344         }
 345         bh.consume(ra);
 346     }
 347 
 348     @Benchmark
 349     public void maxAll(Blackhole bh) {
 350         double[] a = fa.apply(SPECIES.length());
 351         double ra = Double.NEGATIVE_INFINITY;
 352 
 353         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 354             ra = Double.NEGATIVE_INFINITY;
 355             for (int i = 0; i < a.length; i += SPECIES.length()) {
 356                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 357                 ra = (double)Math.max(ra, av.maxAll());
 358             }
 359         }
 360         bh.consume(ra);
 361     }
 362 
 363 
 364 
 365     @Benchmark
 366     public void with(Blackhole bh) {
 367         double[] a = fa.apply(SPECIES.length());
 368         double[] r = fr.apply(SPECIES.length());
 369 
 370         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 371             for (int i = 0; i < a.length; i += SPECIES.length()) {
 372                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 373                 av.with(0, (double)4).intoArray(r, i);
 374             }
 375         }
 376 
 377         bh.consume(r);
 378     }
 379 
 380     @Benchmark
 381     public Object lessThan() {
 382         double[] a = fa.apply(size);
 383         double[] b = fb.apply(size);
 384         boolean[] ms = fm.apply(size);
 385         Vector.Mask<Double> m = DoubleVector.maskFromArray(SPECIES, ms, 0);
 386 
 387         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 388             for (int i = 0; i < a.length; i += SPECIES.length()) {
 389                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 390                 DoubleVector bv = DoubleVector.fromArray(SPECIES, b, i);
 391                 Vector.Mask<Double> mv = av.lessThan(bv);
 392 
 393                 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
 394             }
 395         }
 396         return m;
 397     }
 398 
 399 
 400     @Benchmark
 401     public Object greaterThan() {
 402         double[] a = fa.apply(size);
 403         double[] b = fb.apply(size);
 404         boolean[] ms = fm.apply(size);
 405         Vector.Mask<Double> m = DoubleVector.maskFromArray(SPECIES, ms, 0);
 406 
 407         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 408             for (int i = 0; i < a.length; i += SPECIES.length()) {
 409                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 410                 DoubleVector bv = DoubleVector.fromArray(SPECIES, b, i);
 411                 Vector.Mask<Double> mv = av.greaterThan(bv);
 412 
 413                 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
 414             }
 415         }
 416         return m;
 417     }
 418 
 419 
 420     @Benchmark
 421     public Object equal() {
 422         double[] a = fa.apply(size);
 423         double[] b = fb.apply(size);
 424         boolean[] ms = fm.apply(size);
 425         Vector.Mask<Double> m = DoubleVector.maskFromArray(SPECIES, ms, 0);
 426 
 427         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 428             for (int i = 0; i < a.length; i += SPECIES.length()) {
 429                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 430                 DoubleVector bv = DoubleVector.fromArray(SPECIES, b, i);
 431                 Vector.Mask<Double> mv = av.equal(bv);
 432 
 433                 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
 434             }
 435         }
 436         return m;
 437     }
 438 
 439 
 440     @Benchmark
 441     public Object notEqual() {
 442         double[] a = fa.apply(size);
 443         double[] b = fb.apply(size);
 444         boolean[] ms = fm.apply(size);
 445         Vector.Mask<Double> m = DoubleVector.maskFromArray(SPECIES, ms, 0);
 446 
 447         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 448             for (int i = 0; i < a.length; i += SPECIES.length()) {
 449                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 450                 DoubleVector bv = DoubleVector.fromArray(SPECIES, b, i);
 451                 Vector.Mask<Double> mv = av.notEqual(bv);
 452 
 453                 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
 454             }
 455         }
 456         return m;
 457     }
 458 
 459 
 460     @Benchmark
 461     public Object lessThanEq() {
 462         double[] a = fa.apply(size);
 463         double[] b = fb.apply(size);
 464         boolean[] ms = fm.apply(size);
 465         Vector.Mask<Double> m = DoubleVector.maskFromArray(SPECIES, ms, 0);
 466 
 467         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 468             for (int i = 0; i < a.length; i += SPECIES.length()) {
 469                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 470                 DoubleVector bv = DoubleVector.fromArray(SPECIES, b, i);
 471                 Vector.Mask<Double> mv = av.lessThanEq(bv);
 472 
 473                 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
 474             }
 475         }
 476         return m;
 477     }
 478 
 479 
 480     @Benchmark
 481     public Object greaterThanEq() {
 482         double[] a = fa.apply(size);
 483         double[] b = fb.apply(size);
 484         boolean[] ms = fm.apply(size);
 485         Vector.Mask<Double> m = DoubleVector.maskFromArray(SPECIES, ms, 0);
 486 
 487         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 488             for (int i = 0; i < a.length; i += SPECIES.length()) {
 489                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 490                 DoubleVector bv = DoubleVector.fromArray(SPECIES, b, i);
 491                 Vector.Mask<Double> mv = av.greaterThanEq(bv);
 492 
 493                 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
 494             }
 495         }
 496         return m;
 497     }
 498 
 499 
 500     @Benchmark
 501     public void blend(Blackhole bh) {
 502         double[] a = fa.apply(SPECIES.length());
 503         double[] b = fb.apply(SPECIES.length());
 504         double[] r = fr.apply(SPECIES.length());
 505         boolean[] mask = fm.apply(SPECIES.length());
 506         Vector.Mask<Double> vmask = DoubleVector.maskFromValues(SPECIES, mask);
 507 
 508         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 509             for (int i = 0; i < a.length; i += SPECIES.length()) {
 510                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 511                 DoubleVector bv = DoubleVector.fromArray(SPECIES, b, i);
 512                 av.blend(bv, vmask).intoArray(r, i);
 513             }
 514         }
 515 
 516         bh.consume(r);
 517     }
 518 
 519     @Benchmark
 520     public void rearrange(Blackhole bh) {
 521         double[] a = fa.apply(SPECIES.length());
 522         int[] order = fs.apply(a.length, SPECIES.length());
 523         double[] r = fr.apply(SPECIES.length());
 524 
 525         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 526             for (int i = 0; i < a.length; i += SPECIES.length()) {
 527                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 528                 av.rearrange(DoubleVector.shuffleFromArray(SPECIES, order, i)).intoArray(r, i);
 529             }
 530         }
 531 
 532         bh.consume(r);
 533     }
 534 
    /**
     * Benchmarks per-lane extraction with {@code DoubleVector.get}: for each
     * {@code SPECIES.length()}-sized chunk of the array from {@code fa}, copies
     * lane {@code j} of the loaded vector into {@code r[i + j]}.
     *
     * <p>The lane loop is written out by hand for lane counts 1, 2, 4, 8, 16,
     * 32 and 64 so that every {@code get} call receives a literal index; any
     * other lane count falls back to the generic loop at the end.
     *
     * @param bh blackhole that consumes the result array
     */
    @Benchmark
    public void extract(Blackhole bh) {
        double[] a = fa.apply(SPECIES.length());
        double[] r = fr.apply(SPECIES.length());

        for (int ic = 0; ic < INVOC_COUNT; ic++) {
            for (int i = 0; i < a.length; i += SPECIES.length()) {
                DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
                int num_lanes = SPECIES.length();
                // Manually unroll because full unroll happens after intrinsification.
                // Unroll is needed because get intrinsic requires for index to be a known constant.
                if (num_lanes == 1) {
                    r[i]=av.get(0);
                } else if (num_lanes == 2) {
                    r[i]=av.get(0);
                    r[i+1]=av.get(1);
                } else if (num_lanes == 4) {
                    r[i]=av.get(0);
                    r[i+1]=av.get(1);
                    r[i+2]=av.get(2);
                    r[i+3]=av.get(3);
                } else if (num_lanes == 8) {
                    r[i]=av.get(0);
                    r[i+1]=av.get(1);
                    r[i+2]=av.get(2);
                    r[i+3]=av.get(3);
                    r[i+4]=av.get(4);
                    r[i+5]=av.get(5);
                    r[i+6]=av.get(6);
                    r[i+7]=av.get(7);
                } else if (num_lanes == 16) {
                    r[i]=av.get(0);
                    r[i+1]=av.get(1);
                    r[i+2]=av.get(2);
                    r[i+3]=av.get(3);
                    r[i+4]=av.get(4);
                    r[i+5]=av.get(5);
                    r[i+6]=av.get(6);
                    r[i+7]=av.get(7);
                    r[i+8]=av.get(8);
                    r[i+9]=av.get(9);
                    r[i+10]=av.get(10);
                    r[i+11]=av.get(11);
                    r[i+12]=av.get(12);
                    r[i+13]=av.get(13);
                    r[i+14]=av.get(14);
                    r[i+15]=av.get(15);
                } else if (num_lanes == 32) {
                    r[i]=av.get(0);
                    r[i+1]=av.get(1);
                    r[i+2]=av.get(2);
                    r[i+3]=av.get(3);
                    r[i+4]=av.get(4);
                    r[i+5]=av.get(5);
                    r[i+6]=av.get(6);
                    r[i+7]=av.get(7);
                    r[i+8]=av.get(8);
                    r[i+9]=av.get(9);
                    r[i+10]=av.get(10);
                    r[i+11]=av.get(11);
                    r[i+12]=av.get(12);
                    r[i+13]=av.get(13);
                    r[i+14]=av.get(14);
                    r[i+15]=av.get(15);
                    r[i+16]=av.get(16);
                    r[i+17]=av.get(17);
                    r[i+18]=av.get(18);
                    r[i+19]=av.get(19);
                    r[i+20]=av.get(20);
                    r[i+21]=av.get(21);
                    r[i+22]=av.get(22);
                    r[i+23]=av.get(23);
                    r[i+24]=av.get(24);
                    r[i+25]=av.get(25);
                    r[i+26]=av.get(26);
                    r[i+27]=av.get(27);
                    r[i+28]=av.get(28);
                    r[i+29]=av.get(29);
                    r[i+30]=av.get(30);
                    r[i+31]=av.get(31);
                } else if (num_lanes == 64) {
                    r[i]=av.get(0);
                    r[i+1]=av.get(1);
                    r[i+2]=av.get(2);
                    r[i+3]=av.get(3);
                    r[i+4]=av.get(4);
                    r[i+5]=av.get(5);
                    r[i+6]=av.get(6);
                    r[i+7]=av.get(7);
                    r[i+8]=av.get(8);
                    r[i+9]=av.get(9);
                    r[i+10]=av.get(10);
                    r[i+11]=av.get(11);
                    r[i+12]=av.get(12);
                    r[i+13]=av.get(13);
                    r[i+14]=av.get(14);
                    r[i+15]=av.get(15);
                    r[i+16]=av.get(16);
                    r[i+17]=av.get(17);
                    r[i+18]=av.get(18);
                    r[i+19]=av.get(19);
                    r[i+20]=av.get(20);
                    r[i+21]=av.get(21);
                    r[i+22]=av.get(22);
                    r[i+23]=av.get(23);
                    r[i+24]=av.get(24);
                    r[i+25]=av.get(25);
                    r[i+26]=av.get(26);
                    r[i+27]=av.get(27);
                    r[i+28]=av.get(28);
                    r[i+29]=av.get(29);
                    r[i+30]=av.get(30);
                    r[i+31]=av.get(31);
                    r[i+32]=av.get(32);
                    r[i+33]=av.get(33);
                    r[i+34]=av.get(34);
                    r[i+35]=av.get(35);
                    r[i+36]=av.get(36);
                    r[i+37]=av.get(37);
                    r[i+38]=av.get(38);
                    r[i+39]=av.get(39);
                    r[i+40]=av.get(40);
                    r[i+41]=av.get(41);
                    r[i+42]=av.get(42);
                    r[i+43]=av.get(43);
                    r[i+44]=av.get(44);
                    r[i+45]=av.get(45);
                    r[i+46]=av.get(46);
                    r[i+47]=av.get(47);
                    r[i+48]=av.get(48);
                    r[i+49]=av.get(49);
                    r[i+50]=av.get(50);
                    r[i+51]=av.get(51);
                    r[i+52]=av.get(52);
                    r[i+53]=av.get(53);
                    r[i+54]=av.get(54);
                    r[i+55]=av.get(55);
                    r[i+56]=av.get(56);
                    r[i+57]=av.get(57);
                    r[i+58]=av.get(58);
                    r[i+59]=av.get(59);
                    r[i+60]=av.get(60);
                    r[i+61]=av.get(61);
                    r[i+62]=av.get(62);
                    r[i+63]=av.get(63);
                } else {
                    // Generic fallback for lane counts without a hand-unrolled branch.
                    for (int j = 0; j < SPECIES.length(); j++) {
                        r[i+j]=av.get(j);
                    }
                }
            }
        }

        bh.consume(r);
    }
 690 
 691 
 692     @Benchmark
 693     public void sin(Blackhole bh) {
 694         double[] a = fa.apply(SPECIES.length());
 695         double[] r = fr.apply(SPECIES.length());
 696 
 697         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 698             for (int i = 0; i < a.length; i += SPECIES.length()) {
 699                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 700                 av.sin().intoArray(r, i);
 701             }
 702         }
 703 
 704         bh.consume(r);
 705     }
 706 
 707 
 708 
 709     @Benchmark
 710     public void exp(Blackhole bh) {
 711         double[] a = fa.apply(SPECIES.length());
 712         double[] r = fr.apply(SPECIES.length());
 713 
 714         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 715             for (int i = 0; i < a.length; i += SPECIES.length()) {
 716                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 717                 av.exp().intoArray(r, i);
 718             }
 719         }
 720 
 721         bh.consume(r);
 722     }
 723 
 724 
 725 
 726     @Benchmark
 727     public void log1p(Blackhole bh) {
 728         double[] a = fa.apply(SPECIES.length());
 729         double[] r = fr.apply(SPECIES.length());
 730 
 731         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 732             for (int i = 0; i < a.length; i += SPECIES.length()) {
 733                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 734                 av.log1p().intoArray(r, i);
 735             }
 736         }
 737 
 738         bh.consume(r);
 739     }
 740 
 741 
 742 
 743     @Benchmark
 744     public void log(Blackhole bh) {
 745         double[] a = fa.apply(SPECIES.length());
 746         double[] r = fr.apply(SPECIES.length());
 747 
 748         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 749             for (int i = 0; i < a.length; i += SPECIES.length()) {
 750                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 751                 av.log().intoArray(r, i);
 752             }
 753         }
 754 
 755         bh.consume(r);
 756     }
 757 
 758 
 759 
 760     @Benchmark
 761     public void log10(Blackhole bh) {
 762         double[] a = fa.apply(SPECIES.length());
 763         double[] r = fr.apply(SPECIES.length());
 764 
 765         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 766             for (int i = 0; i < a.length; i += SPECIES.length()) {
 767                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 768                 av.log10().intoArray(r, i);
 769             }
 770         }
 771 
 772         bh.consume(r);
 773     }
 774 
 775 
 776 
 777     @Benchmark
 778     public void expm1(Blackhole bh) {
 779         double[] a = fa.apply(SPECIES.length());
 780         double[] r = fr.apply(SPECIES.length());
 781 
 782         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 783             for (int i = 0; i < a.length; i += SPECIES.length()) {
 784                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 785                 av.expm1().intoArray(r, i);
 786             }
 787         }
 788 
 789         bh.consume(r);
 790     }
 791 
 792 
 793 
 794     @Benchmark
 795     public void cos(Blackhole bh) {
 796         double[] a = fa.apply(SPECIES.length());
 797         double[] r = fr.apply(SPECIES.length());
 798 
 799         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 800             for (int i = 0; i < a.length; i += SPECIES.length()) {
 801                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 802                 av.cos().intoArray(r, i);
 803             }
 804         }
 805 
 806         bh.consume(r);
 807     }
 808 
 809 
 810 
 811     @Benchmark
 812     public void tan(Blackhole bh) {
 813         double[] a = fa.apply(SPECIES.length());
 814         double[] r = fr.apply(SPECIES.length());
 815 
 816         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 817             for (int i = 0; i < a.length; i += SPECIES.length()) {
 818                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 819                 av.tan().intoArray(r, i);
 820             }
 821         }
 822 
 823         bh.consume(r);
 824     }
 825 
 826 
 827 
 828     @Benchmark
 829     public void sinh(Blackhole bh) {
 830         double[] a = fa.apply(SPECIES.length());
 831         double[] r = fr.apply(SPECIES.length());
 832 
 833         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 834             for (int i = 0; i < a.length; i += SPECIES.length()) {
 835                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 836                 av.sinh().intoArray(r, i);
 837             }
 838         }
 839 
 840         bh.consume(r);
 841     }
 842 
 843 
 844 
 845     @Benchmark
 846     public void cosh(Blackhole bh) {
 847         double[] a = fa.apply(SPECIES.length());
 848         double[] r = fr.apply(SPECIES.length());
 849 
 850         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 851             for (int i = 0; i < a.length; i += SPECIES.length()) {
 852                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 853                 av.cosh().intoArray(r, i);
 854             }
 855         }
 856 
 857         bh.consume(r);
 858     }
 859 
 860 
 861 
 862     @Benchmark
 863     public void tanh(Blackhole bh) {
 864         double[] a = fa.apply(SPECIES.length());
 865         double[] r = fr.apply(SPECIES.length());
 866 
 867         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 868             for (int i = 0; i < a.length; i += SPECIES.length()) {
 869                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 870                 av.tanh().intoArray(r, i);
 871             }
 872         }
 873 
 874         bh.consume(r);
 875     }
 876 
 877 
 878 
 879     @Benchmark
 880     public void asin(Blackhole bh) {
 881         double[] a = fa.apply(SPECIES.length());
 882         double[] r = fr.apply(SPECIES.length());
 883 
 884         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 885             for (int i = 0; i < a.length; i += SPECIES.length()) {
 886                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 887                 av.asin().intoArray(r, i);
 888             }
 889         }
 890 
 891         bh.consume(r);
 892     }
 893 
 894 
 895 
 896     @Benchmark
 897     public void acos(Blackhole bh) {
 898         double[] a = fa.apply(SPECIES.length());
 899         double[] r = fr.apply(SPECIES.length());
 900 
 901         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 902             for (int i = 0; i < a.length; i += SPECIES.length()) {
 903                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 904                 av.acos().intoArray(r, i);
 905             }
 906         }
 907 
 908         bh.consume(r);
 909     }
 910 
 911 
 912 
 913     @Benchmark
 914     public void atan(Blackhole bh) {
 915         double[] a = fa.apply(SPECIES.length());
 916         double[] r = fr.apply(SPECIES.length());
 917 
 918         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 919             for (int i = 0; i < a.length; i += SPECIES.length()) {
 920                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 921                 av.atan().intoArray(r, i);
 922             }
 923         }
 924 
 925         bh.consume(r);
 926     }
 927 
 928 
 929 
 930     @Benchmark
 931     public void cbrt(Blackhole bh) {
 932         double[] a = fa.apply(SPECIES.length());
 933         double[] r = fr.apply(SPECIES.length());
 934 
 935         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 936             for (int i = 0; i < a.length; i += SPECIES.length()) {
 937                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 938                 av.cbrt().intoArray(r, i);
 939             }
 940         }
 941 
 942         bh.consume(r);
 943     }
 944 
 945 
 946 
 947     @Benchmark
 948     public void hypot(Blackhole bh) {
 949         double[] a = fa.apply(SPECIES.length());
 950         double[] b = fb.apply(SPECIES.length());
 951         double[] r = fr.apply(SPECIES.length());
 952 
 953         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 954             for (int i = 0; i < a.length; i += SPECIES.length()) {
 955                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 956                 DoubleVector bv = DoubleVector.fromArray(SPECIES, b, i);
 957                 av.hypot(bv).intoArray(r, i);
 958             }
 959         }
 960 
 961         bh.consume(r);
 962     }
 963 
 964 
 965 
 966     @Benchmark
 967     public void pow(Blackhole bh) {
 968         double[] a = fa.apply(SPECIES.length());
 969         double[] b = fb.apply(SPECIES.length());
 970         double[] r = fr.apply(SPECIES.length());
 971 
 972         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 973             for (int i = 0; i < a.length; i += SPECIES.length()) {
 974                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 975                 DoubleVector bv = DoubleVector.fromArray(SPECIES, b, i);
 976                 av.pow(bv).intoArray(r, i);
 977             }
 978         }
 979 
 980         bh.consume(r);
 981     }
 982 
 983 
 984 
 985     @Benchmark
 986     public void atan2(Blackhole bh) {
 987         double[] a = fa.apply(SPECIES.length());
 988         double[] b = fb.apply(SPECIES.length());
 989         double[] r = fr.apply(SPECIES.length());
 990 
 991         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 992             for (int i = 0; i < a.length; i += SPECIES.length()) {
 993                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 994                 DoubleVector bv = DoubleVector.fromArray(SPECIES, b, i);
 995                 av.atan2(bv).intoArray(r, i);
 996             }
 997         }
 998 
 999         bh.consume(r);
1000     }
1001 
1002 
1003 
1004     @Benchmark
1005     public void fma(Blackhole bh) {
1006         double[] a = fa.apply(SPECIES.length());
1007         double[] b = fb.apply(SPECIES.length());
1008         double[] c = fc.apply(SPECIES.length());
1009         double[] r = fr.apply(SPECIES.length());
1010 
1011         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1012             for (int i = 0; i < a.length; i += SPECIES.length()) {
1013                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
1014                 DoubleVector bv = DoubleVector.fromArray(SPECIES, b, i);
1015                 DoubleVector cv = DoubleVector.fromArray(SPECIES, c, i);
1016                 av.fma(bv, cv).intoArray(r, i);
1017             }
1018         }
1019 
1020         bh.consume(r);
1021     }
1022 
1023 
1024 
1025     @Benchmark
1026     public void fmaMasked(Blackhole bh) {
1027         double[] a = fa.apply(SPECIES.length());
1028         double[] b = fb.apply(SPECIES.length());
1029         double[] c = fc.apply(SPECIES.length());
1030         double[] r = fr.apply(SPECIES.length());
1031         boolean[] mask = fm.apply(SPECIES.length());
1032         Vector.Mask<Double> vmask = DoubleVector.maskFromValues(SPECIES, mask);
1033 
1034         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1035             for (int i = 0; i < a.length; i += SPECIES.length()) {
1036                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
1037                 DoubleVector bv = DoubleVector.fromArray(SPECIES, b, i);
1038                 DoubleVector cv = DoubleVector.fromArray(SPECIES, c, i);
1039                 av.fma(bv, cv, vmask).intoArray(r, i);
1040             }
1041         }
1042 
1043         bh.consume(r);
1044     }
1045 
1046 
1047     @Benchmark
1048     public void neg(Blackhole bh) {
1049         double[] a = fa.apply(SPECIES.length());
1050         double[] r = fr.apply(SPECIES.length());
1051 
1052         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1053             for (int i = 0; i < a.length; i += SPECIES.length()) {
1054                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
1055                 av.neg().intoArray(r, i);
1056             }
1057         }
1058 
1059         bh.consume(r);
1060     }
1061 
1062     @Benchmark
1063     public void negMasked(Blackhole bh) {
1064         double[] a = fa.apply(SPECIES.length());
1065         double[] r = fr.apply(SPECIES.length());
1066         boolean[] mask = fm.apply(SPECIES.length());
1067         Vector.Mask<Double> vmask = DoubleVector.maskFromValues(SPECIES, mask);
1068 
1069         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1070             for (int i = 0; i < a.length; i += SPECIES.length()) {
1071                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
1072                 av.neg(vmask).intoArray(r, i);
1073             }
1074         }
1075 
1076         bh.consume(r);
1077     }
1078 
1079     @Benchmark
1080     public void abs(Blackhole bh) {
1081         double[] a = fa.apply(SPECIES.length());
1082         double[] r = fr.apply(SPECIES.length());
1083 
1084         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1085             for (int i = 0; i < a.length; i += SPECIES.length()) {
1086                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
1087                 av.abs().intoArray(r, i);
1088             }
1089         }
1090 
1091         bh.consume(r);
1092     }
1093 
1094     @Benchmark
1095     public void absMasked(Blackhole bh) {
1096         double[] a = fa.apply(SPECIES.length());
1097         double[] r = fr.apply(SPECIES.length());
1098         boolean[] mask = fm.apply(SPECIES.length());
1099         Vector.Mask<Double> vmask = DoubleVector.maskFromValues(SPECIES, mask);
1100 
1101         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1102             for (int i = 0; i < a.length; i += SPECIES.length()) {
1103                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
1104                 av.abs(vmask).intoArray(r, i);
1105             }
1106         }
1107 
1108         bh.consume(r);
1109     }
1110 
1111 
1112 
1113 
1114     @Benchmark
1115     public void sqrt(Blackhole bh) {
1116         double[] a = fa.apply(SPECIES.length());
1117         double[] r = fr.apply(SPECIES.length());
1118 
1119         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1120             for (int i = 0; i < a.length; i += SPECIES.length()) {
1121                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
1122                 av.sqrt().intoArray(r, i);
1123             }
1124         }
1125 
1126         bh.consume(r);
1127     }
1128 
1129 
1130 
1131     @Benchmark
1132     public void sqrtMasked(Blackhole bh) {
1133         double[] a = fa.apply(SPECIES.length());
1134         double[] r = fr.apply(SPECIES.length());
1135         boolean[] mask = fm.apply(SPECIES.length());
1136         Vector.Mask<Double> vmask = DoubleVector.maskFromValues(SPECIES, mask);
1137 
1138         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1139             for (int i = 0; i < a.length; i += SPECIES.length()) {
1140                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
1141                 av.sqrt(vmask).intoArray(r, i);
1142             }
1143         }
1144 
1145         bh.consume(r);
1146     }
1147 
1148 
1149 
1150     @Benchmark
1151     public void gather(Blackhole bh) {
1152         double[] a = fa.apply(SPECIES.length());
1153         int[] b    = fs.apply(a.length, SPECIES.length());
1154         double[] r = new double[a.length];
1155 
1156         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1157             for (int i = 0; i < a.length; i += SPECIES.length()) {
1158                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i, b, i);
1159                 av.intoArray(r, i);
1160             }
1161         }
1162 
1163         bh.consume(r);
1164     }
1165 
1166 
1167 
1168     @Benchmark
1169     public void scatter(Blackhole bh) {
1170         double[] a = fa.apply(SPECIES.length());
1171         int[] b = fs.apply(a.length, SPECIES.length());
1172         double[] r = new double[a.length];
1173 
1174         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1175             for (int i = 0; i < a.length; i += SPECIES.length()) {
1176                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
1177                 av.intoArray(r, i, b, i);
1178             }
1179         }
1180 
1181         bh.consume(r);
1182     }
1183 
1184 }
1185