1 /*
   2  * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have
  21  * questions.
  22  */
  23 
  24 package benchmark.jdk.incubator.vector;
  25 
  26 import jdk.incubator.vector.Vector;
  27 import jdk.incubator.vector.Vector.Shape;
  28 import jdk.incubator.vector.Vector.Species;
  29 import jdk.incubator.vector.DoubleVector;
  30 
  31 import java.util.concurrent.TimeUnit;
  32 import java.util.function.BiFunction;
  33 import java.util.function.IntFunction;
  34 
  35 import org.openjdk.jmh.annotations.*;
  36 import org.openjdk.jmh.infra.Blackhole;
  37 
  38 @BenchmarkMode(Mode.Throughput)
  39 @OutputTimeUnit(TimeUnit.MILLISECONDS)
  40 @State(Scope.Benchmark)
  41 @Warmup(iterations = 3, time = 1)
  42 @Measurement(iterations = 5, time = 1)
  43 @Fork(value = 1, jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"})
  44 public class Double256Vector extends AbstractVectorBenchmark {
  45     static final Species<Double> SPECIES = DoubleVector.SPECIES_256;
  46 
  47     static final int INVOC_COUNT = 1; // get rid of outer loop
  48 
  49     @Param("1024")
  50     int size;
  51 
  52     double[] fill(IntFunction<Double> f) {
  53         double[] array = new double[size];
  54         for (int i = 0; i < array.length; i++) {
  55             array[i] = f.apply(i);
  56         }
  57         return array;
  58     }
  59 
  60     double[] a, b, c, r;
  61     boolean[] m, rm;
  62     int[] s;
  63 
  64     @Setup
  65     public void init() {
  66         size += size % SPECIES.length(); // FIXME: add post-loops
  67 
  68         a = fill(i -> (double)(2*i));
  69         b = fill(i -> (double)(i+1));
  70         c = fill(i -> (double)(i+5));
  71         r = fill(i -> (double)0);
  72 
  73         m = fillMask(size, i -> (i % 2) == 0);
  74         rm = fillMask(size, i -> false);
  75 
  76         s = fillInt(size, i -> RANDOM.nextInt(SPECIES.length()));
  77     }
  78 
  79     final IntFunction<double[]> fa = vl -> a;
  80     final IntFunction<double[]> fb = vl -> b;
  81     final IntFunction<double[]> fc = vl -> c;
  82     final IntFunction<double[]> fr = vl -> r;
  83     final IntFunction<boolean[]> fm = vl -> m;
  84     final IntFunction<boolean[]> fmr = vl -> rm;
  85     final BiFunction<Integer,Integer,int[]> fs = (i,j) -> s;
  86 
  87 
  88     @Benchmark
  89     public void add(Blackhole bh) {
  90         double[] a = fa.apply(SPECIES.length());
  91         double[] b = fb.apply(SPECIES.length());
  92         double[] r = fr.apply(SPECIES.length());
  93 
  94         for (int ic = 0; ic < INVOC_COUNT; ic++) {
  95             for (int i = 0; i < a.length; i += SPECIES.length()) {
  96                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
  97                 DoubleVector bv = DoubleVector.fromArray(SPECIES, b, i);
  98                 av.add(bv).intoArray(r, i);
  99             }
 100         }
 101 
 102         bh.consume(r);
 103     }
 104 
 105     @Benchmark
 106     public void addMasked(Blackhole bh) {
 107         double[] a = fa.apply(SPECIES.length());
 108         double[] b = fb.apply(SPECIES.length());
 109         double[] r = fr.apply(SPECIES.length());
 110         boolean[] mask = fm.apply(SPECIES.length());
 111         Vector.Mask<Double> vmask = DoubleVector.maskFromValues(SPECIES, mask);
 112 
 113         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 114             for (int i = 0; i < a.length; i += SPECIES.length()) {
 115                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 116                 DoubleVector bv = DoubleVector.fromArray(SPECIES, b, i);
 117                 av.add(bv, vmask).intoArray(r, i);
 118             }
 119         }
 120 
 121         bh.consume(r);
 122     }
 123 
 124     @Benchmark
 125     public void sub(Blackhole bh) {
 126         double[] a = fa.apply(SPECIES.length());
 127         double[] b = fb.apply(SPECIES.length());
 128         double[] r = fr.apply(SPECIES.length());
 129 
 130         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 131             for (int i = 0; i < a.length; i += SPECIES.length()) {
 132                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 133                 DoubleVector bv = DoubleVector.fromArray(SPECIES, b, i);
 134                 av.sub(bv).intoArray(r, i);
 135             }
 136         }
 137 
 138         bh.consume(r);
 139     }
 140 
 141     @Benchmark
 142     public void subMasked(Blackhole bh) {
 143         double[] a = fa.apply(SPECIES.length());
 144         double[] b = fb.apply(SPECIES.length());
 145         double[] r = fr.apply(SPECIES.length());
 146         boolean[] mask = fm.apply(SPECIES.length());
 147         Vector.Mask<Double> vmask = DoubleVector.maskFromValues(SPECIES, mask);
 148 
 149         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 150             for (int i = 0; i < a.length; i += SPECIES.length()) {
 151                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 152                 DoubleVector bv = DoubleVector.fromArray(SPECIES, b, i);
 153                 av.sub(bv, vmask).intoArray(r, i);
 154             }
 155         }
 156 
 157         bh.consume(r);
 158     }
 159 
 160 
 161     @Benchmark
 162     public void div(Blackhole bh) {
 163         double[] a = fa.apply(SPECIES.length());
 164         double[] b = fb.apply(SPECIES.length());
 165         double[] r = fr.apply(SPECIES.length());
 166 
 167         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 168             for (int i = 0; i < a.length; i += SPECIES.length()) {
 169                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 170                 DoubleVector bv = DoubleVector.fromArray(SPECIES, b, i);
 171                 av.div(bv).intoArray(r, i);
 172             }
 173         }
 174 
 175         bh.consume(r);
 176     }
 177 
 178 
 179 
 180     @Benchmark
 181     public void divMasked(Blackhole bh) {
 182         double[] a = fa.apply(SPECIES.length());
 183         double[] b = fb.apply(SPECIES.length());
 184         double[] r = fr.apply(SPECIES.length());
 185         boolean[] mask = fm.apply(SPECIES.length());
 186         Vector.Mask<Double> vmask = DoubleVector.maskFromValues(SPECIES, mask);
 187 
 188         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 189             for (int i = 0; i < a.length; i += SPECIES.length()) {
 190                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 191                 DoubleVector bv = DoubleVector.fromArray(SPECIES, b, i);
 192                 av.div(bv, vmask).intoArray(r, i);
 193             }
 194         }
 195 
 196         bh.consume(r);
 197     }
 198 
 199 
 200     @Benchmark
 201     public void mul(Blackhole bh) {
 202         double[] a = fa.apply(SPECIES.length());
 203         double[] b = fb.apply(SPECIES.length());
 204         double[] r = fr.apply(SPECIES.length());
 205 
 206         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 207             for (int i = 0; i < a.length; i += SPECIES.length()) {
 208                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 209                 DoubleVector bv = DoubleVector.fromArray(SPECIES, b, i);
 210                 av.mul(bv).intoArray(r, i);
 211             }
 212         }
 213 
 214         bh.consume(r);
 215     }
 216 
 217     @Benchmark
 218     public void mulMasked(Blackhole bh) {
 219         double[] a = fa.apply(SPECIES.length());
 220         double[] b = fb.apply(SPECIES.length());
 221         double[] r = fr.apply(SPECIES.length());
 222         boolean[] mask = fm.apply(SPECIES.length());
 223         Vector.Mask<Double> vmask = DoubleVector.maskFromValues(SPECIES, mask);
 224 
 225         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 226             for (int i = 0; i < a.length; i += SPECIES.length()) {
 227                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 228                 DoubleVector bv = DoubleVector.fromArray(SPECIES, b, i);
 229                 av.mul(bv, vmask).intoArray(r, i);
 230             }
 231         }
 232 
 233         bh.consume(r);
 234     }
 235 
 236 
 237 
 238 
 239 
 240 
 241 
 242 
 243 
 244 
 245 
 246 
 247 
 248 
 249 
 250 
 251 
 252 
 253 
 254 
 255 
 256 
 257 
 258 
 259 
 260 
 261 
 262 
 263 
 264 
 265 
 266     @Benchmark
 267     public void max(Blackhole bh) {
 268         double[] a = fa.apply(SPECIES.length());
 269         double[] b = fb.apply(SPECIES.length());
 270         double[] r = fr.apply(SPECIES.length());
 271 
 272         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 273             for (int i = 0; i < a.length; i += SPECIES.length()) {
 274                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 275                 DoubleVector bv = DoubleVector.fromArray(SPECIES, b, i);
 276                 av.max(bv).intoArray(r, i);
 277             }
 278         }
 279 
 280         bh.consume(r);
 281     }
 282 
 283     @Benchmark
 284     public void min(Blackhole bh) {
 285         double[] a = fa.apply(SPECIES.length());
 286         double[] b = fb.apply(SPECIES.length());
 287         double[] r = fr.apply(SPECIES.length());
 288 
 289         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 290             for (int i = 0; i < a.length; i += SPECIES.length()) {
 291                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 292                 DoubleVector bv = DoubleVector.fromArray(SPECIES, b, i);
 293                 av.min(bv).intoArray(r, i);
 294             }
 295         }
 296 
 297         bh.consume(r);
 298     }
 299 
 300 
 301 
 302 
 303     @Benchmark
 304     public void addAll(Blackhole bh) {
 305         double[] a = fa.apply(SPECIES.length());
 306         double[] r = fr.apply(SPECIES.length());
 307         double ra = 0;
 308 
 309         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 310             for (int i = 0; i < a.length; i += SPECIES.length()) {
 311                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 312                 r[i] = av.addAll();
 313             }
 314         }
 315 
 316         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 317             ra = 0;
 318             for (int i = 0; i < a.length; i += SPECIES.length()) {
 319                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 320                 ra += av.addAll();
 321             }
 322         }
 323 
 324         bh.consume(ra);
 325         bh.consume(r);
 326     }
 327 
 328     @Benchmark
 329     public void mulAll(Blackhole bh) {
 330         double[] a = fa.apply(SPECIES.length());
 331         double[] r = fr.apply(SPECIES.length());
 332         double ra = 1;
 333 
 334         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 335             for (int i = 0; i < a.length; i += SPECIES.length()) {
 336                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 337                 r[i] = av.mulAll();
 338             }
 339         }
 340 
 341         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 342             ra = 1;
 343             for (int i = 0; i < a.length; i += SPECIES.length()) {
 344                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 345                 ra *= av.mulAll();
 346             }
 347         }
 348 
 349         bh.consume(ra);
 350         bh.consume(r);
 351     }
 352 
 353     @Benchmark
 354     public void minAll(Blackhole bh) {
 355         double[] a = fa.apply(SPECIES.length());
 356         double[] r = fr.apply(SPECIES.length());
 357         double ra = Double.POSITIVE_INFINITY;
 358 
 359         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 360             for (int i = 0; i < a.length; i += SPECIES.length()) {
 361                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 362                 r[i] = av.minAll();
 363             }
 364         }
 365 
 366         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 367             ra = Double.POSITIVE_INFINITY;
 368             for (int i = 0; i < a.length; i += SPECIES.length()) {
 369                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 370                 ra = (double)Math.min(ra, av.minAll());
 371             }
 372         }
 373 
 374         bh.consume(ra);
 375         bh.consume(r);
 376     }
 377 
 378     @Benchmark
 379     public void maxAll(Blackhole bh) {
 380         double[] a = fa.apply(SPECIES.length());
 381         double[] r = fr.apply(SPECIES.length());
 382         double ra = Double.NEGATIVE_INFINITY;
 383 
 384         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 385             for (int i = 0; i < a.length; i += SPECIES.length()) {
 386                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 387                 r[i] = av.maxAll();
 388             }
 389         }
 390 
 391         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 392             ra = Double.NEGATIVE_INFINITY;
 393             for (int i = 0; i < a.length; i += SPECIES.length()) {
 394                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 395                 ra = (double)Math.max(ra, av.maxAll());
 396             }
 397         }
 398 
 399         bh.consume(ra);
 400         bh.consume(r);
 401     }
 402 
 403 
 404 
 405     @Benchmark
 406     public void with(Blackhole bh) {
 407         double[] a = fa.apply(SPECIES.length());
 408         double[] r = fr.apply(SPECIES.length());
 409 
 410         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 411             for (int i = 0; i < a.length; i += SPECIES.length()) {
 412                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 413                 av.with(0, (double)4).intoArray(r, i);
 414             }
 415         }
 416 
 417         bh.consume(r);
 418     }
 419 
 420     @Benchmark
 421     public Object lessThan() {
 422         double[] a = fa.apply(size);
 423         double[] b = fb.apply(size);
 424         boolean[] ms = fm.apply(size);
 425         Vector.Mask<Double> m = DoubleVector.maskFromArray(SPECIES, ms, 0);
 426 
 427         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 428             for (int i = 0; i < a.length; i += SPECIES.length()) {
 429                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 430                 DoubleVector bv = DoubleVector.fromArray(SPECIES, b, i);
 431                 Vector.Mask<Double> mv = av.lessThan(bv);
 432 
 433                 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
 434             }
 435         }
 436         return m;
 437     }
 438 
 439 
 440     @Benchmark
 441     public Object greaterThan() {
 442         double[] a = fa.apply(size);
 443         double[] b = fb.apply(size);
 444         boolean[] ms = fm.apply(size);
 445         Vector.Mask<Double> m = DoubleVector.maskFromArray(SPECIES, ms, 0);
 446 
 447         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 448             for (int i = 0; i < a.length; i += SPECIES.length()) {
 449                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 450                 DoubleVector bv = DoubleVector.fromArray(SPECIES, b, i);
 451                 Vector.Mask<Double> mv = av.greaterThan(bv);
 452 
 453                 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
 454             }
 455         }
 456         return m;
 457     }
 458 
 459 
 460     @Benchmark
 461     public Object equal() {
 462         double[] a = fa.apply(size);
 463         double[] b = fb.apply(size);
 464         boolean[] ms = fm.apply(size);
 465         Vector.Mask<Double> m = DoubleVector.maskFromArray(SPECIES, ms, 0);
 466 
 467         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 468             for (int i = 0; i < a.length; i += SPECIES.length()) {
 469                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 470                 DoubleVector bv = DoubleVector.fromArray(SPECIES, b, i);
 471                 Vector.Mask<Double> mv = av.equal(bv);
 472 
 473                 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
 474             }
 475         }
 476         return m;
 477     }
 478 
 479 
 480     @Benchmark
 481     public Object notEqual() {
 482         double[] a = fa.apply(size);
 483         double[] b = fb.apply(size);
 484         boolean[] ms = fm.apply(size);
 485         Vector.Mask<Double> m = DoubleVector.maskFromArray(SPECIES, ms, 0);
 486 
 487         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 488             for (int i = 0; i < a.length; i += SPECIES.length()) {
 489                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 490                 DoubleVector bv = DoubleVector.fromArray(SPECIES, b, i);
 491                 Vector.Mask<Double> mv = av.notEqual(bv);
 492 
 493                 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
 494             }
 495         }
 496         return m;
 497     }
 498 
 499 
 500     @Benchmark
 501     public Object lessThanEq() {
 502         double[] a = fa.apply(size);
 503         double[] b = fb.apply(size);
 504         boolean[] ms = fm.apply(size);
 505         Vector.Mask<Double> m = DoubleVector.maskFromArray(SPECIES, ms, 0);
 506 
 507         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 508             for (int i = 0; i < a.length; i += SPECIES.length()) {
 509                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 510                 DoubleVector bv = DoubleVector.fromArray(SPECIES, b, i);
 511                 Vector.Mask<Double> mv = av.lessThanEq(bv);
 512 
 513                 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
 514             }
 515         }
 516         return m;
 517     }
 518 
 519 
 520     @Benchmark
 521     public Object greaterThanEq() {
 522         double[] a = fa.apply(size);
 523         double[] b = fb.apply(size);
 524         boolean[] ms = fm.apply(size);
 525         Vector.Mask<Double> m = DoubleVector.maskFromArray(SPECIES, ms, 0);
 526 
 527         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 528             for (int i = 0; i < a.length; i += SPECIES.length()) {
 529                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 530                 DoubleVector bv = DoubleVector.fromArray(SPECIES, b, i);
 531                 Vector.Mask<Double> mv = av.greaterThanEq(bv);
 532 
 533                 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
 534             }
 535         }
 536         return m;
 537     }
 538 
 539 
 540     @Benchmark
 541     public void blend(Blackhole bh) {
 542         double[] a = fa.apply(SPECIES.length());
 543         double[] b = fb.apply(SPECIES.length());
 544         double[] r = fr.apply(SPECIES.length());
 545         boolean[] mask = fm.apply(SPECIES.length());
 546         Vector.Mask<Double> vmask = DoubleVector.maskFromValues(SPECIES, mask);
 547 
 548         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 549             for (int i = 0; i < a.length; i += SPECIES.length()) {
 550                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 551                 DoubleVector bv = DoubleVector.fromArray(SPECIES, b, i);
 552                 av.blend(bv, vmask).intoArray(r, i);
 553             }
 554         }
 555 
 556         bh.consume(r);
 557     }
 558 
 559     @Benchmark
 560     public void rearrange(Blackhole bh) {
 561         double[] a = fa.apply(SPECIES.length());
 562         int[] order = fs.apply(a.length, SPECIES.length());
 563         double[] r = fr.apply(SPECIES.length());
 564 
 565         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 566             for (int i = 0; i < a.length; i += SPECIES.length()) {
 567                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 568                 av.rearrange(DoubleVector.shuffleFromArray(SPECIES, order, i)).intoArray(r, i);
 569             }
 570         }
 571 
 572         bh.consume(r);
 573     }
 574 
 575     @Benchmark
 576     public void extract(Blackhole bh) {
 577         double[] a = fa.apply(SPECIES.length());
 578         double[] r = fr.apply(SPECIES.length());
 579 
 580         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 581             for (int i = 0; i < a.length; i += SPECIES.length()) {
 582                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 583                 int num_lanes = SPECIES.length();
 584                 // Manually unroll because full unroll happens after intrinsification.
 585                 // Unroll is needed because get intrinsic requires for index to be a known constant.
 586                 if (num_lanes == 1) {
 587                     r[i]=av.get(0);
 588                 } else if (num_lanes == 2) {
 589                     r[i]=av.get(0);
 590                     r[i+1]=av.get(1);
 591                 } else if (num_lanes == 4) {
 592                     r[i]=av.get(0);
 593                     r[i+1]=av.get(1);
 594                     r[i+2]=av.get(2);
 595                     r[i+3]=av.get(3);
 596                 } else if (num_lanes == 8) {
 597                     r[i]=av.get(0);
 598                     r[i+1]=av.get(1);
 599                     r[i+2]=av.get(2);
 600                     r[i+3]=av.get(3);
 601                     r[i+4]=av.get(4);
 602                     r[i+5]=av.get(5);
 603                     r[i+6]=av.get(6);
 604                     r[i+7]=av.get(7);
 605                 } else if (num_lanes == 16) {
 606                     r[i]=av.get(0);
 607                     r[i+1]=av.get(1);
 608                     r[i+2]=av.get(2);
 609                     r[i+3]=av.get(3);
 610                     r[i+4]=av.get(4);
 611                     r[i+5]=av.get(5);
 612                     r[i+6]=av.get(6);
 613                     r[i+7]=av.get(7);
 614                     r[i+8]=av.get(8);
 615                     r[i+9]=av.get(9);
 616                     r[i+10]=av.get(10);
 617                     r[i+11]=av.get(11);
 618                     r[i+12]=av.get(12);
 619                     r[i+13]=av.get(13);
 620                     r[i+14]=av.get(14);
 621                     r[i+15]=av.get(15);
 622                 } else if (num_lanes == 32) {
 623                     r[i]=av.get(0);
 624                     r[i+1]=av.get(1);
 625                     r[i+2]=av.get(2);
 626                     r[i+3]=av.get(3);
 627                     r[i+4]=av.get(4);
 628                     r[i+5]=av.get(5);
 629                     r[i+6]=av.get(6);
 630                     r[i+7]=av.get(7);
 631                     r[i+8]=av.get(8);
 632                     r[i+9]=av.get(9);
 633                     r[i+10]=av.get(10);
 634                     r[i+11]=av.get(11);
 635                     r[i+12]=av.get(12);
 636                     r[i+13]=av.get(13);
 637                     r[i+14]=av.get(14);
 638                     r[i+15]=av.get(15);
 639                     r[i+16]=av.get(16);
 640                     r[i+17]=av.get(17);
 641                     r[i+18]=av.get(18);
 642                     r[i+19]=av.get(19);
 643                     r[i+20]=av.get(20);
 644                     r[i+21]=av.get(21);
 645                     r[i+22]=av.get(22);
 646                     r[i+23]=av.get(23);
 647                     r[i+24]=av.get(24);
 648                     r[i+25]=av.get(25);
 649                     r[i+26]=av.get(26);
 650                     r[i+27]=av.get(27);
 651                     r[i+28]=av.get(28);
 652                     r[i+29]=av.get(29);
 653                     r[i+30]=av.get(30);
 654                     r[i+31]=av.get(31);
 655                 } else if (num_lanes == 64) {
 656                     r[i]=av.get(0);
 657                     r[i+1]=av.get(1);
 658                     r[i+2]=av.get(2);
 659                     r[i+3]=av.get(3);
 660                     r[i+4]=av.get(4);
 661                     r[i+5]=av.get(5);
 662                     r[i+6]=av.get(6);
 663                     r[i+7]=av.get(7);
 664                     r[i+8]=av.get(8);
 665                     r[i+9]=av.get(9);
 666                     r[i+10]=av.get(10);
 667                     r[i+11]=av.get(11);
 668                     r[i+12]=av.get(12);
 669                     r[i+13]=av.get(13);
 670                     r[i+14]=av.get(14);
 671                     r[i+15]=av.get(15);
 672                     r[i+16]=av.get(16);
 673                     r[i+17]=av.get(17);
 674                     r[i+18]=av.get(18);
 675                     r[i+19]=av.get(19);
 676                     r[i+20]=av.get(20);
 677                     r[i+21]=av.get(21);
 678                     r[i+22]=av.get(22);
 679                     r[i+23]=av.get(23);
 680                     r[i+24]=av.get(24);
 681                     r[i+25]=av.get(25);
 682                     r[i+26]=av.get(26);
 683                     r[i+27]=av.get(27);
 684                     r[i+28]=av.get(28);
 685                     r[i+29]=av.get(29);
 686                     r[i+30]=av.get(30);
 687                     r[i+31]=av.get(31);
 688                     r[i+32]=av.get(32);
 689                     r[i+33]=av.get(33);
 690                     r[i+34]=av.get(34);
 691                     r[i+35]=av.get(35);
 692                     r[i+36]=av.get(36);
 693                     r[i+37]=av.get(37);
 694                     r[i+38]=av.get(38);
 695                     r[i+39]=av.get(39);
 696                     r[i+40]=av.get(40);
 697                     r[i+41]=av.get(41);
 698                     r[i+42]=av.get(42);
 699                     r[i+43]=av.get(43);
 700                     r[i+44]=av.get(44);
 701                     r[i+45]=av.get(45);
 702                     r[i+46]=av.get(46);
 703                     r[i+47]=av.get(47);
 704                     r[i+48]=av.get(48);
 705                     r[i+49]=av.get(49);
 706                     r[i+50]=av.get(50);
 707                     r[i+51]=av.get(51);
 708                     r[i+52]=av.get(52);
 709                     r[i+53]=av.get(53);
 710                     r[i+54]=av.get(54);
 711                     r[i+55]=av.get(55);
 712                     r[i+56]=av.get(56);
 713                     r[i+57]=av.get(57);
 714                     r[i+58]=av.get(58);
 715                     r[i+59]=av.get(59);
 716                     r[i+60]=av.get(60);
 717                     r[i+61]=av.get(61);
 718                     r[i+62]=av.get(62);
 719                     r[i+63]=av.get(63);
 720                 } else {
 721                     for (int j = 0; j < SPECIES.length(); j++) {
 722                         r[i+j]=av.get(j);
 723                     }
 724                 }
 725             }
 726         }
 727 
 728         bh.consume(r);
 729     }
 730 
 731 
 732     @Benchmark
 733     public void sin(Blackhole bh) {
 734         double[] a = fa.apply(SPECIES.length());
 735         double[] r = fr.apply(SPECIES.length());
 736 
 737         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 738             for (int i = 0; i < a.length; i += SPECIES.length()) {
 739                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 740                 av.sin().intoArray(r, i);
 741             }
 742         }
 743 
 744         bh.consume(r);
 745     }
 746 
 747 
 748 
 749     @Benchmark
 750     public void exp(Blackhole bh) {
 751         double[] a = fa.apply(SPECIES.length());
 752         double[] r = fr.apply(SPECIES.length());
 753 
 754         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 755             for (int i = 0; i < a.length; i += SPECIES.length()) {
 756                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 757                 av.exp().intoArray(r, i);
 758             }
 759         }
 760 
 761         bh.consume(r);
 762     }
 763 
 764 
 765 
 766     @Benchmark
 767     public void log1p(Blackhole bh) {
 768         double[] a = fa.apply(SPECIES.length());
 769         double[] r = fr.apply(SPECIES.length());
 770 
 771         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 772             for (int i = 0; i < a.length; i += SPECIES.length()) {
 773                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 774                 av.log1p().intoArray(r, i);
 775             }
 776         }
 777 
 778         bh.consume(r);
 779     }
 780 
 781 
 782 
 783     @Benchmark
 784     public void log(Blackhole bh) {
 785         double[] a = fa.apply(SPECIES.length());
 786         double[] r = fr.apply(SPECIES.length());
 787 
 788         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 789             for (int i = 0; i < a.length; i += SPECIES.length()) {
 790                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 791                 av.log().intoArray(r, i);
 792             }
 793         }
 794 
 795         bh.consume(r);
 796     }
 797 
 798 
 799 
 800     @Benchmark
 801     public void log10(Blackhole bh) {
 802         double[] a = fa.apply(SPECIES.length());
 803         double[] r = fr.apply(SPECIES.length());
 804 
 805         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 806             for (int i = 0; i < a.length; i += SPECIES.length()) {
 807                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 808                 av.log10().intoArray(r, i);
 809             }
 810         }
 811 
 812         bh.consume(r);
 813     }
 814 
 815 
 816 
 817     @Benchmark
 818     public void expm1(Blackhole bh) {
 819         double[] a = fa.apply(SPECIES.length());
 820         double[] r = fr.apply(SPECIES.length());
 821 
 822         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 823             for (int i = 0; i < a.length; i += SPECIES.length()) {
 824                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 825                 av.expm1().intoArray(r, i);
 826             }
 827         }
 828 
 829         bh.consume(r);
 830     }
 831 
 832 
 833 
 834     @Benchmark
 835     public void cos(Blackhole bh) {
 836         double[] a = fa.apply(SPECIES.length());
 837         double[] r = fr.apply(SPECIES.length());
 838 
 839         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 840             for (int i = 0; i < a.length; i += SPECIES.length()) {
 841                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 842                 av.cos().intoArray(r, i);
 843             }
 844         }
 845 
 846         bh.consume(r);
 847     }
 848 
 849 
 850 
 851     @Benchmark
 852     public void tan(Blackhole bh) {
 853         double[] a = fa.apply(SPECIES.length());
 854         double[] r = fr.apply(SPECIES.length());
 855 
 856         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 857             for (int i = 0; i < a.length; i += SPECIES.length()) {
 858                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 859                 av.tan().intoArray(r, i);
 860             }
 861         }
 862 
 863         bh.consume(r);
 864     }
 865 
 866 
 867 
 868     @Benchmark
 869     public void sinh(Blackhole bh) {
 870         double[] a = fa.apply(SPECIES.length());
 871         double[] r = fr.apply(SPECIES.length());
 872 
 873         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 874             for (int i = 0; i < a.length; i += SPECIES.length()) {
 875                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 876                 av.sinh().intoArray(r, i);
 877             }
 878         }
 879 
 880         bh.consume(r);
 881     }
 882 
 883 
 884 
 885     @Benchmark
 886     public void cosh(Blackhole bh) {
 887         double[] a = fa.apply(SPECIES.length());
 888         double[] r = fr.apply(SPECIES.length());
 889 
 890         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 891             for (int i = 0; i < a.length; i += SPECIES.length()) {
 892                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 893                 av.cosh().intoArray(r, i);
 894             }
 895         }
 896 
 897         bh.consume(r);
 898     }
 899 
 900 
 901 
 902     @Benchmark
 903     public void tanh(Blackhole bh) {
 904         double[] a = fa.apply(SPECIES.length());
 905         double[] r = fr.apply(SPECIES.length());
 906 
 907         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 908             for (int i = 0; i < a.length; i += SPECIES.length()) {
 909                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 910                 av.tanh().intoArray(r, i);
 911             }
 912         }
 913 
 914         bh.consume(r);
 915     }
 916 
 917 
 918 
 919     @Benchmark
 920     public void asin(Blackhole bh) {
 921         double[] a = fa.apply(SPECIES.length());
 922         double[] r = fr.apply(SPECIES.length());
 923 
 924         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 925             for (int i = 0; i < a.length; i += SPECIES.length()) {
 926                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 927                 av.asin().intoArray(r, i);
 928             }
 929         }
 930 
 931         bh.consume(r);
 932     }
 933 
 934 
 935 
 936     @Benchmark
 937     public void acos(Blackhole bh) {
 938         double[] a = fa.apply(SPECIES.length());
 939         double[] r = fr.apply(SPECIES.length());
 940 
 941         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 942             for (int i = 0; i < a.length; i += SPECIES.length()) {
 943                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 944                 av.acos().intoArray(r, i);
 945             }
 946         }
 947 
 948         bh.consume(r);
 949     }
 950 
 951 
 952 
 953     @Benchmark
 954     public void atan(Blackhole bh) {
 955         double[] a = fa.apply(SPECIES.length());
 956         double[] r = fr.apply(SPECIES.length());
 957 
 958         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 959             for (int i = 0; i < a.length; i += SPECIES.length()) {
 960                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 961                 av.atan().intoArray(r, i);
 962             }
 963         }
 964 
 965         bh.consume(r);
 966     }
 967 
 968 
 969 
 970     @Benchmark
 971     public void cbrt(Blackhole bh) {
 972         double[] a = fa.apply(SPECIES.length());
 973         double[] r = fr.apply(SPECIES.length());
 974 
 975         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 976             for (int i = 0; i < a.length; i += SPECIES.length()) {
 977                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 978                 av.cbrt().intoArray(r, i);
 979             }
 980         }
 981 
 982         bh.consume(r);
 983     }
 984 
 985 
 986 
 987     @Benchmark
 988     public void hypot(Blackhole bh) {
 989         double[] a = fa.apply(SPECIES.length());
 990         double[] b = fb.apply(SPECIES.length());
 991         double[] r = fr.apply(SPECIES.length());
 992 
 993         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 994             for (int i = 0; i < a.length; i += SPECIES.length()) {
 995                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 996                 DoubleVector bv = DoubleVector.fromArray(SPECIES, b, i);
 997                 av.hypot(bv).intoArray(r, i);
 998             }
 999         }
1000 
1001         bh.consume(r);
1002     }
1003 
1004 
1005 
1006     @Benchmark
1007     public void pow(Blackhole bh) {
1008         double[] a = fa.apply(SPECIES.length());
1009         double[] b = fb.apply(SPECIES.length());
1010         double[] r = fr.apply(SPECIES.length());
1011 
1012         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1013             for (int i = 0; i < a.length; i += SPECIES.length()) {
1014                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
1015                 DoubleVector bv = DoubleVector.fromArray(SPECIES, b, i);
1016                 av.pow(bv).intoArray(r, i);
1017             }
1018         }
1019 
1020         bh.consume(r);
1021     }
1022 
1023 
1024 
1025     @Benchmark
1026     public void atan2(Blackhole bh) {
1027         double[] a = fa.apply(SPECIES.length());
1028         double[] b = fb.apply(SPECIES.length());
1029         double[] r = fr.apply(SPECIES.length());
1030 
1031         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1032             for (int i = 0; i < a.length; i += SPECIES.length()) {
1033                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
1034                 DoubleVector bv = DoubleVector.fromArray(SPECIES, b, i);
1035                 av.atan2(bv).intoArray(r, i);
1036             }
1037         }
1038 
1039         bh.consume(r);
1040     }
1041 
1042 
1043 
1044     @Benchmark
1045     public void fma(Blackhole bh) {
1046         double[] a = fa.apply(SPECIES.length());
1047         double[] b = fb.apply(SPECIES.length());
1048         double[] c = fc.apply(SPECIES.length());
1049         double[] r = fr.apply(SPECIES.length());
1050 
1051         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1052             for (int i = 0; i < a.length; i += SPECIES.length()) {
1053                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
1054                 DoubleVector bv = DoubleVector.fromArray(SPECIES, b, i);
1055                 DoubleVector cv = DoubleVector.fromArray(SPECIES, c, i);
1056                 av.fma(bv, cv).intoArray(r, i);
1057             }
1058         }
1059 
1060         bh.consume(r);
1061     }
1062 
1063 
1064 
1065     @Benchmark
1066     public void fmaMasked(Blackhole bh) {
1067         double[] a = fa.apply(SPECIES.length());
1068         double[] b = fb.apply(SPECIES.length());
1069         double[] c = fc.apply(SPECIES.length());
1070         double[] r = fr.apply(SPECIES.length());
1071         boolean[] mask = fm.apply(SPECIES.length());
1072         Vector.Mask<Double> vmask = DoubleVector.maskFromValues(SPECIES, mask);
1073 
1074         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1075             for (int i = 0; i < a.length; i += SPECIES.length()) {
1076                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
1077                 DoubleVector bv = DoubleVector.fromArray(SPECIES, b, i);
1078                 DoubleVector cv = DoubleVector.fromArray(SPECIES, c, i);
1079                 av.fma(bv, cv, vmask).intoArray(r, i);
1080             }
1081         }
1082 
1083         bh.consume(r);
1084     }
1085 
1086 
1087     @Benchmark
1088     public void neg(Blackhole bh) {
1089         double[] a = fa.apply(SPECIES.length());
1090         double[] r = fr.apply(SPECIES.length());
1091 
1092         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1093             for (int i = 0; i < a.length; i += SPECIES.length()) {
1094                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
1095                 av.neg().intoArray(r, i);
1096             }
1097         }
1098 
1099         bh.consume(r);
1100     }
1101 
1102     @Benchmark
1103     public void negMasked(Blackhole bh) {
1104         double[] a = fa.apply(SPECIES.length());
1105         double[] r = fr.apply(SPECIES.length());
1106         boolean[] mask = fm.apply(SPECIES.length());
1107         Vector.Mask<Double> vmask = DoubleVector.maskFromValues(SPECIES, mask);
1108 
1109         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1110             for (int i = 0; i < a.length; i += SPECIES.length()) {
1111                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
1112                 av.neg(vmask).intoArray(r, i);
1113             }
1114         }
1115 
1116         bh.consume(r);
1117     }
1118 
1119     @Benchmark
1120     public void abs(Blackhole bh) {
1121         double[] a = fa.apply(SPECIES.length());
1122         double[] r = fr.apply(SPECIES.length());
1123 
1124         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1125             for (int i = 0; i < a.length; i += SPECIES.length()) {
1126                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
1127                 av.abs().intoArray(r, i);
1128             }
1129         }
1130 
1131         bh.consume(r);
1132     }
1133 
1134     @Benchmark
1135     public void absMasked(Blackhole bh) {
1136         double[] a = fa.apply(SPECIES.length());
1137         double[] r = fr.apply(SPECIES.length());
1138         boolean[] mask = fm.apply(SPECIES.length());
1139         Vector.Mask<Double> vmask = DoubleVector.maskFromValues(SPECIES, mask);
1140 
1141         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1142             for (int i = 0; i < a.length; i += SPECIES.length()) {
1143                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
1144                 av.abs(vmask).intoArray(r, i);
1145             }
1146         }
1147 
1148         bh.consume(r);
1149     }
1150 
1151 
1152 
1153 
1154     @Benchmark
1155     public void sqrt(Blackhole bh) {
1156         double[] a = fa.apply(SPECIES.length());
1157         double[] r = fr.apply(SPECIES.length());
1158 
1159         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1160             for (int i = 0; i < a.length; i += SPECIES.length()) {
1161                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
1162                 av.sqrt().intoArray(r, i);
1163             }
1164         }
1165 
1166         bh.consume(r);
1167     }
1168 
1169 
1170 
1171     @Benchmark
1172     public void sqrtMasked(Blackhole bh) {
1173         double[] a = fa.apply(SPECIES.length());
1174         double[] r = fr.apply(SPECIES.length());
1175         boolean[] mask = fm.apply(SPECIES.length());
1176         Vector.Mask<Double> vmask = DoubleVector.maskFromValues(SPECIES, mask);
1177 
1178         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1179             for (int i = 0; i < a.length; i += SPECIES.length()) {
1180                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
1181                 av.sqrt(vmask).intoArray(r, i);
1182             }
1183         }
1184 
1185         bh.consume(r);
1186     }
1187 
1188 
1189 
1190     @Benchmark
1191     public void gather(Blackhole bh) {
1192         double[] a = fa.apply(SPECIES.length());
1193         int[] b    = fs.apply(a.length, SPECIES.length());
1194         double[] r = new double[a.length];
1195 
1196         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1197             for (int i = 0; i < a.length; i += SPECIES.length()) {
1198                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i, b, i);
1199                 av.intoArray(r, i);
1200             }
1201         }
1202 
1203         bh.consume(r);
1204     }
1205 
1206 
1207 
1208     @Benchmark
1209     public void scatter(Blackhole bh) {
1210         double[] a = fa.apply(SPECIES.length());
1211         int[] b = fs.apply(a.length, SPECIES.length());
1212         double[] r = new double[a.length];
1213 
1214         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1215             for (int i = 0; i < a.length; i += SPECIES.length()) {
1216                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
1217                 av.intoArray(r, i, b, i);
1218             }
1219         }
1220 
1221         bh.consume(r);
1222     }
1223 
1224 }
1225