1 /*
   2  * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have
  21  * questions.
  22  */
  23 
  24 package benchmark.jdk.incubator.vector;
  25 
  26 import jdk.incubator.vector.Vector;
  27 import jdk.incubator.vector.Vector.Shape;
  28 import jdk.incubator.vector.DoubleVector;
  29 
  30 import java.util.concurrent.TimeUnit;
  31 import java.util.function.BiFunction;
  32 import java.util.function.IntFunction;
  33 
  34 import org.openjdk.jmh.annotations.*;
  35 import org.openjdk.jmh.infra.Blackhole;
  36 
  37 @BenchmarkMode(Mode.Throughput)
  38 @OutputTimeUnit(TimeUnit.MILLISECONDS)
  39 @State(Scope.Benchmark)
  40 @Warmup(iterations = 3, time = 1)
  41 @Measurement(iterations = 5, time = 1)
  42 @Fork(value = 1, jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"})
  43 public class DoubleMaxVector extends AbstractVectorBenchmark {
  44     static final DoubleVector.DoubleSpecies SPECIES = DoubleVector.species(Shape.S_Max_BIT);
  45 
  46     static final int INVOC_COUNT = 1; // get rid of outer loop
  47 
  48     @Param("1024")
  49     int size;
  50 
  51     double[] fill(IntFunction<Double> f) {
  52         double[] array = new double[size];
  53         for (int i = 0; i < array.length; i++) {
  54             array[i] = f.apply(i);
  55         }
  56         return array;
  57     }
  58 
  59     double[] a, b, c, r;
  60     boolean[] m, rm;
  61     int[] s;
  62 
  63     @Setup
  64     public void init() {
  65         size += size % SPECIES.length(); // FIXME: add post-loops
  66 
  67         a = fill(i -> (double)(2*i));
  68         b = fill(i -> (double)(i+1));
  69         c = fill(i -> (double)(i+5));
  70         r = fill(i -> (double)0);
  71 
  72         m = fillMask(size, i -> (i % 2) == 0);
  73         rm = fillMask(size, i -> false);
  74 
  75         s = fillInt(size, i -> RANDOM.nextInt(SPECIES.length()));
  76     }
  77 
  78     final IntFunction<double[]> fa = vl -> a;
  79     final IntFunction<double[]> fb = vl -> b;
  80     final IntFunction<double[]> fc = vl -> c;
  81     final IntFunction<double[]> fr = vl -> r;
  82     final IntFunction<boolean[]> fm = vl -> m;
  83     final IntFunction<boolean[]> fmr = vl -> rm;
  84     final BiFunction<Integer,Integer,int[]> fs = (i,j) -> s;
  85 
  86 
  87     @Benchmark
  88     public void add(Blackhole bh) {
  89         double[] a = fa.apply(SPECIES.length());
  90         double[] b = fb.apply(SPECIES.length());
  91         double[] r = fr.apply(SPECIES.length());
  92 
  93         for (int ic = 0; ic < INVOC_COUNT; ic++) {
  94             for (int i = 0; i < a.length; i += SPECIES.length()) {
  95                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
  96                 DoubleVector bv = DoubleVector.fromArray(SPECIES, b, i);
  97                 av.add(bv).intoArray(r, i);
  98             }
  99         }
 100 
 101         bh.consume(r);
 102     }
 103 
 104     @Benchmark
 105     public void addMasked(Blackhole bh) {
 106         double[] a = fa.apply(SPECIES.length());
 107         double[] b = fb.apply(SPECIES.length());
 108         double[] r = fr.apply(SPECIES.length());
 109         boolean[] mask = fm.apply(SPECIES.length());
 110         Vector.Mask<Double> vmask = DoubleVector.maskFromValues(SPECIES, mask);
 111 
 112         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 113             for (int i = 0; i < a.length; i += SPECIES.length()) {
 114                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 115                 DoubleVector bv = DoubleVector.fromArray(SPECIES, b, i);
 116                 av.add(bv, vmask).intoArray(r, i);
 117             }
 118         }
 119 
 120         bh.consume(r);
 121     }
 122 
 123     @Benchmark
 124     public void sub(Blackhole bh) {
 125         double[] a = fa.apply(SPECIES.length());
 126         double[] b = fb.apply(SPECIES.length());
 127         double[] r = fr.apply(SPECIES.length());
 128 
 129         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 130             for (int i = 0; i < a.length; i += SPECIES.length()) {
 131                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 132                 DoubleVector bv = DoubleVector.fromArray(SPECIES, b, i);
 133                 av.sub(bv).intoArray(r, i);
 134             }
 135         }
 136 
 137         bh.consume(r);
 138     }
 139 
 140     @Benchmark
 141     public void subMasked(Blackhole bh) {
 142         double[] a = fa.apply(SPECIES.length());
 143         double[] b = fb.apply(SPECIES.length());
 144         double[] r = fr.apply(SPECIES.length());
 145         boolean[] mask = fm.apply(SPECIES.length());
 146         Vector.Mask<Double> vmask = DoubleVector.maskFromValues(SPECIES, mask);
 147 
 148         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 149             for (int i = 0; i < a.length; i += SPECIES.length()) {
 150                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 151                 DoubleVector bv = DoubleVector.fromArray(SPECIES, b, i);
 152                 av.sub(bv, vmask).intoArray(r, i);
 153             }
 154         }
 155 
 156         bh.consume(r);
 157     }
 158 
 159 
 160     @Benchmark
 161     public void div(Blackhole bh) {
 162         double[] a = fa.apply(SPECIES.length());
 163         double[] b = fb.apply(SPECIES.length());
 164         double[] r = fr.apply(SPECIES.length());
 165 
 166         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 167             for (int i = 0; i < a.length; i += SPECIES.length()) {
 168                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 169                 DoubleVector bv = DoubleVector.fromArray(SPECIES, b, i);
 170                 av.div(bv).intoArray(r, i);
 171             }
 172         }
 173 
 174         bh.consume(r);
 175     }
 176 
 177 
 178 
 179     @Benchmark
 180     public void divMasked(Blackhole bh) {
 181         double[] a = fa.apply(SPECIES.length());
 182         double[] b = fb.apply(SPECIES.length());
 183         double[] r = fr.apply(SPECIES.length());
 184         boolean[] mask = fm.apply(SPECIES.length());
 185         Vector.Mask<Double> vmask = DoubleVector.maskFromValues(SPECIES, mask);
 186 
 187         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 188             for (int i = 0; i < a.length; i += SPECIES.length()) {
 189                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 190                 DoubleVector bv = DoubleVector.fromArray(SPECIES, b, i);
 191                 av.div(bv, vmask).intoArray(r, i);
 192             }
 193         }
 194 
 195         bh.consume(r);
 196     }
 197 
 198 
 199     @Benchmark
 200     public void mul(Blackhole bh) {
 201         double[] a = fa.apply(SPECIES.length());
 202         double[] b = fb.apply(SPECIES.length());
 203         double[] r = fr.apply(SPECIES.length());
 204 
 205         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 206             for (int i = 0; i < a.length; i += SPECIES.length()) {
 207                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 208                 DoubleVector bv = DoubleVector.fromArray(SPECIES, b, i);
 209                 av.mul(bv).intoArray(r, i);
 210             }
 211         }
 212 
 213         bh.consume(r);
 214     }
 215 
 216     @Benchmark
 217     public void mulMasked(Blackhole bh) {
 218         double[] a = fa.apply(SPECIES.length());
 219         double[] b = fb.apply(SPECIES.length());
 220         double[] r = fr.apply(SPECIES.length());
 221         boolean[] mask = fm.apply(SPECIES.length());
 222         Vector.Mask<Double> vmask = DoubleVector.maskFromValues(SPECIES, mask);
 223 
 224         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 225             for (int i = 0; i < a.length; i += SPECIES.length()) {
 226                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 227                 DoubleVector bv = DoubleVector.fromArray(SPECIES, b, i);
 228                 av.mul(bv, vmask).intoArray(r, i);
 229             }
 230         }
 231 
 232         bh.consume(r);
 233     }
 234 
 235 
 236 
 237 
 238 
 239 
 240 
 241 
 242 
 243 
 244 
 245 
 246 
 247 
 248 
 249 
 250 
 251 
 252 
 253 
 254 
 255 
 256 
 257 
 258 
 259 
 260 
 261 
 262 
 263 
 264 
 265     @Benchmark
 266     public void max(Blackhole bh) {
 267         double[] a = fa.apply(SPECIES.length());
 268         double[] b = fb.apply(SPECIES.length());
 269         double[] r = fr.apply(SPECIES.length());
 270 
 271         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 272             for (int i = 0; i < a.length; i += SPECIES.length()) {
 273                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 274                 DoubleVector bv = DoubleVector.fromArray(SPECIES, b, i);
 275                 av.max(bv).intoArray(r, i);
 276             }
 277         }
 278 
 279         bh.consume(r);
 280     }
 281 
 282     @Benchmark
 283     public void min(Blackhole bh) {
 284         double[] a = fa.apply(SPECIES.length());
 285         double[] b = fb.apply(SPECIES.length());
 286         double[] r = fr.apply(SPECIES.length());
 287 
 288         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 289             for (int i = 0; i < a.length; i += SPECIES.length()) {
 290                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 291                 DoubleVector bv = DoubleVector.fromArray(SPECIES, b, i);
 292                 av.min(bv).intoArray(r, i);
 293             }
 294         }
 295 
 296         bh.consume(r);
 297     }
 298 
 299 
 300 
 301 
 302     @Benchmark
 303     public void addAll(Blackhole bh) {
 304         double[] a = fa.apply(SPECIES.length());
 305         double[] r = fr.apply(SPECIES.length());
 306         double ra = 0;
 307 
 308         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 309             for (int i = 0; i < a.length; i += SPECIES.length()) {
 310                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 311                 r[i] = av.addAll();
 312             }
 313         }
 314 
 315         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 316             ra = 0;
 317             for (int i = 0; i < a.length; i += SPECIES.length()) {
 318                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 319                 ra += av.addAll();
 320             }
 321         }
 322 
 323         bh.consume(ra);
 324         bh.consume(r);
 325     }
 326 
 327     @Benchmark
 328     public void mulAll(Blackhole bh) {
 329         double[] a = fa.apply(SPECIES.length());
 330         double[] r = fr.apply(SPECIES.length());
 331         double ra = 1;
 332 
 333         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 334             for (int i = 0; i < a.length; i += SPECIES.length()) {
 335                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 336                 r[i] = av.mulAll();
 337             }
 338         }
 339 
 340         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 341             ra = 1;
 342             for (int i = 0; i < a.length; i += SPECIES.length()) {
 343                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 344                 ra *= av.mulAll();
 345             }
 346         }
 347 
 348         bh.consume(ra);
 349         bh.consume(r);
 350     }
 351 
 352     @Benchmark
 353     public void minAll(Blackhole bh) {
 354         double[] a = fa.apply(SPECIES.length());
 355         double[] r = fr.apply(SPECIES.length());
 356         double ra = Double.MAX_VALUE;
 357 
 358         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 359             for (int i = 0; i < a.length; i += SPECIES.length()) {
 360                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 361                 r[i] = av.minAll();
 362             }
 363         }
 364 
 365         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 366             ra = Double.MAX_VALUE;
 367             for (int i = 0; i < a.length; i += SPECIES.length()) {
 368                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 369                 ra = (double)Math.min(ra, av.minAll());
 370             }
 371         }
 372 
 373         bh.consume(ra);
 374         bh.consume(r);
 375     }
 376 
 377     @Benchmark
 378     public void maxAll(Blackhole bh) {
 379         double[] a = fa.apply(SPECIES.length());
 380         double[] r = fr.apply(SPECIES.length());
 381         double ra = Double.MIN_VALUE;
 382 
 383         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 384             for (int i = 0; i < a.length; i += SPECIES.length()) {
 385                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 386                 r[i] = av.maxAll();
 387             }
 388         }
 389 
 390         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 391             ra = Double.MIN_VALUE;
 392             for (int i = 0; i < a.length; i += SPECIES.length()) {
 393                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 394                 ra = (double)Math.max(ra, av.maxAll());
 395             }
 396         }
 397 
 398         bh.consume(ra);
 399         bh.consume(r);
 400     }
 401 
 402 
 403 
 404     @Benchmark
 405     public void with(Blackhole bh) {
 406         double[] a = fa.apply(SPECIES.length());
 407         double[] r = fr.apply(SPECIES.length());
 408 
 409         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 410             for (int i = 0; i < a.length; i += SPECIES.length()) {
 411                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 412                 av.with(0, (double)4).intoArray(r, i);
 413             }
 414         }
 415 
 416         bh.consume(r);
 417     }
 418 
 419     @Benchmark
 420     public Object lessThan() {
 421         double[] a = fa.apply(size);
 422         double[] b = fb.apply(size);
 423         boolean[] ms = fm.apply(size);
 424         Vector.Mask<Double> m = DoubleVector.maskFromArray(SPECIES, ms, 0);
 425 
 426         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 427             for (int i = 0; i < a.length; i += SPECIES.length()) {
 428                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 429                 DoubleVector bv = DoubleVector.fromArray(SPECIES, b, i);
 430                 Vector.Mask<Double> mv = av.lessThan(bv);
 431 
 432                 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
 433             }
 434         }
 435         return m;
 436     }
 437 
 438 
 439     @Benchmark
 440     public Object greaterThan() {
 441         double[] a = fa.apply(size);
 442         double[] b = fb.apply(size);
 443         boolean[] ms = fm.apply(size);
 444         Vector.Mask<Double> m = DoubleVector.maskFromArray(SPECIES, ms, 0);
 445 
 446         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 447             for (int i = 0; i < a.length; i += SPECIES.length()) {
 448                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 449                 DoubleVector bv = DoubleVector.fromArray(SPECIES, b, i);
 450                 Vector.Mask<Double> mv = av.greaterThan(bv);
 451 
 452                 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
 453             }
 454         }
 455         return m;
 456     }
 457 
 458 
 459     @Benchmark
 460     public Object equal() {
 461         double[] a = fa.apply(size);
 462         double[] b = fb.apply(size);
 463         boolean[] ms = fm.apply(size);
 464         Vector.Mask<Double> m = DoubleVector.maskFromArray(SPECIES, ms, 0);
 465 
 466         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 467             for (int i = 0; i < a.length; i += SPECIES.length()) {
 468                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 469                 DoubleVector bv = DoubleVector.fromArray(SPECIES, b, i);
 470                 Vector.Mask<Double> mv = av.equal(bv);
 471 
 472                 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
 473             }
 474         }
 475         return m;
 476     }
 477 
 478 
 479     @Benchmark
 480     public Object notEqual() {
 481         double[] a = fa.apply(size);
 482         double[] b = fb.apply(size);
 483         boolean[] ms = fm.apply(size);
 484         Vector.Mask<Double> m = DoubleVector.maskFromArray(SPECIES, ms, 0);
 485 
 486         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 487             for (int i = 0; i < a.length; i += SPECIES.length()) {
 488                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 489                 DoubleVector bv = DoubleVector.fromArray(SPECIES, b, i);
 490                 Vector.Mask<Double> mv = av.notEqual(bv);
 491 
 492                 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
 493             }
 494         }
 495         return m;
 496     }
 497 
 498 
 499     @Benchmark
 500     public Object lessThanEq() {
 501         double[] a = fa.apply(size);
 502         double[] b = fb.apply(size);
 503         boolean[] ms = fm.apply(size);
 504         Vector.Mask<Double> m = DoubleVector.maskFromArray(SPECIES, ms, 0);
 505 
 506         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 507             for (int i = 0; i < a.length; i += SPECIES.length()) {
 508                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 509                 DoubleVector bv = DoubleVector.fromArray(SPECIES, b, i);
 510                 Vector.Mask<Double> mv = av.lessThanEq(bv);
 511 
 512                 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
 513             }
 514         }
 515         return m;
 516     }
 517 
 518 
 519     @Benchmark
 520     public Object greaterThanEq() {
 521         double[] a = fa.apply(size);
 522         double[] b = fb.apply(size);
 523         boolean[] ms = fm.apply(size);
 524         Vector.Mask<Double> m = DoubleVector.maskFromArray(SPECIES, ms, 0);
 525 
 526         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 527             for (int i = 0; i < a.length; i += SPECIES.length()) {
 528                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 529                 DoubleVector bv = DoubleVector.fromArray(SPECIES, b, i);
 530                 Vector.Mask<Double> mv = av.greaterThanEq(bv);
 531 
 532                 m = m.and(mv); // accumulate results, so JIT can't eliminate relevant computations
 533             }
 534         }
 535         return m;
 536     }
 537 
 538 
 539     @Benchmark
 540     public void blend(Blackhole bh) {
 541         double[] a = fa.apply(SPECIES.length());
 542         double[] b = fb.apply(SPECIES.length());
 543         double[] r = fr.apply(SPECIES.length());
 544         boolean[] mask = fm.apply(SPECIES.length());
 545         Vector.Mask<Double> vmask = DoubleVector.maskFromValues(SPECIES, mask);
 546 
 547         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 548             for (int i = 0; i < a.length; i += SPECIES.length()) {
 549                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 550                 DoubleVector bv = DoubleVector.fromArray(SPECIES, b, i);
 551                 av.blend(bv, vmask).intoArray(r, i);
 552             }
 553         }
 554 
 555         bh.consume(r);
 556     }
 557 
 558     @Benchmark
 559     public void rearrange(Blackhole bh) {
 560         double[] a = fa.apply(SPECIES.length());
 561         int[] order = fs.apply(a.length, SPECIES.length());
 562         double[] r = fr.apply(SPECIES.length());
 563 
 564         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 565             for (int i = 0; i < a.length; i += SPECIES.length()) {
 566                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 567                 av.rearrange(DoubleVector.shuffleFromArray(SPECIES, order, i)).intoArray(r, i);
 568             }
 569         }
 570 
 571         bh.consume(r);
 572     }
 573 
 574     @Benchmark
 575     public void extract(Blackhole bh) {
 576         double[] a = fa.apply(SPECIES.length());
 577         double[] r = fr.apply(SPECIES.length());
 578 
 579         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 580             for (int i = 0; i < a.length; i += SPECIES.length()) {
 581                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 582                 int num_lanes = SPECIES.length();
 583                 // Manually unroll because full unroll happens after intrinsification.
 584                 // Unroll is needed because get intrinsic requires for index to be a known constant.
 585                 if (num_lanes == 1) {
 586                     r[i]=av.get(0);
 587                 } else if (num_lanes == 2) {
 588                     r[i]=av.get(0);
 589                     r[i+1]=av.get(1);
 590                 } else if (num_lanes == 4) {
 591                     r[i]=av.get(0);
 592                     r[i+1]=av.get(1);
 593                     r[i+2]=av.get(2);
 594                     r[i+3]=av.get(3);
 595                 } else if (num_lanes == 8) {
 596                     r[i]=av.get(0);
 597                     r[i+1]=av.get(1);
 598                     r[i+2]=av.get(2);
 599                     r[i+3]=av.get(3);
 600                     r[i+4]=av.get(4);
 601                     r[i+5]=av.get(5);
 602                     r[i+6]=av.get(6);
 603                     r[i+7]=av.get(7);
 604                 } else if (num_lanes == 16) {
 605                     r[i]=av.get(0);
 606                     r[i+1]=av.get(1);
 607                     r[i+2]=av.get(2);
 608                     r[i+3]=av.get(3);
 609                     r[i+4]=av.get(4);
 610                     r[i+5]=av.get(5);
 611                     r[i+6]=av.get(6);
 612                     r[i+7]=av.get(7);
 613                     r[i+8]=av.get(8);
 614                     r[i+9]=av.get(9);
 615                     r[i+10]=av.get(10);
 616                     r[i+11]=av.get(11);
 617                     r[i+12]=av.get(12);
 618                     r[i+13]=av.get(13);
 619                     r[i+14]=av.get(14);
 620                     r[i+15]=av.get(15);
 621                 } else if (num_lanes == 32) {
 622                     r[i]=av.get(0);
 623                     r[i+1]=av.get(1);
 624                     r[i+2]=av.get(2);
 625                     r[i+3]=av.get(3);
 626                     r[i+4]=av.get(4);
 627                     r[i+5]=av.get(5);
 628                     r[i+6]=av.get(6);
 629                     r[i+7]=av.get(7);
 630                     r[i+8]=av.get(8);
 631                     r[i+9]=av.get(9);
 632                     r[i+10]=av.get(10);
 633                     r[i+11]=av.get(11);
 634                     r[i+12]=av.get(12);
 635                     r[i+13]=av.get(13);
 636                     r[i+14]=av.get(14);
 637                     r[i+15]=av.get(15);
 638                     r[i+16]=av.get(16);
 639                     r[i+17]=av.get(17);
 640                     r[i+18]=av.get(18);
 641                     r[i+19]=av.get(19);
 642                     r[i+20]=av.get(20);
 643                     r[i+21]=av.get(21);
 644                     r[i+22]=av.get(22);
 645                     r[i+23]=av.get(23);
 646                     r[i+24]=av.get(24);
 647                     r[i+25]=av.get(25);
 648                     r[i+26]=av.get(26);
 649                     r[i+27]=av.get(27);
 650                     r[i+28]=av.get(28);
 651                     r[i+29]=av.get(29);
 652                     r[i+30]=av.get(30);
 653                     r[i+31]=av.get(31);
 654                 } else if (num_lanes == 64) {
 655                     r[i]=av.get(0);
 656                     r[i+1]=av.get(1);
 657                     r[i+2]=av.get(2);
 658                     r[i+3]=av.get(3);
 659                     r[i+4]=av.get(4);
 660                     r[i+5]=av.get(5);
 661                     r[i+6]=av.get(6);
 662                     r[i+7]=av.get(7);
 663                     r[i+8]=av.get(8);
 664                     r[i+9]=av.get(9);
 665                     r[i+10]=av.get(10);
 666                     r[i+11]=av.get(11);
 667                     r[i+12]=av.get(12);
 668                     r[i+13]=av.get(13);
 669                     r[i+14]=av.get(14);
 670                     r[i+15]=av.get(15);
 671                     r[i+16]=av.get(16);
 672                     r[i+17]=av.get(17);
 673                     r[i+18]=av.get(18);
 674                     r[i+19]=av.get(19);
 675                     r[i+20]=av.get(20);
 676                     r[i+21]=av.get(21);
 677                     r[i+22]=av.get(22);
 678                     r[i+23]=av.get(23);
 679                     r[i+24]=av.get(24);
 680                     r[i+25]=av.get(25);
 681                     r[i+26]=av.get(26);
 682                     r[i+27]=av.get(27);
 683                     r[i+28]=av.get(28);
 684                     r[i+29]=av.get(29);
 685                     r[i+30]=av.get(30);
 686                     r[i+31]=av.get(31);
 687                     r[i+32]=av.get(32);
 688                     r[i+33]=av.get(33);
 689                     r[i+34]=av.get(34);
 690                     r[i+35]=av.get(35);
 691                     r[i+36]=av.get(36);
 692                     r[i+37]=av.get(37);
 693                     r[i+38]=av.get(38);
 694                     r[i+39]=av.get(39);
 695                     r[i+40]=av.get(40);
 696                     r[i+41]=av.get(41);
 697                     r[i+42]=av.get(42);
 698                     r[i+43]=av.get(43);
 699                     r[i+44]=av.get(44);
 700                     r[i+45]=av.get(45);
 701                     r[i+46]=av.get(46);
 702                     r[i+47]=av.get(47);
 703                     r[i+48]=av.get(48);
 704                     r[i+49]=av.get(49);
 705                     r[i+50]=av.get(50);
 706                     r[i+51]=av.get(51);
 707                     r[i+52]=av.get(52);
 708                     r[i+53]=av.get(53);
 709                     r[i+54]=av.get(54);
 710                     r[i+55]=av.get(55);
 711                     r[i+56]=av.get(56);
 712                     r[i+57]=av.get(57);
 713                     r[i+58]=av.get(58);
 714                     r[i+59]=av.get(59);
 715                     r[i+60]=av.get(60);
 716                     r[i+61]=av.get(61);
 717                     r[i+62]=av.get(62);
 718                     r[i+63]=av.get(63);
 719                 } else {
 720                     for (int j = 0; j < SPECIES.length(); j++) {
 721                         r[i+j]=av.get(j);
 722                     }
 723                 }
 724             }
 725         }
 726 
 727         bh.consume(r);
 728     }
 729 
 730 
 731     @Benchmark
 732     public void sin(Blackhole bh) {
 733         double[] a = fa.apply(SPECIES.length());
 734         double[] r = fr.apply(SPECIES.length());
 735 
 736         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 737             for (int i = 0; i < a.length; i += SPECIES.length()) {
 738                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 739                 av.sin().intoArray(r, i);
 740             }
 741         }
 742 
 743         bh.consume(r);
 744     }
 745 
 746 
 747 
 748     @Benchmark
 749     public void exp(Blackhole bh) {
 750         double[] a = fa.apply(SPECIES.length());
 751         double[] r = fr.apply(SPECIES.length());
 752 
 753         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 754             for (int i = 0; i < a.length; i += SPECIES.length()) {
 755                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 756                 av.exp().intoArray(r, i);
 757             }
 758         }
 759 
 760         bh.consume(r);
 761     }
 762 
 763 
 764 
 765     @Benchmark
 766     public void log1p(Blackhole bh) {
 767         double[] a = fa.apply(SPECIES.length());
 768         double[] r = fr.apply(SPECIES.length());
 769 
 770         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 771             for (int i = 0; i < a.length; i += SPECIES.length()) {
 772                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 773                 av.log1p().intoArray(r, i);
 774             }
 775         }
 776 
 777         bh.consume(r);
 778     }
 779 
 780 
 781 
 782     @Benchmark
 783     public void log(Blackhole bh) {
 784         double[] a = fa.apply(SPECIES.length());
 785         double[] r = fr.apply(SPECIES.length());
 786 
 787         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 788             for (int i = 0; i < a.length; i += SPECIES.length()) {
 789                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 790                 av.log().intoArray(r, i);
 791             }
 792         }
 793 
 794         bh.consume(r);
 795     }
 796 
 797 
 798 
 799     @Benchmark
 800     public void log10(Blackhole bh) {
 801         double[] a = fa.apply(SPECIES.length());
 802         double[] r = fr.apply(SPECIES.length());
 803 
 804         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 805             for (int i = 0; i < a.length; i += SPECIES.length()) {
 806                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 807                 av.log10().intoArray(r, i);
 808             }
 809         }
 810 
 811         bh.consume(r);
 812     }
 813 
 814 
 815 
 816     @Benchmark
 817     public void expm1(Blackhole bh) {
 818         double[] a = fa.apply(SPECIES.length());
 819         double[] r = fr.apply(SPECIES.length());
 820 
 821         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 822             for (int i = 0; i < a.length; i += SPECIES.length()) {
 823                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 824                 av.expm1().intoArray(r, i);
 825             }
 826         }
 827 
 828         bh.consume(r);
 829     }
 830 
 831 
 832 
 833     @Benchmark
 834     public void cos(Blackhole bh) {
 835         double[] a = fa.apply(SPECIES.length());
 836         double[] r = fr.apply(SPECIES.length());
 837 
 838         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 839             for (int i = 0; i < a.length; i += SPECIES.length()) {
 840                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 841                 av.cos().intoArray(r, i);
 842             }
 843         }
 844 
 845         bh.consume(r);
 846     }
 847 
 848 
 849 
 850     @Benchmark
 851     public void tan(Blackhole bh) {
 852         double[] a = fa.apply(SPECIES.length());
 853         double[] r = fr.apply(SPECIES.length());
 854 
 855         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 856             for (int i = 0; i < a.length; i += SPECIES.length()) {
 857                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 858                 av.tan().intoArray(r, i);
 859             }
 860         }
 861 
 862         bh.consume(r);
 863     }
 864 
 865 
 866 
 867     @Benchmark
 868     public void sinh(Blackhole bh) {
 869         double[] a = fa.apply(SPECIES.length());
 870         double[] r = fr.apply(SPECIES.length());
 871 
 872         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 873             for (int i = 0; i < a.length; i += SPECIES.length()) {
 874                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 875                 av.sinh().intoArray(r, i);
 876             }
 877         }
 878 
 879         bh.consume(r);
 880     }
 881 
 882 
 883 
 884     @Benchmark
 885     public void cosh(Blackhole bh) {
 886         double[] a = fa.apply(SPECIES.length());
 887         double[] r = fr.apply(SPECIES.length());
 888 
 889         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 890             for (int i = 0; i < a.length; i += SPECIES.length()) {
 891                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 892                 av.cosh().intoArray(r, i);
 893             }
 894         }
 895 
 896         bh.consume(r);
 897     }
 898 
 899 
 900 
 901     @Benchmark
 902     public void tanh(Blackhole bh) {
 903         double[] a = fa.apply(SPECIES.length());
 904         double[] r = fr.apply(SPECIES.length());
 905 
 906         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 907             for (int i = 0; i < a.length; i += SPECIES.length()) {
 908                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 909                 av.tanh().intoArray(r, i);
 910             }
 911         }
 912 
 913         bh.consume(r);
 914     }
 915 
 916 
 917 
 918     @Benchmark
 919     public void asin(Blackhole bh) {
 920         double[] a = fa.apply(SPECIES.length());
 921         double[] r = fr.apply(SPECIES.length());
 922 
 923         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 924             for (int i = 0; i < a.length; i += SPECIES.length()) {
 925                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 926                 av.asin().intoArray(r, i);
 927             }
 928         }
 929 
 930         bh.consume(r);
 931     }
 932 
 933 
 934 
 935     @Benchmark
 936     public void acos(Blackhole bh) {
 937         double[] a = fa.apply(SPECIES.length());
 938         double[] r = fr.apply(SPECIES.length());
 939 
 940         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 941             for (int i = 0; i < a.length; i += SPECIES.length()) {
 942                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 943                 av.acos().intoArray(r, i);
 944             }
 945         }
 946 
 947         bh.consume(r);
 948     }
 949 
 950 
 951 
 952     @Benchmark
 953     public void atan(Blackhole bh) {
 954         double[] a = fa.apply(SPECIES.length());
 955         double[] r = fr.apply(SPECIES.length());
 956 
 957         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 958             for (int i = 0; i < a.length; i += SPECIES.length()) {
 959                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 960                 av.atan().intoArray(r, i);
 961             }
 962         }
 963 
 964         bh.consume(r);
 965     }
 966 
 967 
 968 
 969     @Benchmark
 970     public void cbrt(Blackhole bh) {
 971         double[] a = fa.apply(SPECIES.length());
 972         double[] r = fr.apply(SPECIES.length());
 973 
 974         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 975             for (int i = 0; i < a.length; i += SPECIES.length()) {
 976                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 977                 av.cbrt().intoArray(r, i);
 978             }
 979         }
 980 
 981         bh.consume(r);
 982     }
 983 
 984 
 985 
 986     @Benchmark
 987     public void hypot(Blackhole bh) {
 988         double[] a = fa.apply(SPECIES.length());
 989         double[] b = fb.apply(SPECIES.length());
 990         double[] r = fr.apply(SPECIES.length());
 991 
 992         for (int ic = 0; ic < INVOC_COUNT; ic++) {
 993             for (int i = 0; i < a.length; i += SPECIES.length()) {
 994                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
 995                 DoubleVector bv = DoubleVector.fromArray(SPECIES, b, i);
 996                 av.hypot(bv).intoArray(r, i);
 997             }
 998         }
 999 
1000         bh.consume(r);
1001     }
1002 
1003 
1004 
1005     @Benchmark
1006     public void pow(Blackhole bh) {
1007         double[] a = fa.apply(SPECIES.length());
1008         double[] b = fb.apply(SPECIES.length());
1009         double[] r = fr.apply(SPECIES.length());
1010 
1011         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1012             for (int i = 0; i < a.length; i += SPECIES.length()) {
1013                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
1014                 DoubleVector bv = DoubleVector.fromArray(SPECIES, b, i);
1015                 av.pow(bv).intoArray(r, i);
1016             }
1017         }
1018 
1019         bh.consume(r);
1020     }
1021 
1022 
1023 
1024     @Benchmark
1025     public void atan2(Blackhole bh) {
1026         double[] a = fa.apply(SPECIES.length());
1027         double[] b = fb.apply(SPECIES.length());
1028         double[] r = fr.apply(SPECIES.length());
1029 
1030         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1031             for (int i = 0; i < a.length; i += SPECIES.length()) {
1032                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
1033                 DoubleVector bv = DoubleVector.fromArray(SPECIES, b, i);
1034                 av.atan2(bv).intoArray(r, i);
1035             }
1036         }
1037 
1038         bh.consume(r);
1039     }
1040 
1041 
1042 
1043     @Benchmark
1044     public void fma(Blackhole bh) {
1045         double[] a = fa.apply(SPECIES.length());
1046         double[] b = fb.apply(SPECIES.length());
1047         double[] c = fc.apply(SPECIES.length());
1048         double[] r = fr.apply(SPECIES.length());
1049 
1050         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1051             for (int i = 0; i < a.length; i += SPECIES.length()) {
1052                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
1053                 DoubleVector bv = DoubleVector.fromArray(SPECIES, b, i);
1054                 DoubleVector cv = DoubleVector.fromArray(SPECIES, c, i);
1055                 av.fma(bv, cv).intoArray(r, i);
1056             }
1057         }
1058 
1059         bh.consume(r);
1060     }
1061 
1062 
1063 
1064     @Benchmark
1065     public void fmaMasked(Blackhole bh) {
1066         double[] a = fa.apply(SPECIES.length());
1067         double[] b = fb.apply(SPECIES.length());
1068         double[] c = fc.apply(SPECIES.length());
1069         double[] r = fr.apply(SPECIES.length());
1070         boolean[] mask = fm.apply(SPECIES.length());
1071         Vector.Mask<Double> vmask = DoubleVector.maskFromValues(SPECIES, mask);
1072 
1073         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1074             for (int i = 0; i < a.length; i += SPECIES.length()) {
1075                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
1076                 DoubleVector bv = DoubleVector.fromArray(SPECIES, b, i);
1077                 DoubleVector cv = DoubleVector.fromArray(SPECIES, c, i);
1078                 av.fma(bv, cv, vmask).intoArray(r, i);
1079             }
1080         }
1081 
1082         bh.consume(r);
1083     }
1084 
1085 
1086     @Benchmark
1087     public void neg(Blackhole bh) {
1088         double[] a = fa.apply(SPECIES.length());
1089         double[] r = fr.apply(SPECIES.length());
1090 
1091         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1092             for (int i = 0; i < a.length; i += SPECIES.length()) {
1093                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
1094                 av.neg().intoArray(r, i);
1095             }
1096         }
1097 
1098         bh.consume(r);
1099     }
1100 
1101     @Benchmark
1102     public void negMasked(Blackhole bh) {
1103         double[] a = fa.apply(SPECIES.length());
1104         double[] r = fr.apply(SPECIES.length());
1105         boolean[] mask = fm.apply(SPECIES.length());
1106         Vector.Mask<Double> vmask = DoubleVector.maskFromValues(SPECIES, mask);
1107 
1108         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1109             for (int i = 0; i < a.length; i += SPECIES.length()) {
1110                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
1111                 av.neg(vmask).intoArray(r, i);
1112             }
1113         }
1114 
1115         bh.consume(r);
1116     }
1117 
1118     @Benchmark
1119     public void abs(Blackhole bh) {
1120         double[] a = fa.apply(SPECIES.length());
1121         double[] r = fr.apply(SPECIES.length());
1122 
1123         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1124             for (int i = 0; i < a.length; i += SPECIES.length()) {
1125                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
1126                 av.abs().intoArray(r, i);
1127             }
1128         }
1129 
1130         bh.consume(r);
1131     }
1132 
1133     @Benchmark
1134     public void absMasked(Blackhole bh) {
1135         double[] a = fa.apply(SPECIES.length());
1136         double[] r = fr.apply(SPECIES.length());
1137         boolean[] mask = fm.apply(SPECIES.length());
1138         Vector.Mask<Double> vmask = DoubleVector.maskFromValues(SPECIES, mask);
1139 
1140         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1141             for (int i = 0; i < a.length; i += SPECIES.length()) {
1142                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
1143                 av.abs(vmask).intoArray(r, i);
1144             }
1145         }
1146 
1147         bh.consume(r);
1148     }
1149 
1150 
1151 
1152 
1153     @Benchmark
1154     public void sqrt(Blackhole bh) {
1155         double[] a = fa.apply(SPECIES.length());
1156         double[] r = fr.apply(SPECIES.length());
1157 
1158         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1159             for (int i = 0; i < a.length; i += SPECIES.length()) {
1160                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
1161                 av.sqrt().intoArray(r, i);
1162             }
1163         }
1164 
1165         bh.consume(r);
1166     }
1167 
1168 
1169 
1170     @Benchmark
1171     public void sqrtMasked(Blackhole bh) {
1172         double[] a = fa.apply(SPECIES.length());
1173         double[] r = fr.apply(SPECIES.length());
1174         boolean[] mask = fm.apply(SPECIES.length());
1175         Vector.Mask<Double> vmask = DoubleVector.maskFromValues(SPECIES, mask);
1176 
1177         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1178             for (int i = 0; i < a.length; i += SPECIES.length()) {
1179                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
1180                 av.sqrt(vmask).intoArray(r, i);
1181             }
1182         }
1183 
1184         bh.consume(r);
1185     }
1186 
1187 
1188 
1189     @Benchmark
1190     public void gather(Blackhole bh) {
1191         double[] a = fa.apply(SPECIES.length());
1192         int[] b    = fs.apply(a.length, SPECIES.length());
1193         double[] r = new double[a.length];
1194 
1195         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1196             for (int i = 0; i < a.length; i += SPECIES.length()) {
1197                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i, b, i);
1198                 av.intoArray(r, i);
1199             }
1200         }
1201 
1202         bh.consume(r);
1203     }
1204 
1205 
1206 
1207     @Benchmark
1208     public void scatter(Blackhole bh) {
1209         double[] a = fa.apply(SPECIES.length());
1210         int[] b = fs.apply(a.length, SPECIES.length());
1211         double[] r = new double[a.length];
1212 
1213         for (int ic = 0; ic < INVOC_COUNT; ic++) {
1214             for (int i = 0; i < a.length; i += SPECIES.length()) {
1215                 DoubleVector av = DoubleVector.fromArray(SPECIES, a, i);
1216                 av.intoArray(r, i, b, i);
1217             }
1218         }
1219 
1220         bh.consume(r);
1221     }
1222 
1223 }
1224